github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/command/agent/agent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 golog "log" 8 "net" 9 "os" 10 "path/filepath" 11 "runtime" 12 "strings" 13 "sync" 14 "sync/atomic" 15 "time" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/consul/api" 19 "github.com/hashicorp/consul/lib" 20 log "github.com/hashicorp/go-hclog" 21 uuidparse "github.com/hashicorp/go-uuid" 22 "github.com/hashicorp/nomad/client" 23 clientconfig "github.com/hashicorp/nomad/client/config" 24 "github.com/hashicorp/nomad/client/state" 25 "github.com/hashicorp/nomad/command/agent/consul" 26 "github.com/hashicorp/nomad/helper/pluginutils/loader" 27 "github.com/hashicorp/nomad/helper/uuid" 28 "github.com/hashicorp/nomad/nomad" 29 "github.com/hashicorp/nomad/nomad/structs" 30 "github.com/hashicorp/nomad/nomad/structs/config" 31 "github.com/hashicorp/raft" 32 ) 33 34 const ( 35 agentHttpCheckInterval = 10 * time.Second 36 agentHttpCheckTimeout = 5 * time.Second 37 serverRpcCheckInterval = 10 * time.Second 38 serverRpcCheckTimeout = 3 * time.Second 39 serverSerfCheckInterval = 10 * time.Second 40 serverSerfCheckTimeout = 3 * time.Second 41 42 // roles used in identifying Consul entries for Nomad agents 43 consulRoleServer = "server" 44 consulRoleClient = "client" 45 ) 46 47 // Agent is a long running daemon that is used to run both 48 // clients and servers. Servers are responsible for managing 49 // state and making scheduling decisions. Clients can be 50 // scheduled to, and are responsible for interfacing with 51 // servers to run allocations. 52 type Agent struct { 53 config *Config 54 configLock sync.Mutex 55 56 logger log.Logger 57 httpLogger log.Logger 58 logOutput io.Writer 59 60 // consulService is Nomad's custom Consul client for managing services 61 // and checks. 62 consulService *consul.ServiceClient 63 64 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 65 consulCatalog consul.CatalogAPI 66 67 // client is the launched Nomad Client. Can be nil if the agent isn't 68 // configured to run a client. 69 client *client.Client 70 71 // server is the launched Nomad Server. Can be nil if the agent isn't 72 // configured to run a server. 73 server *nomad.Server 74 75 // pluginLoader is used to load plugins 76 pluginLoader loader.PluginCatalog 77 78 // pluginSingletonLoader is a plugin loader that will returns singleton 79 // instances of the plugins. 80 pluginSingletonLoader loader.PluginCatalog 81 82 shutdown bool 83 shutdownCh chan struct{} 84 shutdownLock sync.Mutex 85 86 InmemSink *metrics.InmemSink 87 } 88 89 // NewAgent is used to create a new agent with the given configuration 90 func NewAgent(config *Config, logger log.Logger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 91 a := &Agent{ 92 config: config, 93 logOutput: logOutput, 94 shutdownCh: make(chan struct{}), 95 InmemSink: inmem, 96 } 97 98 // Create the loggers 99 a.logger = logger 100 a.httpLogger = a.logger.ResetNamed("http") 101 102 // Global logger should match internal logger as much as possible 103 golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds) 104 105 if err := a.setupConsul(config.Consul); err != nil { 106 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 107 } 108 109 if err := a.setupPlugins(); err != nil { 110 return nil, err 111 } 112 113 if err := a.setupServer(); err != nil { 114 return nil, err 115 } 116 if err := a.setupClient(); err != nil { 117 return nil, err 118 } 119 if a.client == nil && a.server == nil { 120 return nil, fmt.Errorf("must have at least client or server mode enabled") 121 } 122 123 return a, nil 124 } 125 126 // convertServerConfig takes an agent config and log output and returns a Nomad 127 // Config. There may be missing fields that must be set by the agent. To do this 128 // call finalizeServerConfig 129 func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { 130 conf := agentConfig.NomadConfig 131 if conf == nil { 132 conf = nomad.DefaultConfig() 133 } 134 conf.DevMode = agentConfig.DevMode 135 conf.Build = agentConfig.Version.VersionNumber() 136 if agentConfig.Region != "" { 137 conf.Region = agentConfig.Region 138 } 139 140 // Set the Authoritative Region if set, otherwise default to 141 // the same as the local region. 142 if agentConfig.Server.AuthoritativeRegion != "" { 143 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 144 } else if agentConfig.Region != "" { 145 conf.AuthoritativeRegion = agentConfig.Region 146 } 147 148 if agentConfig.Datacenter != "" { 149 conf.Datacenter = agentConfig.Datacenter 150 } 151 if agentConfig.NodeName != "" { 152 conf.NodeName = agentConfig.NodeName 153 } 154 if agentConfig.Server.BootstrapExpect > 0 { 155 if agentConfig.Server.BootstrapExpect == 1 { 156 conf.Bootstrap = true 157 } else { 158 atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect)) 159 } 160 } 161 if agentConfig.DataDir != "" { 162 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 163 } 164 if agentConfig.Server.DataDir != "" { 165 conf.DataDir = agentConfig.Server.DataDir 166 } 167 if agentConfig.Server.ProtocolVersion != 0 { 168 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 169 } 170 if agentConfig.Server.RaftProtocol != 0 { 171 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 172 } 173 if agentConfig.Server.NumSchedulers != nil { 174 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 175 } 176 if len(agentConfig.Server.EnabledSchedulers) != 0 { 177 // Convert to a set and require the core scheduler 178 set := make(map[string]struct{}, 4) 179 set[structs.JobTypeCore] = struct{}{} 180 for _, sched := range agentConfig.Server.EnabledSchedulers { 181 set[sched] = struct{}{} 182 } 183 184 schedulers := make([]string, 0, len(set)) 185 for k := range set { 186 schedulers = append(schedulers, k) 187 } 188 189 conf.EnabledSchedulers = schedulers 190 191 } 192 if agentConfig.ACL.Enabled { 193 conf.ACLEnabled = true 194 } 195 if agentConfig.ACL.ReplicationToken != "" { 196 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 197 } 198 if agentConfig.Sentinel != nil { 199 conf.SentinelConfig = agentConfig.Sentinel 200 } 201 if agentConfig.Server.NonVotingServer { 202 conf.NonVoter = true 203 } 204 if agentConfig.Server.RedundancyZone != "" { 205 conf.RedundancyZone = agentConfig.Server.RedundancyZone 206 } 207 if agentConfig.Server.UpgradeVersion != "" { 208 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 209 } 210 if agentConfig.Autopilot != nil { 211 if agentConfig.Autopilot.CleanupDeadServers != nil { 212 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 213 } 214 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 215 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 216 } 217 if agentConfig.Autopilot.LastContactThreshold != 0 { 218 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 219 } 220 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 221 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 222 } 223 if agentConfig.Autopilot.EnableRedundancyZones != nil { 224 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 225 } 226 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 227 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 228 } 229 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 230 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 231 } 232 } 233 234 // Set up the bind addresses 235 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 236 if err != nil { 237 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 238 } 239 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 240 if err != nil { 241 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 242 } 243 conf.RPCAddr.Port = rpcAddr.Port 244 conf.RPCAddr.IP = rpcAddr.IP 245 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 246 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 247 248 // Set up the advertise addresses 249 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 250 if err != nil { 251 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 252 } 253 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 254 if err != nil { 255 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 256 } 257 258 // Server address is the serf advertise address and rpc port. This is the 259 // address that all servers should be able to communicate over RPC with. 260 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 261 if err != nil { 262 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 263 } 264 265 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 266 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 267 conf.ClientRPCAdvertise = rpcAddr 268 conf.ServerRPCAdvertise = serverAddr 269 270 // Set up gc threshold and heartbeat grace period 271 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 272 dur, err := time.ParseDuration(gcThreshold) 273 if err != nil { 274 return nil, err 275 } 276 conf.NodeGCThreshold = dur 277 } 278 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 279 dur, err := time.ParseDuration(gcThreshold) 280 if err != nil { 281 return nil, err 282 } 283 conf.JobGCThreshold = dur 284 } 285 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 286 dur, err := time.ParseDuration(gcThreshold) 287 if err != nil { 288 return nil, err 289 } 290 conf.EvalGCThreshold = dur 291 } 292 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 293 dur, err := time.ParseDuration(gcThreshold) 294 if err != nil { 295 return nil, err 296 } 297 conf.DeploymentGCThreshold = dur 298 } 299 300 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 301 conf.HeartbeatGrace = heartbeatGrace 302 } 303 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 304 conf.MinHeartbeatTTL = min 305 } 306 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 307 conf.MaxHeartbeatsPerSecond = maxHPS 308 } 309 310 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 311 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 312 } 313 314 // Add the Consul and Vault configs 315 conf.ConsulConfig = agentConfig.Consul 316 conf.VaultConfig = agentConfig.Vault 317 318 // Set the TLS config 319 conf.TLSConfig = agentConfig.TLSConfig 320 321 // Setup telemetry related config 322 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 323 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 324 conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics 325 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 326 327 return conf, nil 328 } 329 330 // serverConfig is used to generate a new server configuration struct 331 // for initializing a nomad server. 332 func (a *Agent) serverConfig() (*nomad.Config, error) { 333 c, err := convertServerConfig(a.config) 334 if err != nil { 335 return nil, err 336 } 337 338 a.finalizeServerConfig(c) 339 return c, nil 340 } 341 342 // finalizeServerConfig sets configuration fields on the server config that are 343 // not staticly convertable and are from the agent. 344 func (a *Agent) finalizeServerConfig(c *nomad.Config) { 345 // Setup the logging 346 c.Logger = a.logger 347 c.LogOutput = a.logOutput 348 349 // Setup the plugin loaders 350 c.PluginLoader = a.pluginLoader 351 c.PluginSingletonLoader = a.pluginSingletonLoader 352 } 353 354 // clientConfig is used to generate a new client configuration struct for 355 // initializing a Nomad client. 356 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 357 c, err := convertClientConfig(a.config) 358 if err != nil { 359 return nil, err 360 } 361 362 if err := a.finalizeClientConfig(c); err != nil { 363 return nil, err 364 } 365 366 return c, nil 367 } 368 369 // finalizeClientConfig sets configuration fields on the client config that are 370 // not staticly convertable and are from the agent. 371 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error { 372 // Setup the logging 373 c.Logger = a.logger 374 c.LogOutput = a.logOutput 375 376 // If we are running a server, append both its bind and advertise address so 377 // we are able to at least talk to the local server even if that isn't 378 // configured explicitly. This handles both running server and client on one 379 // host and -dev mode. 380 if a.server != nil { 381 if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" { 382 return fmt.Errorf("AdvertiseAddrs is nil or empty") 383 } else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" { 384 return fmt.Errorf("normalizedAddrs is nil or empty") 385 } 386 387 c.Servers = append(c.Servers, 388 a.config.normalizedAddrs.RPC, 389 a.config.AdvertiseAddrs.RPC) 390 } 391 392 // Setup the plugin loaders 393 c.PluginLoader = a.pluginLoader 394 c.PluginSingletonLoader = a.pluginSingletonLoader 395 396 // Log deprecation messages about Consul related configuration in client 397 // options 398 var invalidConsulKeys []string 399 for key := range c.Options { 400 if strings.HasPrefix(key, "consul") { 401 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 402 } 403 } 404 if len(invalidConsulKeys) > 0 { 405 a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ",")) 406 a.logger.Warn(`Nomad client ignores consul related configuration in client options. 407 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 408 to configure Nomad to work with Consul.`) 409 } 410 411 return nil 412 } 413 414 // convertClientConfig takes an agent config and log output and returns a client 415 // Config. There may be missing fields that must be set by the agent. To do this 416 // call finalizeServerConfig 417 func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { 418 // Setup the configuration 419 conf := agentConfig.ClientConfig 420 if conf == nil { 421 conf = clientconfig.DefaultConfig() 422 } 423 424 conf.Servers = agentConfig.Client.Servers 425 conf.LogLevel = agentConfig.LogLevel 426 conf.DevMode = agentConfig.DevMode 427 if agentConfig.Region != "" { 428 conf.Region = agentConfig.Region 429 } 430 if agentConfig.DataDir != "" { 431 conf.StateDir = filepath.Join(agentConfig.DataDir, "client") 432 conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") 433 } 434 if agentConfig.Client.StateDir != "" { 435 conf.StateDir = agentConfig.Client.StateDir 436 } 437 if agentConfig.Client.AllocDir != "" { 438 conf.AllocDir = agentConfig.Client.AllocDir 439 } 440 if agentConfig.Client.NetworkInterface != "" { 441 conf.NetworkInterface = agentConfig.Client.NetworkInterface 442 } 443 conf.ChrootEnv = agentConfig.Client.ChrootEnv 444 conf.Options = agentConfig.Client.Options 445 if agentConfig.Client.NetworkSpeed != 0 { 446 conf.NetworkSpeed = agentConfig.Client.NetworkSpeed 447 } 448 if agentConfig.Client.CpuCompute != 0 { 449 conf.CpuCompute = agentConfig.Client.CpuCompute 450 } 451 if agentConfig.Client.MemoryMB != 0 { 452 conf.MemoryMB = agentConfig.Client.MemoryMB 453 } 454 if agentConfig.Client.MaxKillTimeout != "" { 455 dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout) 456 if err != nil { 457 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 458 } 459 conf.MaxKillTimeout = dur 460 } 461 conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort) 462 conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort) 463 conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec 464 465 // Setup the node 466 conf.Node = new(structs.Node) 467 conf.Node.Datacenter = agentConfig.Datacenter 468 conf.Node.Name = agentConfig.NodeName 469 conf.Node.Meta = agentConfig.Client.Meta 470 conf.Node.NodeClass = agentConfig.Client.NodeClass 471 472 // Set up the HTTP advertise address 473 conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP 474 475 // Reserve resources on the node. 476 // COMPAT(0.10): Remove in 0.10 477 r := conf.Node.Reserved 478 if r == nil { 479 r = new(structs.Resources) 480 conf.Node.Reserved = r 481 } 482 r.CPU = agentConfig.Client.Reserved.CPU 483 r.MemoryMB = agentConfig.Client.Reserved.MemoryMB 484 r.DiskMB = agentConfig.Client.Reserved.DiskMB 485 486 res := conf.Node.ReservedResources 487 if res == nil { 488 res = new(structs.NodeReservedResources) 489 conf.Node.ReservedResources = res 490 } 491 res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU) 492 res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB) 493 res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB) 494 res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts 495 496 conf.Version = agentConfig.Version 497 498 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" { 499 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 500 } 501 502 conf.ConsulConfig = agentConfig.Consul 503 conf.VaultConfig = agentConfig.Vault 504 505 // Set up Telemetry configuration 506 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 507 conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics 508 conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics 509 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 510 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 511 512 // Set the TLS related configs 513 conf.TLSConfig = agentConfig.TLSConfig 514 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 515 516 // Set the GC related configs 517 conf.GCInterval = agentConfig.Client.GCInterval 518 conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys 519 conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold 520 conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold 521 conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs 522 if agentConfig.Client.NoHostUUID != nil { 523 conf.NoHostUUID = *agentConfig.Client.NoHostUUID 524 } else { 525 // Default no_host_uuid to true 526 conf.NoHostUUID = true 527 } 528 529 // Setup the ACLs 530 conf.ACLEnabled = agentConfig.ACL.Enabled 531 conf.ACLTokenTTL = agentConfig.ACL.TokenTTL 532 conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL 533 534 return conf, nil 535 } 536 537 // setupServer is used to setup the server if enabled 538 func (a *Agent) setupServer() error { 539 if !a.config.Server.Enabled { 540 return nil 541 } 542 543 // Setup the configuration 544 conf, err := a.serverConfig() 545 if err != nil { 546 return fmt.Errorf("server config setup failed: %s", err) 547 } 548 549 // Generate a node ID and persist it if it is the first instance, otherwise 550 // read the persisted node ID. 551 if err := a.setupNodeID(conf); err != nil { 552 return fmt.Errorf("setting up server node ID failed: %s", err) 553 } 554 555 // Sets up the keyring for gossip encryption 556 if err := a.setupKeyrings(conf); err != nil { 557 return fmt.Errorf("failed to configure keyring: %v", err) 558 } 559 560 // Create the server 561 server, err := nomad.NewServer(conf, a.consulCatalog) 562 if err != nil { 563 return fmt.Errorf("server setup failed: %v", err) 564 } 565 a.server = server 566 567 // Consul check addresses default to bind but can be toggled to use advertise 568 rpcCheckAddr := a.config.normalizedAddrs.RPC 569 serfCheckAddr := a.config.normalizedAddrs.Serf 570 if *a.config.Consul.ChecksUseAdvertise { 571 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 572 serfCheckAddr = a.config.AdvertiseAddrs.Serf 573 } 574 575 // Create the Nomad Server services for Consul 576 if *a.config.Consul.AutoAdvertise { 577 httpServ := &structs.Service{ 578 Name: a.config.Consul.ServerServiceName, 579 PortLabel: a.config.AdvertiseAddrs.HTTP, 580 Tags: []string{consul.ServiceTagHTTP}, 581 } 582 const isServer = true 583 if check := a.agentHTTPCheck(isServer); check != nil { 584 httpServ.Checks = []*structs.ServiceCheck{check} 585 } 586 rpcServ := &structs.Service{ 587 Name: a.config.Consul.ServerServiceName, 588 PortLabel: a.config.AdvertiseAddrs.RPC, 589 Tags: []string{consul.ServiceTagRPC}, 590 Checks: []*structs.ServiceCheck{ 591 { 592 Name: a.config.Consul.ServerRPCCheckName, 593 Type: "tcp", 594 Interval: serverRpcCheckInterval, 595 Timeout: serverRpcCheckTimeout, 596 PortLabel: rpcCheckAddr, 597 }, 598 }, 599 } 600 serfServ := &structs.Service{ 601 Name: a.config.Consul.ServerServiceName, 602 PortLabel: a.config.AdvertiseAddrs.Serf, 603 Tags: []string{consul.ServiceTagSerf}, 604 Checks: []*structs.ServiceCheck{ 605 { 606 Name: a.config.Consul.ServerSerfCheckName, 607 Type: "tcp", 608 Interval: serverSerfCheckInterval, 609 Timeout: serverSerfCheckTimeout, 610 PortLabel: serfCheckAddr, 611 }, 612 }, 613 } 614 615 // Add the http port check if TLS isn't enabled 616 consulServices := []*structs.Service{ 617 rpcServ, 618 serfServ, 619 httpServ, 620 } 621 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 622 return err 623 } 624 } 625 626 return nil 627 } 628 629 // setupNodeID will pull the persisted node ID, if any, or create a random one 630 // and persist it. 631 func (a *Agent) setupNodeID(config *nomad.Config) error { 632 // For dev mode we have no filesystem access so just make a node ID. 633 if a.config.DevMode { 634 config.NodeID = uuid.Generate() 635 return nil 636 } 637 638 // Load saved state, if any. Since a user could edit this, we also 639 // validate it. Saved state overwrites any configured node id 640 fileID := filepath.Join(config.DataDir, "node-id") 641 if _, err := os.Stat(fileID); err == nil { 642 rawID, err := ioutil.ReadFile(fileID) 643 if err != nil { 644 return err 645 } 646 647 nodeID := strings.TrimSpace(string(rawID)) 648 nodeID = strings.ToLower(nodeID) 649 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 650 return err 651 } 652 config.NodeID = nodeID 653 return nil 654 } 655 656 // If they've configured a node ID manually then just use that, as 657 // long as it's valid. 658 if config.NodeID != "" { 659 config.NodeID = strings.ToLower(config.NodeID) 660 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 661 return err 662 } 663 // Persist this configured nodeID to our data directory 664 if err := lib.EnsurePath(fileID, false); err != nil { 665 return err 666 } 667 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 668 return err 669 } 670 return nil 671 } 672 673 // If we still don't have a valid node ID, make one. 674 if config.NodeID == "" { 675 id := uuid.Generate() 676 if err := lib.EnsurePath(fileID, false); err != nil { 677 return err 678 } 679 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 680 return err 681 } 682 683 config.NodeID = id 684 } 685 return nil 686 } 687 688 // setupKeyrings is used to initialize and load keyrings during agent startup 689 func (a *Agent) setupKeyrings(config *nomad.Config) error { 690 file := filepath.Join(a.config.DataDir, serfKeyring) 691 692 if a.config.Server.EncryptKey == "" { 693 goto LOAD 694 } 695 if _, err := os.Stat(file); err != nil { 696 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 697 return err 698 } 699 } 700 701 LOAD: 702 if _, err := os.Stat(file); err == nil { 703 config.SerfConfig.KeyringFile = file 704 } 705 if err := loadKeyringFile(config.SerfConfig); err != nil { 706 return err 707 } 708 // Success! 709 return nil 710 } 711 712 // setupClient is used to setup the client if enabled 713 func (a *Agent) setupClient() error { 714 if !a.config.Client.Enabled { 715 return nil 716 } 717 718 // Setup the configuration 719 conf, err := a.clientConfig() 720 if err != nil { 721 return fmt.Errorf("client setup failed: %v", err) 722 } 723 724 // Reserve some ports for the plugins if we are on Windows 725 if runtime.GOOS == "windows" { 726 if err := a.reservePortsForClient(conf); err != nil { 727 return err 728 } 729 } 730 if conf.StateDBFactory == nil { 731 conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode) 732 } 733 734 client, err := client.NewClient(conf, a.consulCatalog, a.consulService) 735 if err != nil { 736 return fmt.Errorf("client setup failed: %v", err) 737 } 738 a.client = client 739 740 // Create the Nomad Client services for Consul 741 if *a.config.Consul.AutoAdvertise { 742 httpServ := &structs.Service{ 743 Name: a.config.Consul.ClientServiceName, 744 PortLabel: a.config.AdvertiseAddrs.HTTP, 745 Tags: []string{consul.ServiceTagHTTP}, 746 } 747 const isServer = false 748 if check := a.agentHTTPCheck(isServer); check != nil { 749 httpServ.Checks = []*structs.ServiceCheck{check} 750 } 751 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 752 return err 753 } 754 } 755 756 return nil 757 } 758 759 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 760 // If no HTTP health check can be supported nil is returned. 761 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 762 // Resolve the http check address 763 httpCheckAddr := a.config.normalizedAddrs.HTTP 764 if *a.config.Consul.ChecksUseAdvertise { 765 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 766 } 767 check := structs.ServiceCheck{ 768 Name: a.config.Consul.ClientHTTPCheckName, 769 Type: "http", 770 Path: "/v1/agent/health?type=client", 771 Protocol: "http", 772 Interval: agentHttpCheckInterval, 773 Timeout: agentHttpCheckTimeout, 774 PortLabel: httpCheckAddr, 775 } 776 // Switch to endpoint that doesn't require a leader for servers 777 if server { 778 check.Name = a.config.Consul.ServerHTTPCheckName 779 check.Path = "/v1/agent/health?type=server" 780 } 781 if !a.config.TLSConfig.EnableHTTP { 782 // No HTTPS, return a plain http check 783 return &check 784 } 785 if a.config.TLSConfig.VerifyHTTPSClient { 786 a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled") 787 return nil 788 } 789 790 // HTTPS enabled; skip verification 791 check.Protocol = "https" 792 check.TLSSkipVerify = true 793 return &check 794 } 795 796 // reservePortsForClient reserves a range of ports for the client to use when 797 // it creates various plugins for log collection, executors, drivers, etc 798 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 799 if conf.Node.ReservedResources == nil { 800 conf.Node.ReservedResources = &structs.NodeReservedResources{} 801 } 802 803 res := conf.Node.ReservedResources.Networks.ReservedHostPorts 804 if res == "" { 805 res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 806 } else { 807 res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 808 } 809 conf.Node.ReservedResources.Networks.ReservedHostPorts = res 810 return nil 811 } 812 813 // findLoopbackDevice iterates through all the interfaces on a machine and 814 // returns the ip addr, mask of the loopback device 815 func (a *Agent) findLoopbackDevice() (string, string, string, error) { 816 var ifcs []net.Interface 817 var err error 818 ifcs, err = net.Interfaces() 819 if err != nil { 820 return "", "", "", err 821 } 822 for _, ifc := range ifcs { 823 addrs, err := ifc.Addrs() 824 if err != nil { 825 return "", "", "", err 826 } 827 for _, addr := range addrs { 828 var ip net.IP 829 switch v := addr.(type) { 830 case *net.IPNet: 831 ip = v.IP 832 case *net.IPAddr: 833 ip = v.IP 834 } 835 if ip.IsLoopback() { 836 if ip.To4() == nil { 837 continue 838 } 839 return ifc.Name, ip.String(), addr.String(), nil 840 } 841 } 842 } 843 844 return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found") 845 } 846 847 // Leave is used gracefully exit. Clients will inform servers 848 // of their departure so that allocations can be rescheduled. 849 func (a *Agent) Leave() error { 850 if a.client != nil { 851 if err := a.client.Leave(); err != nil { 852 a.logger.Error("client leave failed", "error", err) 853 } 854 } 855 if a.server != nil { 856 if err := a.server.Leave(); err != nil { 857 a.logger.Error("server leave failed", "error", err) 858 } 859 } 860 return nil 861 } 862 863 // Shutdown is used to terminate the agent. 864 func (a *Agent) Shutdown() error { 865 a.shutdownLock.Lock() 866 defer a.shutdownLock.Unlock() 867 868 if a.shutdown { 869 return nil 870 } 871 872 a.logger.Info("requesting shutdown") 873 if a.client != nil { 874 if err := a.client.Shutdown(); err != nil { 875 a.logger.Error("client shutdown failed", "error", err) 876 } 877 } 878 if a.server != nil { 879 if err := a.server.Shutdown(); err != nil { 880 a.logger.Error("server shutdown failed", "error", err) 881 } 882 } 883 884 if err := a.consulService.Shutdown(); err != nil { 885 a.logger.Error("shutting down Consul client failed", "error", err) 886 } 887 888 a.logger.Info("shutdown complete") 889 a.shutdown = true 890 close(a.shutdownCh) 891 return nil 892 } 893 894 // RPC is used to make an RPC call to the Nomad servers 895 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 896 if a.server != nil { 897 return a.server.RPC(method, args, reply) 898 } 899 return a.client.RPC(method, args, reply) 900 } 901 902 // Client returns the configured client or nil 903 func (a *Agent) Client() *client.Client { 904 return a.client 905 } 906 907 // Server returns the configured server or nil 908 func (a *Agent) Server() *nomad.Server { 909 return a.server 910 } 911 912 // Stats is used to return statistics for debugging and insight 913 // for various sub-systems 914 func (a *Agent) Stats() map[string]map[string]string { 915 stats := make(map[string]map[string]string) 916 if a.server != nil { 917 subStat := a.server.Stats() 918 for k, v := range subStat { 919 stats[k] = v 920 } 921 } 922 if a.client != nil { 923 subStat := a.client.Stats() 924 for k, v := range subStat { 925 stats[k] = v 926 } 927 } 928 return stats 929 } 930 931 // ShouldReload determines if we should reload the configuration and agent 932 // connections. If the TLS Configuration has not changed, we shouldn't reload. 933 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 934 a.configLock.Lock() 935 defer a.configLock.Unlock() 936 937 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 938 if err != nil { 939 a.logger.Error("parsing TLS certificate", "error", err) 940 return false, false 941 } else if !isEqual { 942 return true, true 943 } 944 945 // Allow the ability to only reload HTTP connections 946 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 947 http = true 948 agent = true 949 } 950 951 // Allow the ability to only reload HTTP connections 952 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 953 agent = true 954 } 955 956 return agent, http 957 } 958 959 // Reload handles configuration changes for the agent. Provides a method that 960 // is easier to unit test, as this action is invoked via SIGHUP. 961 func (a *Agent) Reload(newConfig *Config) error { 962 a.configLock.Lock() 963 defer a.configLock.Unlock() 964 965 if newConfig == nil || newConfig.TLSConfig == nil { 966 return fmt.Errorf("cannot reload agent with nil configuration") 967 } 968 969 // This is just a TLS configuration reload, we don't need to refresh 970 // existing network connections 971 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 972 973 // Reload the certificates on the keyloader and on success store the 974 // updated TLS config. It is important to reuse the same keyloader 975 // as this allows us to dynamically reload configurations not only 976 // on the Agent but on the Server and Client too (they are 977 // referencing the same keyloader). 978 keyloader := a.config.TLSConfig.GetKeyLoader() 979 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 980 if err != nil { 981 return err 982 } 983 a.config.TLSConfig = newConfig.TLSConfig 984 a.config.TLSConfig.KeyLoader = keyloader 985 return nil 986 } 987 988 // Completely reload the agent's TLS configuration (moving from non-TLS to 989 // TLS, or vice versa) 990 // This does not handle errors in loading the new TLS configuration 991 a.config.TLSConfig = newConfig.TLSConfig.Copy() 992 993 if newConfig.TLSConfig.IsEmpty() { 994 a.logger.Warn("downgrading agent's existing TLS configuration to plaintext") 995 } else { 996 a.logger.Info("upgrading from plaintext configuration to TLS") 997 } 998 999 return nil 1000 } 1001 1002 // GetConfig creates a locked reference to the agent's config 1003 func (a *Agent) GetConfig() *Config { 1004 a.configLock.Lock() 1005 defer a.configLock.Unlock() 1006 1007 return a.config 1008 } 1009 1010 // setupConsul creates the Consul client and starts its main Run loop. 1011 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 1012 apiConf, err := consulConfig.ApiConfig() 1013 if err != nil { 1014 return err 1015 } 1016 client, err := api.NewClient(apiConf) 1017 if err != nil { 1018 return err 1019 } 1020 1021 // Determine version for TLSSkipVerify 1022 1023 // Create Consul Catalog client for service discovery. 1024 a.consulCatalog = client.Catalog() 1025 1026 // Create Consul Service client for service advertisement and checks. 1027 isClient := false 1028 if a.config.Client != nil && a.config.Client.Enabled { 1029 isClient = true 1030 } 1031 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 1032 1033 // Run the Consul service client's sync'ing main loop 1034 go a.consulService.Run() 1035 return nil 1036 }