package agent

import (
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/lib"
	uuidparse "github.com/hashicorp/go-uuid"
	"github.com/hashicorp/nomad/client"
	clientconfig "github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/raft"
)

const (
	// Interval and timeout used for the Consul health checks that the agent
	// registers for its HTTP, RPC and Serf endpoints.
	agentHttpCheckInterval  = 10 * time.Second
	agentHttpCheckTimeout   = 5 * time.Second
	serverRpcCheckInterval  = 10 * time.Second
	serverRpcCheckTimeout   = 3 * time.Second
	serverSerfCheckInterval = 10 * time.Second
	serverSerfCheckTimeout  = 3 * time.Second

	// roles used in identifying Consul entries for Nomad agents
	consulRoleServer = "server"
	consulRoleClient = "client"
)

// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration; configLock is held by the reload
	// and accessor methods while they read or replace parts of it.
	config     *Config
	configLock sync.Mutex

	// logger writes to logOutput; logOutput is also handed to the server
	// and client configurations.
	logger    *log.Logger
	logOutput io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// client is non-nil only when client mode is enabled.
	client *client.Client

	// server is non-nil only when server mode is enabled.
	server *nomad.Server

	// shutdown records that Shutdown has completed; shutdownCh is closed at
	// that point. Both are guarded by shutdownLock.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink passed to NewAgent.
	InmemSink *metrics.InmemSink
}

// NewAgent is used to create a new agent with the given configuration.
//
// It sets up the Consul client first, then the server and the client (each
// only if enabled in the configuration). An error is returned if any of these
// steps fails or if neither client nor server mode ended up enabled.
func NewAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
	a := &Agent{
		config:     config,
		logger:     log.New(logOutput, "", log.LstdFlags|log.Lmicroseconds),
		logOutput:  logOutput,
		shutdownCh: make(chan struct{}),
		InmemSink:  inmem,
	}

	// Global logger should match internal logger as much as possible
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	if err := a.setupConsul(config.Consul); err != nil {
		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
	}
	if err := a.setupServer(); err != nil {
		return nil, err
	}
	if err := a.setupClient(); err != nil {
		return nil, err
	}
	if a.client == nil && a.server == nil {
		return nil, fmt.Errorf("must have at least client or server mode enabled")
	}

	return a, nil
}

// convertServerConfig takes an agent config and log output and returns a Nomad
// Config.
//
// The result starts from agentConfig.NomadConfig when that is set (it is
// modified in place, not copied) and from nomad.DefaultConfig() otherwise.
// Most agent settings only override the starting value when they are non-zero.
//
// An error is returned when a bind or advertise address cannot be resolved,
// when a GC threshold is not a valid duration, or when Consul auto-advertise
// is enabled without a server service name.
func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Config, error) {
	conf := agentConfig.NomadConfig
	if conf == nil {
		conf = nomad.DefaultConfig()
	}
	conf.LogOutput = logOutput
	conf.DevMode = agentConfig.DevMode
	conf.Build = agentConfig.Version.VersionNumber()
	if agentConfig.Region != "" {
		conf.Region = agentConfig.Region
	}

	// Set the Authoritative Region if set, otherwise default to
	// the same as the local region.
	if agentConfig.Server.AuthoritativeRegion != "" {
		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
	} else if agentConfig.Region != "" {
		conf.AuthoritativeRegion = agentConfig.Region
	}

	if agentConfig.Datacenter != "" {
		conf.Datacenter = agentConfig.Datacenter
	}
	if agentConfig.NodeName != "" {
		conf.NodeName = agentConfig.NodeName
	}
	// A bootstrap-expect of exactly 1 enables plain bootstrap mode; larger
	// values are stored as the expected server count instead.
	if agentConfig.Server.BootstrapExpect > 0 {
		if agentConfig.Server.BootstrapExpect == 1 {
			conf.Bootstrap = true
		} else {
			atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect))
		}
	}
	// The server data directory defaults to <data_dir>/server; an explicit
	// server data_dir takes precedence because it is applied afterwards.
	if agentConfig.DataDir != "" {
		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
	}
	if agentConfig.Server.DataDir != "" {
		conf.DataDir = agentConfig.Server.DataDir
	}
	if agentConfig.Server.ProtocolVersion != 0 {
		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
	}
	if agentConfig.Server.RaftProtocol != 0 {
		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
	}
	if agentConfig.Server.NumSchedulers != nil {
		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
	}
	if len(agentConfig.Server.EnabledSchedulers) != 0 {
		// Convert to a set and require the core scheduler
		set := make(map[string]struct{}, 4)
		set[structs.JobTypeCore] = struct{}{}
		for _, sched := range agentConfig.Server.EnabledSchedulers {
			set[sched] = struct{}{}
		}

		// The set is flattened by ranging over a map, so the order of the
		// resulting slice is not deterministic.
		schedulers := make([]string, 0, len(set))
		for k := range set {
			schedulers = append(schedulers, k)
		}

		conf.EnabledSchedulers = schedulers

	}
	if agentConfig.ACL.Enabled {
		conf.ACLEnabled = true
	}
	if agentConfig.ACL.ReplicationToken != "" {
		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
	}
	if agentConfig.Sentinel != nil {
		conf.SentinelConfig = agentConfig.Sentinel
	}
	if agentConfig.Server.NonVotingServer {
		conf.NonVoter = true
	}
	if agentConfig.Server.RedundancyZone != "" {
		conf.RedundancyZone = agentConfig.Server.RedundancyZone
	}
	if agentConfig.Server.UpgradeVersion != "" {
		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
	}
	// Autopilot settings are optional; only the fields that were explicitly
	// set (non-nil pointers, non-zero values) override the existing values.
	if agentConfig.Autopilot != nil {
		if agentConfig.Autopilot.CleanupDeadServers != nil {
			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
		}
		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
		}
		if agentConfig.Autopilot.LastContactThreshold != 0 {
			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
		}
		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
		}
		if agentConfig.Autopilot.EnableRedundancyZones != nil {
			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
		}
		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
		}
		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
		}
	}

	// Set up the bind addresses
	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
	}
	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
	}
	conf.RPCAddr.Port = rpcAddr.Port
	conf.RPCAddr.IP = rpcAddr.IP
	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()

	// Set up the advertise addresses
	// (rpcAddr and serfAddr are reused and now hold the advertise addresses.)
	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
	}
	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
	}

	// Server address is the serf advertise address and rpc port. This is the
	// address that all servers should be able to communicate over RPC with.
	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
	if err != nil {
		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
	}

	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
	conf.ClientRPCAdvertise = rpcAddr
	conf.ServerRPCAdvertise = serverAddr

	// Set up gc threshold and heartbeat grace period
	// Each threshold is a duration string; an unparsable value aborts the
	// conversion with the raw time.ParseDuration error.
	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.NodeGCThreshold = dur
	}
	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.JobGCThreshold = dur
	}
	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.EvalGCThreshold = dur
	}
	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.DeploymentGCThreshold = dur
	}

	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
		conf.HeartbeatGrace = heartbeatGrace
	}
	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
		conf.MinHeartbeatTTL = min
	}
	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
		conf.MaxHeartbeatsPerSecond = maxHPS
	}

	// NOTE(review): AutoAdvertise is dereferenced unconditionally, so it must
	// be non-nil here; presumably config defaulting guarantees that - confirm.
	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
	}

	// Add the Consul and Vault configs
	conf.ConsulConfig = agentConfig.Consul
	conf.VaultConfig = agentConfig.Vault

	// Set the TLS config
	conf.TLSConfig = agentConfig.TLSConfig

	// Setup telemetry related config
	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics

	return conf, nil
}

// serverConfig is used to generate a new server configuration struct
// for initializing a nomad server.
func (a *Agent) serverConfig() (*nomad.Config, error) {
	return convertServerConfig(a.config, a.logOutput)
}

// clientConfig is used to generate a new client configuration struct
// for initializing a Nomad client.
//
// The result starts from a.config.ClientConfig when that is set (it is
// modified in place, not copied) and from clientconfig.DefaultConfig()
// otherwise. An error is returned when a local server is running but its RPC
// addresses are missing, when max_kill_timeout is not a valid duration, or
// when Consul auto-advertise is enabled without a client service name.
func (a *Agent) clientConfig() (*clientconfig.Config, error) {
	// Setup the configuration
	conf := a.config.ClientConfig
	if conf == nil {
		conf = clientconfig.DefaultConfig()
	}

	// If we are running a server, append both its bind and advertise address so
	// we are able to at least talk to the local server even if that isn't
	// configured explicitly. This handles both running server and client on one
	// host and -dev mode.
	conf.Servers = a.config.Client.Servers
	if a.server != nil {
		if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" {
			return nil, fmt.Errorf("AdvertiseAddrs is nil or empty")
		} else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" {
			return nil, fmt.Errorf("normalizedAddrs is nil or empty")
		}

		conf.Servers = append(conf.Servers,
			a.config.normalizedAddrs.RPC,
			a.config.AdvertiseAddrs.RPC)
	}

	conf.LogOutput = a.logOutput
	conf.LogLevel = a.config.LogLevel
	conf.DevMode = a.config.DevMode
	if a.config.Region != "" {
		conf.Region = a.config.Region
	}
	// State and alloc directories default to subdirectories of data_dir;
	// explicit client settings are applied afterwards and take precedence.
	if a.config.DataDir != "" {
		conf.StateDir = filepath.Join(a.config.DataDir, "client")
		conf.AllocDir = filepath.Join(a.config.DataDir, "alloc")
	}
	if a.config.Client.StateDir != "" {
		conf.StateDir = a.config.Client.StateDir
	}
	if a.config.Client.AllocDir != "" {
		conf.AllocDir = a.config.Client.AllocDir
	}
	if a.config.Client.NetworkInterface != "" {
		conf.NetworkInterface = a.config.Client.NetworkInterface
	}
	conf.ChrootEnv = a.config.Client.ChrootEnv
	conf.Options = a.config.Client.Options
	// Logging deprecation messages about consul related configuration in client
	// options
	var invalidConsulKeys []string
	for key := range conf.Options {
		if strings.HasPrefix(key, "consul") {
			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
		}
	}
	if len(invalidConsulKeys) > 0 {
		a.logger.Printf("[WARN] agent: Invalid keys: %v", strings.Join(invalidConsulKeys, ","))
		a.logger.Printf(`Nomad client ignores consul related configuration in client options.
		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
		to configure Nomad to work with Consul.`)
	}

	if a.config.Client.NetworkSpeed != 0 {
		conf.NetworkSpeed = a.config.Client.NetworkSpeed
	}
	if a.config.Client.CpuCompute != 0 {
		conf.CpuCompute = a.config.Client.CpuCompute
	}
	if a.config.Client.MemoryMB != 0 {
		conf.MemoryMB = a.config.Client.MemoryMB
	}
	if a.config.Client.MaxKillTimeout != "" {
		dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout)
		if err != nil {
			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
		}
		conf.MaxKillTimeout = dur
	}
	conf.ClientMaxPort = uint(a.config.Client.ClientMaxPort)
	conf.ClientMinPort = uint(a.config.Client.ClientMinPort)

	// Setup the node
	// A fresh Node is always allocated here, replacing any node that the
	// starting configuration may have carried.
	conf.Node = new(structs.Node)
	conf.Node.Datacenter = a.config.Datacenter
	conf.Node.Name = a.config.NodeName
	conf.Node.Meta = a.config.Client.Meta
	conf.Node.NodeClass = a.config.Client.NodeClass

	// Set up the HTTP advertise address
	conf.Node.HTTPAddr = a.config.AdvertiseAddrs.HTTP

	// Reserve resources on the node.
	r := conf.Node.Reserved
	if r == nil {
		r = new(structs.Resources)
		conf.Node.Reserved = r
	}
	r.CPU = a.config.Client.Reserved.CPU
	r.MemoryMB = a.config.Client.Reserved.MemoryMB
	r.DiskMB = a.config.Client.Reserved.DiskMB
	r.IOPS = a.config.Client.Reserved.IOPS
	conf.GloballyReservedPorts = a.config.Client.Reserved.ParsedReservedPorts

	conf.Version = a.config.Version

	// NOTE(review): AutoAdvertise is dereferenced unconditionally, so it must
	// be non-nil here; presumably config defaulting guarantees that - confirm.
	if *a.config.Consul.AutoAdvertise && a.config.Consul.ClientServiceName == "" {
		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
	}

	conf.ConsulConfig = a.config.Consul
	conf.VaultConfig = a.config.Vault

	// Set up Telemetry configuration
	conf.StatsCollectionInterval = a.config.Telemetry.collectionInterval
	conf.PublishNodeMetrics = a.config.Telemetry.PublishNodeMetrics
	conf.PublishAllocationMetrics = a.config.Telemetry.PublishAllocationMetrics
	conf.DisableTaggedMetrics = a.config.Telemetry.DisableTaggedMetrics
	conf.BackwardsCompatibleMetrics = a.config.Telemetry.BackwardsCompatibleMetrics

	// Set the TLS related configs
	conf.TLSConfig = a.config.TLSConfig
	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

	// Set the GC related configs
	conf.GCInterval = a.config.Client.GCInterval
	conf.GCParallelDestroys = a.config.Client.GCParallelDestroys
	conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
	conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
	conf.GCMaxAllocs = a.config.Client.GCMaxAllocs
	if a.config.Client.NoHostUUID != nil {
		conf.NoHostUUID = *a.config.Client.NoHostUUID
	} else {
		// Default no_host_uuid to true
		conf.NoHostUUID = true
	}

	// Setup the ACLs
	conf.ACLEnabled = a.config.ACL.Enabled
	conf.ACLTokenTTL = a.config.ACL.TokenTTL
	conf.ACLPolicyTTL = a.config.ACL.PolicyTTL

	return conf, nil
}

// setupServer is used to setup the server if
enabled 457 func (a *Agent) setupServer() error { 458 if !a.config.Server.Enabled { 459 return nil 460 } 461 462 // Setup the configuration 463 conf, err := a.serverConfig() 464 if err != nil { 465 return fmt.Errorf("server config setup failed: %s", err) 466 } 467 468 // Generate a node ID and persist it if it is the first instance, otherwise 469 // read the persisted node ID. 470 if err := a.setupNodeID(conf); err != nil { 471 return fmt.Errorf("setting up server node ID failed: %s", err) 472 } 473 474 // Sets up the keyring for gossip encryption 475 if err := a.setupKeyrings(conf); err != nil { 476 return fmt.Errorf("failed to configure keyring: %v", err) 477 } 478 479 // Create the server 480 server, err := nomad.NewServer(conf, a.consulCatalog, a.logger) 481 if err != nil { 482 return fmt.Errorf("server setup failed: %v", err) 483 } 484 a.server = server 485 486 // Consul check addresses default to bind but can be toggled to use advertise 487 rpcCheckAddr := a.config.normalizedAddrs.RPC 488 serfCheckAddr := a.config.normalizedAddrs.Serf 489 if *a.config.Consul.ChecksUseAdvertise { 490 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 491 serfCheckAddr = a.config.AdvertiseAddrs.Serf 492 } 493 494 // Create the Nomad Server services for Consul 495 if *a.config.Consul.AutoAdvertise { 496 httpServ := &structs.Service{ 497 Name: a.config.Consul.ServerServiceName, 498 PortLabel: a.config.AdvertiseAddrs.HTTP, 499 Tags: []string{consul.ServiceTagHTTP}, 500 } 501 const isServer = true 502 if check := a.agentHTTPCheck(isServer); check != nil { 503 httpServ.Checks = []*structs.ServiceCheck{check} 504 } 505 rpcServ := &structs.Service{ 506 Name: a.config.Consul.ServerServiceName, 507 PortLabel: a.config.AdvertiseAddrs.RPC, 508 Tags: []string{consul.ServiceTagRPC}, 509 Checks: []*structs.ServiceCheck{ 510 { 511 Name: a.config.Consul.ServerRPCCheckName, 512 Type: "tcp", 513 Interval: serverRpcCheckInterval, 514 Timeout: serverRpcCheckTimeout, 515 PortLabel: rpcCheckAddr, 516 }, 517 
}, 518 } 519 serfServ := &structs.Service{ 520 Name: a.config.Consul.ServerServiceName, 521 PortLabel: a.config.AdvertiseAddrs.Serf, 522 Tags: []string{consul.ServiceTagSerf}, 523 Checks: []*structs.ServiceCheck{ 524 { 525 Name: a.config.Consul.ServerSerfCheckName, 526 Type: "tcp", 527 Interval: serverSerfCheckInterval, 528 Timeout: serverSerfCheckTimeout, 529 PortLabel: serfCheckAddr, 530 }, 531 }, 532 } 533 534 // Add the http port check if TLS isn't enabled 535 consulServices := []*structs.Service{ 536 rpcServ, 537 serfServ, 538 httpServ, 539 } 540 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 541 return err 542 } 543 } 544 545 return nil 546 } 547 548 // setupNodeID will pull the persisted node ID, if any, or create a random one 549 // and persist it. 550 func (a *Agent) setupNodeID(config *nomad.Config) error { 551 // For dev mode we have no filesystem access so just make a node ID. 552 if a.config.DevMode { 553 config.NodeID = uuid.Generate() 554 return nil 555 } 556 557 // Load saved state, if any. Since a user could edit this, we also 558 // validate it. Saved state overwrites any configured node id 559 fileID := filepath.Join(config.DataDir, "node-id") 560 if _, err := os.Stat(fileID); err == nil { 561 rawID, err := ioutil.ReadFile(fileID) 562 if err != nil { 563 return err 564 } 565 566 nodeID := strings.TrimSpace(string(rawID)) 567 nodeID = strings.ToLower(nodeID) 568 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 569 return err 570 } 571 config.NodeID = nodeID 572 return nil 573 } 574 575 // If they've configured a node ID manually then just use that, as 576 // long as it's valid. 
577 if config.NodeID != "" { 578 config.NodeID = strings.ToLower(config.NodeID) 579 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 580 return err 581 } 582 // Persist this configured nodeID to our data directory 583 if err := lib.EnsurePath(fileID, false); err != nil { 584 return err 585 } 586 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 587 return err 588 } 589 return nil 590 } 591 592 // If we still don't have a valid node ID, make one. 593 if config.NodeID == "" { 594 id := uuid.Generate() 595 if err := lib.EnsurePath(fileID, false); err != nil { 596 return err 597 } 598 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 599 return err 600 } 601 602 config.NodeID = id 603 } 604 return nil 605 } 606 607 // setupKeyrings is used to initialize and load keyrings during agent startup 608 func (a *Agent) setupKeyrings(config *nomad.Config) error { 609 file := filepath.Join(a.config.DataDir, serfKeyring) 610 611 if a.config.Server.EncryptKey == "" { 612 goto LOAD 613 } 614 if _, err := os.Stat(file); err != nil { 615 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 616 return err 617 } 618 } 619 620 LOAD: 621 if _, err := os.Stat(file); err == nil { 622 config.SerfConfig.KeyringFile = file 623 } 624 if err := loadKeyringFile(config.SerfConfig); err != nil { 625 return err 626 } 627 // Success! 
628 return nil 629 } 630 631 // setupClient is used to setup the client if enabled 632 func (a *Agent) setupClient() error { 633 if !a.config.Client.Enabled { 634 return nil 635 } 636 637 // Setup the configuration 638 conf, err := a.clientConfig() 639 if err != nil { 640 return fmt.Errorf("client setup failed: %v", err) 641 } 642 643 // Reserve some ports for the plugins if we are on Windows 644 if runtime.GOOS == "windows" { 645 if err := a.reservePortsForClient(conf); err != nil { 646 return err 647 } 648 } 649 650 client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger) 651 if err != nil { 652 return fmt.Errorf("client setup failed: %v", err) 653 } 654 a.client = client 655 656 // Create the Nomad Client services for Consul 657 if *a.config.Consul.AutoAdvertise { 658 httpServ := &structs.Service{ 659 Name: a.config.Consul.ClientServiceName, 660 PortLabel: a.config.AdvertiseAddrs.HTTP, 661 Tags: []string{consul.ServiceTagHTTP}, 662 } 663 const isServer = false 664 if check := a.agentHTTPCheck(isServer); check != nil { 665 httpServ.Checks = []*structs.ServiceCheck{check} 666 } 667 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 668 return err 669 } 670 } 671 672 return nil 673 } 674 675 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 676 // If no HTTP health check can be supported nil is returned. 
677 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 678 // Resolve the http check address 679 httpCheckAddr := a.config.normalizedAddrs.HTTP 680 if *a.config.Consul.ChecksUseAdvertise { 681 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 682 } 683 check := structs.ServiceCheck{ 684 Name: a.config.Consul.ClientHTTPCheckName, 685 Type: "http", 686 Path: "/v1/agent/health?type=client", 687 Protocol: "http", 688 Interval: agentHttpCheckInterval, 689 Timeout: agentHttpCheckTimeout, 690 PortLabel: httpCheckAddr, 691 } 692 // Switch to endpoint that doesn't require a leader for servers 693 if server { 694 check.Name = a.config.Consul.ServerHTTPCheckName 695 check.Path = "/v1/agent/health?type=server" 696 } 697 if !a.config.TLSConfig.EnableHTTP { 698 // No HTTPS, return a plain http check 699 return &check 700 } 701 if a.config.TLSConfig.VerifyHTTPSClient { 702 a.logger.Printf("[WARN] agent: not registering Nomad HTTPS Health Check because verify_https_client enabled") 703 return nil 704 } 705 706 // HTTPS enabled; skip verification 707 check.Protocol = "https" 708 check.TLSSkipVerify = true 709 return &check 710 } 711 712 // reservePortsForClient reserves a range of ports for the client to use when 713 // it creates various plugins for log collection, executors, drivers, etc 714 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 715 // finding the device name for loopback 716 deviceName, addr, mask, err := a.findLoopbackDevice() 717 if err != nil { 718 return fmt.Errorf("error finding the device name for loopback: %v", err) 719 } 720 721 // seeing if the user has already reserved some resources on this device 722 var nr *structs.NetworkResource 723 if conf.Node.Reserved == nil { 724 conf.Node.Reserved = &structs.Resources{} 725 } 726 for _, n := range conf.Node.Reserved.Networks { 727 if n.Device == deviceName { 728 nr = n 729 } 730 } 731 // If the user hasn't already created the device, we create it 732 if nr == nil { 733 nr = 
&structs.NetworkResource{ 734 Device: deviceName, 735 IP: addr, 736 CIDR: mask, 737 ReservedPorts: make([]structs.Port, 0), 738 } 739 } 740 // appending the port ranges we want to use for the client to the list of 741 // reserved ports for this device 742 for i := conf.ClientMinPort; i <= conf.ClientMaxPort; i++ { 743 nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: int(i)}) 744 } 745 conf.Node.Reserved.Networks = append(conf.Node.Reserved.Networks, nr) 746 return nil 747 } 748 749 // findLoopbackDevice iterates through all the interfaces on a machine and 750 // returns the ip addr, mask of the loopback device 751 func (a *Agent) findLoopbackDevice() (string, string, string, error) { 752 var ifcs []net.Interface 753 var err error 754 ifcs, err = net.Interfaces() 755 if err != nil { 756 return "", "", "", err 757 } 758 for _, ifc := range ifcs { 759 addrs, err := ifc.Addrs() 760 if err != nil { 761 return "", "", "", err 762 } 763 for _, addr := range addrs { 764 var ip net.IP 765 switch v := addr.(type) { 766 case *net.IPNet: 767 ip = v.IP 768 case *net.IPAddr: 769 ip = v.IP 770 } 771 if ip.IsLoopback() { 772 if ip.To4() == nil { 773 continue 774 } 775 return ifc.Name, ip.String(), addr.String(), nil 776 } 777 } 778 } 779 780 return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found") 781 } 782 783 // Leave is used gracefully exit. Clients will inform servers 784 // of their departure so that allocations can be rescheduled. 785 func (a *Agent) Leave() error { 786 if a.client != nil { 787 if err := a.client.Leave(); err != nil { 788 a.logger.Printf("[ERR] agent: client leave failed: %v", err) 789 } 790 } 791 if a.server != nil { 792 if err := a.server.Leave(); err != nil { 793 a.logger.Printf("[ERR] agent: server leave failed: %v", err) 794 } 795 } 796 return nil 797 } 798 799 // Shutdown is used to terminate the agent. 
800 func (a *Agent) Shutdown() error { 801 a.shutdownLock.Lock() 802 defer a.shutdownLock.Unlock() 803 804 if a.shutdown { 805 return nil 806 } 807 808 a.logger.Println("[INFO] agent: requesting shutdown") 809 if a.client != nil { 810 if err := a.client.Shutdown(); err != nil { 811 a.logger.Printf("[ERR] agent: client shutdown failed: %v", err) 812 } 813 } 814 if a.server != nil { 815 if err := a.server.Shutdown(); err != nil { 816 a.logger.Printf("[ERR] agent: server shutdown failed: %v", err) 817 } 818 } 819 820 if err := a.consulService.Shutdown(); err != nil { 821 a.logger.Printf("[ERR] agent: shutting down Consul client failed: %v", err) 822 } 823 824 a.logger.Println("[INFO] agent: shutdown complete") 825 a.shutdown = true 826 close(a.shutdownCh) 827 return nil 828 } 829 830 // RPC is used to make an RPC call to the Nomad servers 831 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 832 if a.server != nil { 833 return a.server.RPC(method, args, reply) 834 } 835 return a.client.RPC(method, args, reply) 836 } 837 838 // Client returns the configured client or nil 839 func (a *Agent) Client() *client.Client { 840 return a.client 841 } 842 843 // Server returns the configured server or nil 844 func (a *Agent) Server() *nomad.Server { 845 return a.server 846 } 847 848 // Stats is used to return statistics for debugging and insight 849 // for various sub-systems 850 func (a *Agent) Stats() map[string]map[string]string { 851 stats := make(map[string]map[string]string) 852 if a.server != nil { 853 subStat := a.server.Stats() 854 for k, v := range subStat { 855 stats[k] = v 856 } 857 } 858 if a.client != nil { 859 subStat := a.client.Stats() 860 for k, v := range subStat { 861 stats[k] = v 862 } 863 } 864 return stats 865 } 866 867 // ShouldReload determines if we should reload the configuration and agent 868 // connections. If the TLS Configuration has not changed, we shouldn't reload. 
869 func (a *Agent) ShouldReload(newConfig *Config) (agent, http, rpc bool) { 870 a.configLock.Lock() 871 defer a.configLock.Unlock() 872 873 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 874 if err != nil { 875 a.logger.Printf("[INFO] agent: error when parsing TLS certificate %v", err) 876 return false, false, false 877 } else if !isEqual { 878 return true, true, true 879 } 880 881 // Allow the ability to only reload HTTP connections 882 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 883 http = true 884 agent = true 885 } 886 887 // Allow the ability to only reload HTTP connections 888 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 889 rpc = true 890 agent = true 891 } 892 893 return agent, http, rpc 894 } 895 896 // Reload handles configuration changes for the agent. Provides a method that 897 // is easier to unit test, as this action is invoked via SIGHUP. 898 func (a *Agent) Reload(newConfig *Config) error { 899 a.configLock.Lock() 900 defer a.configLock.Unlock() 901 902 if newConfig == nil || newConfig.TLSConfig == nil { 903 return fmt.Errorf("cannot reload agent with nil configuration") 904 } 905 906 // This is just a TLS configuration reload, we don't need to refresh 907 // existing network connections 908 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 909 910 // Reload the certificates on the keyloader and on success store the 911 // updated TLS config. It is important to reuse the same keyloader 912 // as this allows us to dynamically reload configurations not only 913 // on the Agent but on the Server and Client too (they are 914 // referencing the same keyloader). 
915 keyloader := a.config.TLSConfig.GetKeyLoader() 916 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 917 if err != nil { 918 return err 919 } 920 a.config.TLSConfig = newConfig.TLSConfig 921 a.config.TLSConfig.KeyLoader = keyloader 922 return nil 923 } 924 925 // Completely reload the agent's TLS configuration (moving from non-TLS to 926 // TLS, or vice versa) 927 // This does not handle errors in loading the new TLS configuration 928 a.config.TLSConfig = newConfig.TLSConfig.Copy() 929 930 if newConfig.TLSConfig.IsEmpty() { 931 a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext") 932 } else { 933 a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS") 934 } 935 936 return nil 937 } 938 939 // GetConfig creates a locked reference to the agent's config 940 func (a *Agent) GetConfig() *Config { 941 a.configLock.Lock() 942 defer a.configLock.Unlock() 943 944 return a.config 945 } 946 947 // setupConsul creates the Consul client and starts its main Run loop. 948 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 949 apiConf, err := consulConfig.ApiConfig() 950 if err != nil { 951 return err 952 } 953 client, err := api.NewClient(apiConf) 954 if err != nil { 955 return err 956 } 957 958 // Determine version for TLSSkipVerify 959 960 // Create Consul Catalog client for service discovery. 961 a.consulCatalog = client.Catalog() 962 963 // Create Consul Service client for service advertisement and checks. 964 isClient := false 965 if a.config.Client != nil && a.config.Client.Enabled { 966 isClient = true 967 } 968 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 969 970 // Run the Consul service client's sync'ing main loop 971 go a.consulService.Run() 972 return nil 973 }