github.com/smintz/nomad@v0.8.3/command/agent/agent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "log" 8 "net" 9 "os" 10 "path/filepath" 11 "runtime" 12 "strings" 13 "sync" 14 "sync/atomic" 15 "time" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/consul/api" 19 "github.com/hashicorp/consul/lib" 20 uuidparse "github.com/hashicorp/go-uuid" 21 "github.com/hashicorp/nomad/client" 22 clientconfig "github.com/hashicorp/nomad/client/config" 23 "github.com/hashicorp/nomad/command/agent/consul" 24 "github.com/hashicorp/nomad/helper/uuid" 25 "github.com/hashicorp/nomad/nomad" 26 "github.com/hashicorp/nomad/nomad/structs" 27 "github.com/hashicorp/nomad/nomad/structs/config" 28 "github.com/hashicorp/raft" 29 ) 30 31 const ( 32 agentHttpCheckInterval = 10 * time.Second 33 agentHttpCheckTimeout = 5 * time.Second 34 serverRpcCheckInterval = 10 * time.Second 35 serverRpcCheckTimeout = 3 * time.Second 36 serverSerfCheckInterval = 10 * time.Second 37 serverSerfCheckTimeout = 3 * time.Second 38 39 // roles used in identifying Consul entries for Nomad agents 40 consulRoleServer = "server" 41 consulRoleClient = "client" 42 ) 43 44 // Agent is a long running daemon that is used to run both 45 // clients and servers. Servers are responsible for managing 46 // state and making scheduling decisions. Clients can be 47 // scheduled to, and are responsible for interfacing with 48 // servers to run allocations. 49 type Agent struct { 50 config *Config 51 configLock sync.Mutex 52 53 logger *log.Logger 54 logOutput io.Writer 55 56 // consulService is Nomad's custom Consul client for managing services 57 // and checks. 58 consulService *consul.ServiceClient 59 60 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 61 consulCatalog consul.CatalogAPI 62 63 client *client.Client 64 65 server *nomad.Server 66 67 shutdown bool 68 shutdownCh chan struct{} 69 shutdownLock sync.Mutex 70 71 InmemSink *metrics.InmemSink 72 } 73 74 // NewAgent is used to create a new agent with the given configuration 75 func NewAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 76 a := &Agent{ 77 config: config, 78 logger: log.New(logOutput, "", log.LstdFlags|log.Lmicroseconds), 79 logOutput: logOutput, 80 shutdownCh: make(chan struct{}), 81 InmemSink: inmem, 82 } 83 84 if err := a.setupConsul(config.Consul); err != nil { 85 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 86 } 87 if err := a.setupServer(); err != nil { 88 return nil, err 89 } 90 if err := a.setupClient(); err != nil { 91 return nil, err 92 } 93 if a.client == nil && a.server == nil { 94 return nil, fmt.Errorf("must have at least client or server mode enabled") 95 } 96 97 return a, nil 98 } 99 100 // convertServerConfig takes an agent config and log output and returns a Nomad 101 // Config. 102 func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Config, error) { 103 conf := agentConfig.NomadConfig 104 if conf == nil { 105 conf = nomad.DefaultConfig() 106 } 107 conf.LogOutput = logOutput 108 conf.DevMode = agentConfig.DevMode 109 conf.Build = agentConfig.Version.VersionNumber() 110 if agentConfig.Region != "" { 111 conf.Region = agentConfig.Region 112 } 113 114 // Set the Authoritative Region if set, otherwise default to 115 // the same as the local region. 116 if agentConfig.Server.AuthoritativeRegion != "" { 117 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 118 } else if agentConfig.Region != "" { 119 conf.AuthoritativeRegion = agentConfig.Region 120 } 121 122 if agentConfig.Datacenter != "" { 123 conf.Datacenter = agentConfig.Datacenter 124 } 125 if agentConfig.NodeName != "" { 126 conf.NodeName = agentConfig.NodeName 127 } 128 if agentConfig.Server.BootstrapExpect > 0 { 129 if agentConfig.Server.BootstrapExpect == 1 { 130 conf.Bootstrap = true 131 } else { 132 atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect)) 133 } 134 } 135 if agentConfig.DataDir != "" { 136 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 137 } 138 if agentConfig.Server.DataDir != "" { 139 conf.DataDir = agentConfig.Server.DataDir 140 } 141 if agentConfig.Server.ProtocolVersion != 0 { 142 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 143 } 144 if agentConfig.Server.RaftProtocol != 0 { 145 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 146 } 147 if agentConfig.Server.NumSchedulers != 0 { 148 conf.NumSchedulers = agentConfig.Server.NumSchedulers 149 } 150 if len(agentConfig.Server.EnabledSchedulers) != 0 { 151 // Convert to a set and require the core scheduler 152 set := make(map[string]struct{}, 4) 153 set[structs.JobTypeCore] = struct{}{} 154 for _, sched := range agentConfig.Server.EnabledSchedulers { 155 set[sched] = struct{}{} 156 } 157 158 schedulers := make([]string, 0, len(set)) 159 for k := range set { 160 schedulers = append(schedulers, k) 161 } 162 163 conf.EnabledSchedulers = schedulers 164 165 } 166 if agentConfig.ACL.Enabled { 167 conf.ACLEnabled = true 168 } 169 if agentConfig.ACL.ReplicationToken != "" { 170 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 171 } 172 if agentConfig.Sentinel != nil { 173 conf.SentinelConfig = agentConfig.Sentinel 174 } 175 if agentConfig.Server.NonVotingServer { 176 conf.NonVoter = true 177 } 178 if agentConfig.Server.RedundancyZone != "" { 179 conf.RedundancyZone = agentConfig.Server.RedundancyZone 180 } 181 if agentConfig.Server.UpgradeVersion != "" { 182 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 183 } 184 if agentConfig.Autopilot != nil { 185 if agentConfig.Autopilot.CleanupDeadServers != nil { 186 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 187 } 188 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 189 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 190 } 191 if agentConfig.Autopilot.LastContactThreshold != 0 { 192 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 193 } 194 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 195 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 196 } 197 if agentConfig.Autopilot.EnableRedundancyZones != nil { 198 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 199 } 200 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 201 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 202 } 203 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 204 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 205 } 206 } 207 208 // Set up the bind addresses 209 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 210 if err != nil { 211 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 212 } 213 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 214 if err != nil { 215 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 216 } 217 conf.RPCAddr.Port = rpcAddr.Port 218 conf.RPCAddr.IP = rpcAddr.IP 219 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 220 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 221 222 // Set up the advertise addresses 223 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 224 if err != nil { 225 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 226 } 227 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 228 if err != nil { 229 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 230 } 231 232 // Server address is the serf advertise address and rpc port. This is the 233 // address that all servers should be able to communicate over RPC with. 234 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 235 if err != nil { 236 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 237 } 238 239 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 240 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 241 conf.ClientRPCAdvertise = rpcAddr 242 conf.ServerRPCAdvertise = serverAddr 243 244 // Set up gc threshold and heartbeat grace period 245 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 246 dur, err := time.ParseDuration(gcThreshold) 247 if err != nil { 248 return nil, err 249 } 250 conf.NodeGCThreshold = dur 251 } 252 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 253 dur, err := time.ParseDuration(gcThreshold) 254 if err != nil { 255 return nil, err 256 } 257 conf.JobGCThreshold = dur 258 } 259 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 260 dur, err := time.ParseDuration(gcThreshold) 261 if err != nil { 262 return nil, err 263 } 264 conf.EvalGCThreshold = dur 265 } 266 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 267 dur, err := time.ParseDuration(gcThreshold) 268 if err != nil { 269 return nil, err 270 } 271 conf.DeploymentGCThreshold = dur 272 } 273 274 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 275 conf.HeartbeatGrace = heartbeatGrace 276 } 277 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 278 conf.MinHeartbeatTTL = min 279 } 280 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 281 conf.MaxHeartbeatsPerSecond = maxHPS 282 } 283 284 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 285 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 286 } 287 288 // Add the Consul and Vault configs 289 conf.ConsulConfig = agentConfig.Consul 290 conf.VaultConfig = agentConfig.Vault 291 292 // Set the TLS config 293 conf.TLSConfig = agentConfig.TLSConfig 294 295 // Setup telemetry related config 296 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 297 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 298 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 299 300 return conf, nil 301 } 302 303 // serverConfig is used to generate a new server configuration struct 304 // for initializing a nomad server. 305 func (a *Agent) serverConfig() (*nomad.Config, error) { 306 return convertServerConfig(a.config, a.logOutput) 307 } 308 309 // clientConfig is used to generate a new client configuration struct 310 // for initializing a Nomad client. 311 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 312 // Setup the configuration 313 conf := a.config.ClientConfig 314 if conf == nil { 315 conf = clientconfig.DefaultConfig() 316 } 317 if a.server != nil { 318 conf.RPCHandler = a.server 319 } 320 conf.LogOutput = a.logOutput 321 conf.LogLevel = a.config.LogLevel 322 conf.DevMode = a.config.DevMode 323 if a.config.Region != "" { 324 conf.Region = a.config.Region 325 } 326 if a.config.DataDir != "" { 327 conf.StateDir = filepath.Join(a.config.DataDir, "client") 328 conf.AllocDir = filepath.Join(a.config.DataDir, "alloc") 329 } 330 if a.config.Client.StateDir != "" { 331 conf.StateDir = a.config.Client.StateDir 332 } 333 if a.config.Client.AllocDir != "" { 334 conf.AllocDir = a.config.Client.AllocDir 335 } 336 conf.Servers = a.config.Client.Servers 337 if a.config.Client.NetworkInterface != "" { 338 conf.NetworkInterface = a.config.Client.NetworkInterface 339 } 340 conf.ChrootEnv = a.config.Client.ChrootEnv 341 conf.Options = a.config.Client.Options 342 // Logging deprecation messages about consul related configuration in client 343 // options 344 var invalidConsulKeys []string 345 for key := range conf.Options { 346 if strings.HasPrefix(key, "consul") { 347 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 348 } 349 } 350 if len(invalidConsulKeys) > 0 { 351 a.logger.Printf("[WARN] agent: Invalid keys: %v", strings.Join(invalidConsulKeys, ",")) 352 a.logger.Printf(`Nomad client ignores consul related configuration in client options. 353 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 354 to configure Nomad to work with Consul.`) 355 } 356 357 if a.config.Client.NetworkSpeed != 0 { 358 conf.NetworkSpeed = a.config.Client.NetworkSpeed 359 } 360 if a.config.Client.CpuCompute != 0 { 361 conf.CpuCompute = a.config.Client.CpuCompute 362 } 363 if a.config.Client.MemoryMB != 0 { 364 conf.MemoryMB = a.config.Client.MemoryMB 365 } 366 if a.config.Client.MaxKillTimeout != "" { 367 dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout) 368 if err != nil { 369 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 370 } 371 conf.MaxKillTimeout = dur 372 } 373 conf.ClientMaxPort = uint(a.config.Client.ClientMaxPort) 374 conf.ClientMinPort = uint(a.config.Client.ClientMinPort) 375 376 // Setup the node 377 conf.Node = new(structs.Node) 378 conf.Node.Datacenter = a.config.Datacenter 379 conf.Node.Name = a.config.NodeName 380 conf.Node.Meta = a.config.Client.Meta 381 conf.Node.NodeClass = a.config.Client.NodeClass 382 383 // Set up the HTTP advertise address 384 conf.Node.HTTPAddr = a.config.AdvertiseAddrs.HTTP 385 386 // Reserve resources on the node. 387 r := conf.Node.Reserved 388 if r == nil { 389 r = new(structs.Resources) 390 conf.Node.Reserved = r 391 } 392 r.CPU = a.config.Client.Reserved.CPU 393 r.MemoryMB = a.config.Client.Reserved.MemoryMB 394 r.DiskMB = a.config.Client.Reserved.DiskMB 395 r.IOPS = a.config.Client.Reserved.IOPS 396 conf.GloballyReservedPorts = a.config.Client.Reserved.ParsedReservedPorts 397 398 conf.Version = a.config.Version 399 400 if *a.config.Consul.AutoAdvertise && a.config.Consul.ClientServiceName == "" { 401 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 402 } 403 404 conf.ConsulConfig = a.config.Consul 405 conf.VaultConfig = a.config.Vault 406 407 // Set up Telemetry configuration 408 conf.StatsCollectionInterval = a.config.Telemetry.collectionInterval 409 conf.PublishNodeMetrics = a.config.Telemetry.PublishNodeMetrics 410 conf.PublishAllocationMetrics = a.config.Telemetry.PublishAllocationMetrics 411 conf.DisableTaggedMetrics = a.config.Telemetry.DisableTaggedMetrics 412 conf.BackwardsCompatibleMetrics = a.config.Telemetry.BackwardsCompatibleMetrics 413 414 // Set the TLS related configs 415 conf.TLSConfig = a.config.TLSConfig 416 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 417 418 // Set the GC related configs 419 conf.GCInterval = a.config.Client.GCInterval 420 conf.GCParallelDestroys = a.config.Client.GCParallelDestroys 421 conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold 422 conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold 423 conf.GCMaxAllocs = a.config.Client.GCMaxAllocs 424 if a.config.Client.NoHostUUID != nil { 425 conf.NoHostUUID = *a.config.Client.NoHostUUID 426 } else { 427 // Default no_host_uuid to true 428 conf.NoHostUUID = true 429 } 430 431 // Setup the ACLs 432 conf.ACLEnabled = a.config.ACL.Enabled 433 conf.ACLTokenTTL = a.config.ACL.TokenTTL 434 conf.ACLPolicyTTL = a.config.ACL.PolicyTTL 435 436 return conf, nil 437 } 438 439 // setupServer is used to setup the server if enabled 440 func (a *Agent) setupServer() error { 441 if !a.config.Server.Enabled { 442 return nil 443 } 444 445 // Setup the configuration 446 conf, err := a.serverConfig() 447 if err != nil { 448 return fmt.Errorf("server config setup failed: %s", err) 449 } 450 451 // Generate a node ID and persist it if it is the first instance, otherwise 452 // read the persisted node ID. 453 if err := a.setupNodeID(conf); err != nil { 454 return fmt.Errorf("setting up server node ID failed: %s", err) 455 } 456 457 // Sets up the keyring for gossip encryption 458 if err := a.setupKeyrings(conf); err != nil { 459 return fmt.Errorf("failed to configure keyring: %v", err) 460 } 461 462 // Create the server 463 server, err := nomad.NewServer(conf, a.consulCatalog, a.logger) 464 if err != nil { 465 return fmt.Errorf("server setup failed: %v", err) 466 } 467 a.server = server 468 469 // Consul check addresses default to bind but can be toggled to use advertise 470 rpcCheckAddr := a.config.normalizedAddrs.RPC 471 serfCheckAddr := a.config.normalizedAddrs.Serf 472 if *a.config.Consul.ChecksUseAdvertise { 473 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 474 serfCheckAddr = a.config.AdvertiseAddrs.Serf 475 } 476 477 // Create the Nomad Server services for Consul 478 if *a.config.Consul.AutoAdvertise { 479 httpServ := &structs.Service{ 480 Name: a.config.Consul.ServerServiceName, 481 PortLabel: a.config.AdvertiseAddrs.HTTP, 482 Tags: []string{consul.ServiceTagHTTP}, 483 } 484 const isServer = true 485 if check := a.agentHTTPCheck(isServer); check != nil { 486 httpServ.Checks = []*structs.ServiceCheck{check} 487 } 488 rpcServ := &structs.Service{ 489 Name: a.config.Consul.ServerServiceName, 490 PortLabel: a.config.AdvertiseAddrs.RPC, 491 Tags: []string{consul.ServiceTagRPC}, 492 Checks: []*structs.ServiceCheck{ 493 { 494 Name: a.config.Consul.ServerRPCCheckName, 495 Type: "tcp", 496 Interval: serverRpcCheckInterval, 497 Timeout: serverRpcCheckTimeout, 498 PortLabel: rpcCheckAddr, 499 }, 500 }, 501 } 502 serfServ := &structs.Service{ 503 Name: a.config.Consul.ServerServiceName, 504 PortLabel: a.config.AdvertiseAddrs.Serf, 505 Tags: []string{consul.ServiceTagSerf}, 506 Checks: []*structs.ServiceCheck{ 507 { 508 Name: a.config.Consul.ServerSerfCheckName, 509 Type: "tcp", 510 Interval: serverSerfCheckInterval, 511 Timeout: serverSerfCheckTimeout, 512 PortLabel: serfCheckAddr, 513 }, 514 }, 515 } 516 517 // Add the http port check if TLS isn't enabled 518 consulServices := []*structs.Service{ 519 rpcServ, 520 serfServ, 521 httpServ, 522 } 523 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 524 return err 525 } 526 } 527 528 return nil 529 } 530 531 // setupNodeID will pull the persisted node ID, if any, or create a random one 532 // and persist it. 533 func (a *Agent) setupNodeID(config *nomad.Config) error { 534 // For dev mode we have no filesystem access so just make a node ID. 535 if a.config.DevMode { 536 config.NodeID = uuid.Generate() 537 return nil 538 } 539 540 // Load saved state, if any. Since a user could edit this, we also 541 // validate it. Saved state overwrites any configured node id 542 fileID := filepath.Join(config.DataDir, "node-id") 543 if _, err := os.Stat(fileID); err == nil { 544 rawID, err := ioutil.ReadFile(fileID) 545 if err != nil { 546 return err 547 } 548 549 nodeID := strings.TrimSpace(string(rawID)) 550 nodeID = strings.ToLower(nodeID) 551 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 552 return err 553 } 554 config.NodeID = nodeID 555 return nil 556 } 557 558 // If they've configured a node ID manually then just use that, as 559 // long as it's valid. 560 if config.NodeID != "" { 561 config.NodeID = strings.ToLower(config.NodeID) 562 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 563 return err 564 } 565 // Persist this configured nodeID to our data directory 566 if err := lib.EnsurePath(fileID, false); err != nil { 567 return err 568 } 569 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 570 return err 571 } 572 return nil 573 } 574 575 // If we still don't have a valid node ID, make one. 576 if config.NodeID == "" { 577 id := uuid.Generate() 578 if err := lib.EnsurePath(fileID, false); err != nil { 579 return err 580 } 581 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 582 return err 583 } 584 585 config.NodeID = id 586 } 587 return nil 588 } 589 590 // setupKeyrings is used to initialize and load keyrings during agent startup 591 func (a *Agent) setupKeyrings(config *nomad.Config) error { 592 file := filepath.Join(a.config.DataDir, serfKeyring) 593 594 if a.config.Server.EncryptKey == "" { 595 goto LOAD 596 } 597 if _, err := os.Stat(file); err != nil { 598 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 599 return err 600 } 601 } 602 603 LOAD: 604 if _, err := os.Stat(file); err == nil { 605 config.SerfConfig.KeyringFile = file 606 } 607 if err := loadKeyringFile(config.SerfConfig); err != nil { 608 return err 609 } 610 // Success! 611 return nil 612 } 613 614 // setupClient is used to setup the client if enabled 615 func (a *Agent) setupClient() error { 616 if !a.config.Client.Enabled { 617 return nil 618 } 619 620 // Setup the configuration 621 conf, err := a.clientConfig() 622 if err != nil { 623 return fmt.Errorf("client setup failed: %v", err) 624 } 625 626 // Reserve some ports for the plugins if we are on Windows 627 if runtime.GOOS == "windows" { 628 if err := a.reservePortsForClient(conf); err != nil { 629 return err 630 } 631 } 632 633 client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger) 634 if err != nil { 635 return fmt.Errorf("client setup failed: %v", err) 636 } 637 a.client = client 638 639 // Create the Nomad Client services for Consul 640 if *a.config.Consul.AutoAdvertise { 641 httpServ := &structs.Service{ 642 Name: a.config.Consul.ClientServiceName, 643 PortLabel: a.config.AdvertiseAddrs.HTTP, 644 Tags: []string{consul.ServiceTagHTTP}, 645 } 646 const isServer = false 647 if check := a.agentHTTPCheck(isServer); check != nil { 648 httpServ.Checks = []*structs.ServiceCheck{check} 649 } 650 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 651 return err 652 } 653 } 654 655 return nil 656 } 657 658 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 659 // If no HTTP health check can be supported nil is returned. 660 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 661 // Resolve the http check address 662 httpCheckAddr := a.config.normalizedAddrs.HTTP 663 if *a.config.Consul.ChecksUseAdvertise { 664 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 665 } 666 check := structs.ServiceCheck{ 667 Name: a.config.Consul.ClientHTTPCheckName, 668 Type: "http", 669 Path: "/v1/agent/health?type=client", 670 Protocol: "http", 671 Interval: agentHttpCheckInterval, 672 Timeout: agentHttpCheckTimeout, 673 PortLabel: httpCheckAddr, 674 } 675 // Switch to endpoint that doesn't require a leader for servers 676 if server { 677 check.Name = a.config.Consul.ServerHTTPCheckName 678 check.Path = "/v1/agent/health?type=server" 679 } 680 if !a.config.TLSConfig.EnableHTTP { 681 // No HTTPS, return a plain http check 682 return &check 683 } 684 if a.config.TLSConfig.VerifyHTTPSClient { 685 a.logger.Printf("[WARN] agent: not registering Nomad HTTPS Health Check because verify_https_client enabled") 686 return nil 687 } 688 689 // HTTPS enabled; skip verification 690 check.Protocol = "https" 691 check.TLSSkipVerify = true 692 return &check 693 } 694 695 // reservePortsForClient reserves a range of ports for the client to use when 696 // it creates various plugins for log collection, executors, drivers, etc 697 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 698 // finding the device name for loopback 699 deviceName, addr, mask, err := a.findLoopbackDevice() 700 if err != nil { 701 return fmt.Errorf("error finding the device name for loopback: %v", err) 702 } 703 704 // seeing if the user has already reserved some resources on this device 705 var nr *structs.NetworkResource 706 if conf.Node.Reserved == nil { 707 conf.Node.Reserved = &structs.Resources{} 708 } 709 for _, n := range conf.Node.Reserved.Networks { 710 if n.Device == deviceName { 711 nr = n 712 } 713 } 714 // If the user hasn't already created the device, we create it 715 if nr == nil { 716 nr = &structs.NetworkResource{ 717 Device: deviceName, 718 IP: addr, 719 CIDR: mask, 720 ReservedPorts: make([]structs.Port, 0), 721 } 722 } 723 // appending the port ranges we want to use for the client to the list of 724 // reserved ports for this device 725 for i := conf.ClientMinPort; i <= conf.ClientMaxPort; i++ { 726 nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: int(i)}) 727 } 728 conf.Node.Reserved.Networks = append(conf.Node.Reserved.Networks, nr) 729 return nil 730 } 731 732 // findLoopbackDevice iterates through all the interfaces on a machine and 733 // returns the ip addr, mask of the loopback device 734 func (a *Agent) findLoopbackDevice() (string, string, string, error) { 735 var ifcs []net.Interface 736 var err error 737 ifcs, err = net.Interfaces() 738 if err != nil { 739 return "", "", "", err 740 } 741 for _, ifc := range ifcs { 742 addrs, err := ifc.Addrs() 743 if err != nil { 744 return "", "", "", err 745 } 746 for _, addr := range addrs { 747 var ip net.IP 748 switch v := addr.(type) { 749 case *net.IPNet: 750 ip = v.IP 751 case *net.IPAddr: 752 ip = v.IP 753 } 754 if ip.IsLoopback() { 755 if ip.To4() == nil { 756 continue 757 } 758 return ifc.Name, ip.String(), addr.String(), nil 759 } 760 } 761 } 762 763 return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found") 764 } 765 766 // Leave is used gracefully exit. Clients will inform servers 767 // of their departure so that allocations can be rescheduled. 768 func (a *Agent) Leave() error { 769 if a.client != nil { 770 if err := a.client.Leave(); err != nil { 771 a.logger.Printf("[ERR] agent: client leave failed: %v", err) 772 } 773 } 774 if a.server != nil { 775 if err := a.server.Leave(); err != nil { 776 a.logger.Printf("[ERR] agent: server leave failed: %v", err) 777 } 778 } 779 return nil 780 } 781 782 // Shutdown is used to terminate the agent. 783 func (a *Agent) Shutdown() error { 784 a.shutdownLock.Lock() 785 defer a.shutdownLock.Unlock() 786 787 if a.shutdown { 788 return nil 789 } 790 791 a.logger.Println("[INFO] agent: requesting shutdown") 792 if a.client != nil { 793 if err := a.client.Shutdown(); err != nil { 794 a.logger.Printf("[ERR] agent: client shutdown failed: %v", err) 795 } 796 } 797 if a.server != nil { 798 if err := a.server.Shutdown(); err != nil { 799 a.logger.Printf("[ERR] agent: server shutdown failed: %v", err) 800 } 801 } 802 803 if err := a.consulService.Shutdown(); err != nil { 804 a.logger.Printf("[ERR] agent: shutting down Consul client failed: %v", err) 805 } 806 807 a.logger.Println("[INFO] agent: shutdown complete") 808 a.shutdown = true 809 close(a.shutdownCh) 810 return nil 811 } 812 813 // RPC is used to make an RPC call to the Nomad servers 814 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 815 if a.server != nil { 816 return a.server.RPC(method, args, reply) 817 } 818 return a.client.RPC(method, args, reply) 819 } 820 821 // Client returns the configured client or nil 822 func (a *Agent) Client() *client.Client { 823 return a.client 824 } 825 826 // Server returns the configured server or nil 827 func (a *Agent) Server() *nomad.Server { 828 return a.server 829 } 830 831 // Stats is used to return statistics for debugging and insight 832 // for various sub-systems 833 func (a *Agent) Stats() map[string]map[string]string { 834 stats := make(map[string]map[string]string) 835 if a.server != nil { 836 subStat := a.server.Stats() 837 for k, v := range subStat { 838 stats[k] = v 839 } 840 } 841 if a.client != nil { 842 subStat := a.client.Stats() 843 for k, v := range subStat { 844 stats[k] = v 845 } 846 } 847 return stats 848 } 849 850 // ShouldReload determines if we should reload the configuration and agent 851 // connections. If the TLS Configuration has not changed, we shouldn't reload. 852 func (a *Agent) ShouldReload(newConfig *Config) (agent, http, rpc bool) { 853 a.configLock.Lock() 854 defer a.configLock.Unlock() 855 856 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 857 if err != nil { 858 a.logger.Printf("[INFO] agent: error when parsing TLS certificate %v", err) 859 return false, false, false 860 } else if !isEqual { 861 return true, true, true 862 } 863 864 // Allow the ability to only reload HTTP connections 865 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 866 http = true 867 agent = true 868 } 869 870 // Allow the ability to only reload HTTP connections 871 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 872 rpc = true 873 agent = true 874 } 875 876 return agent, http, rpc 877 } 878 879 // Reload handles configuration changes for the agent. Provides a method that 880 // is easier to unit test, as this action is invoked via SIGHUP. 881 func (a *Agent) Reload(newConfig *Config) error { 882 a.configLock.Lock() 883 defer a.configLock.Unlock() 884 885 if newConfig == nil || newConfig.TLSConfig == nil { 886 return fmt.Errorf("cannot reload agent with nil configuration") 887 } 888 889 // This is just a TLS configuration reload, we don't need to refresh 890 // existing network connections 891 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 892 893 // Reload the certificates on the keyloader and on success store the 894 // updated TLS config. It is important to reuse the same keyloader 895 // as this allows us to dynamically reload configurations not only 896 // on the Agent but on the Server and Client too (they are 897 // referencing the same keyloader). 898 keyloader := a.config.TLSConfig.GetKeyLoader() 899 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 900 if err != nil { 901 return err 902 } 903 a.config.TLSConfig = newConfig.TLSConfig 904 a.config.TLSConfig.KeyLoader = keyloader 905 return nil 906 } 907 908 // Completely reload the agent's TLS configuration (moving from non-TLS to 909 // TLS, or vice versa) 910 // This does not handle errors in loading the new TLS configuration 911 a.config.TLSConfig = newConfig.TLSConfig.Copy() 912 913 if newConfig.TLSConfig.IsEmpty() { 914 a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext") 915 } else { 916 a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS") 917 } 918 919 return nil 920 } 921 922 // GetConfig creates a locked reference to the agent's config 923 func (a *Agent) GetConfig() *Config { 924 a.configLock.Lock() 925 defer a.configLock.Unlock() 926 927 return a.config 928 } 929 930 // setupConsul creates the Consul client and starts its main Run loop. 931 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 932 apiConf, err := consulConfig.ApiConfig() 933 if err != nil { 934 return err 935 } 936 client, err := api.NewClient(apiConf) 937 if err != nil { 938 return err 939 } 940 941 // Determine version for TLSSkipVerify 942 943 // Create Consul Catalog client for service discovery. 944 a.consulCatalog = client.Catalog() 945 946 // Create Consul Service client for service advertisement and checks. 947 a.consulService = consul.NewServiceClient(client.Agent(), a.logger) 948 949 // Run the Consul service client's sync'ing main loop 950 go a.consulService.Run() 951 return nil 952 }