github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/command/agent/agent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "log" 8 "net" 9 "os" 10 "path/filepath" 11 "runtime" 12 "strings" 13 "sync" 14 "sync/atomic" 15 "time" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/consul/api" 19 "github.com/hashicorp/consul/lib" 20 uuidparse "github.com/hashicorp/go-uuid" 21 "github.com/hashicorp/nomad/client" 22 clientconfig "github.com/hashicorp/nomad/client/config" 23 "github.com/hashicorp/nomad/command/agent/consul" 24 "github.com/hashicorp/nomad/helper/uuid" 25 "github.com/hashicorp/nomad/nomad" 26 "github.com/hashicorp/nomad/nomad/structs" 27 "github.com/hashicorp/nomad/nomad/structs/config" 28 "github.com/hashicorp/raft" 29 ) 30 31 const ( 32 agentHttpCheckInterval = 10 * time.Second 33 agentHttpCheckTimeout = 5 * time.Second 34 serverRpcCheckInterval = 10 * time.Second 35 serverRpcCheckTimeout = 3 * time.Second 36 serverSerfCheckInterval = 10 * time.Second 37 serverSerfCheckTimeout = 3 * time.Second 38 39 // roles used in identifying Consul entries for Nomad agents 40 consulRoleServer = "server" 41 consulRoleClient = "client" 42 ) 43 44 // Agent is a long running daemon that is used to run both 45 // clients and servers. Servers are responsible for managing 46 // state and making scheduling decisions. Clients can be 47 // scheduled to, and are responsible for interfacing with 48 // servers to run allocations. 49 type Agent struct { 50 config *Config 51 configLock sync.Mutex 52 53 logger *log.Logger 54 logOutput io.Writer 55 56 // consulService is Nomad's custom Consul client for managing services 57 // and checks. 58 consulService *consul.ServiceClient 59 60 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 61 consulCatalog consul.CatalogAPI 62 63 client *client.Client 64 65 server *nomad.Server 66 67 shutdown bool 68 shutdownCh chan struct{} 69 shutdownLock sync.Mutex 70 71 InmemSink *metrics.InmemSink 72 } 73 74 // NewAgent is used to create a new agent with the given configuration 75 func NewAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 76 a := &Agent{ 77 config: config, 78 logger: log.New(logOutput, "", log.LstdFlags|log.Lmicroseconds), 79 logOutput: logOutput, 80 shutdownCh: make(chan struct{}), 81 InmemSink: inmem, 82 } 83 84 // Global logger should match internal logger as much as possible 85 log.SetFlags(log.LstdFlags | log.Lmicroseconds) 86 87 if err := a.setupConsul(config.Consul); err != nil { 88 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 89 } 90 91 // TODO setup plugin loader 92 93 if err := a.setupServer(); err != nil { 94 return nil, err 95 } 96 if err := a.setupClient(); err != nil { 97 return nil, err 98 } 99 if a.client == nil && a.server == nil { 100 return nil, fmt.Errorf("must have at least client or server mode enabled") 101 } 102 103 return a, nil 104 } 105 106 // convertServerConfig takes an agent config and log output and returns a Nomad 107 // Config. 108 func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Config, error) { 109 conf := agentConfig.NomadConfig 110 if conf == nil { 111 conf = nomad.DefaultConfig() 112 } 113 conf.LogOutput = logOutput 114 conf.DevMode = agentConfig.DevMode 115 conf.Build = agentConfig.Version.VersionNumber() 116 if agentConfig.Region != "" { 117 conf.Region = agentConfig.Region 118 } 119 120 // Set the Authoritative Region if set, otherwise default to 121 // the same as the local region. 122 if agentConfig.Server.AuthoritativeRegion != "" { 123 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 124 } else if agentConfig.Region != "" { 125 conf.AuthoritativeRegion = agentConfig.Region 126 } 127 128 if agentConfig.Datacenter != "" { 129 conf.Datacenter = agentConfig.Datacenter 130 } 131 if agentConfig.NodeName != "" { 132 conf.NodeName = agentConfig.NodeName 133 } 134 if agentConfig.Server.BootstrapExpect > 0 { 135 if agentConfig.Server.BootstrapExpect == 1 { 136 conf.Bootstrap = true 137 } else { 138 atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect)) 139 } 140 } 141 if agentConfig.DataDir != "" { 142 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 143 } 144 if agentConfig.Server.DataDir != "" { 145 conf.DataDir = agentConfig.Server.DataDir 146 } 147 if agentConfig.Server.ProtocolVersion != 0 { 148 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 149 } 150 if agentConfig.Server.RaftProtocol != 0 { 151 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 152 } 153 if agentConfig.Server.NumSchedulers != nil { 154 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 155 } 156 if len(agentConfig.Server.EnabledSchedulers) != 0 { 157 // Convert to a set and require the core scheduler 158 set := make(map[string]struct{}, 4) 159 set[structs.JobTypeCore] = struct{}{} 160 for _, sched := range agentConfig.Server.EnabledSchedulers { 161 set[sched] = struct{}{} 162 } 163 164 schedulers := make([]string, 0, len(set)) 165 for k := range set { 166 schedulers = append(schedulers, k) 167 } 168 169 conf.EnabledSchedulers = schedulers 170 171 } 172 if agentConfig.ACL.Enabled { 173 conf.ACLEnabled = true 174 } 175 if agentConfig.ACL.ReplicationToken != "" { 176 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 177 } 178 if agentConfig.Sentinel != nil { 179 conf.SentinelConfig = agentConfig.Sentinel 180 } 181 if agentConfig.Server.NonVotingServer { 182 conf.NonVoter = true 183 } 184 if agentConfig.Server.RedundancyZone != "" { 185 conf.RedundancyZone = agentConfig.Server.RedundancyZone 186 } 187 if agentConfig.Server.UpgradeVersion != "" { 188 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 189 } 190 if agentConfig.Autopilot != nil { 191 if agentConfig.Autopilot.CleanupDeadServers != nil { 192 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 193 } 194 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 195 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 196 } 197 if agentConfig.Autopilot.LastContactThreshold != 0 { 198 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 199 } 200 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 201 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 202 } 203 if agentConfig.Autopilot.EnableRedundancyZones != nil { 204 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 205 } 206 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 207 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 208 } 209 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 210 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 211 } 212 } 213 214 // Set up the bind addresses 215 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 216 if err != nil { 217 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 218 } 219 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 220 if err != nil { 221 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 222 } 223 conf.RPCAddr.Port = rpcAddr.Port 224 conf.RPCAddr.IP = rpcAddr.IP 225 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 226 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 227 228 // Set up the advertise addresses 229 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 230 if err != nil { 231 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 232 } 233 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 234 if err != nil { 235 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 236 } 237 238 // Server address is the serf advertise address and rpc port. This is the 239 // address that all servers should be able to communicate over RPC with. 240 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 241 if err != nil { 242 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 243 } 244 245 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 246 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 247 conf.ClientRPCAdvertise = rpcAddr 248 conf.ServerRPCAdvertise = serverAddr 249 250 // Set up gc threshold and heartbeat grace period 251 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 252 dur, err := time.ParseDuration(gcThreshold) 253 if err != nil { 254 return nil, err 255 } 256 conf.NodeGCThreshold = dur 257 } 258 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 259 dur, err := time.ParseDuration(gcThreshold) 260 if err != nil { 261 return nil, err 262 } 263 conf.JobGCThreshold = dur 264 } 265 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 266 dur, err := time.ParseDuration(gcThreshold) 267 if err != nil { 268 return nil, err 269 } 270 conf.EvalGCThreshold = dur 271 } 272 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 273 dur, err := time.ParseDuration(gcThreshold) 274 if err != nil { 275 return nil, err 276 } 277 conf.DeploymentGCThreshold = dur 278 } 279 280 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 281 conf.HeartbeatGrace = heartbeatGrace 282 } 283 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 284 conf.MinHeartbeatTTL = min 285 } 286 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 287 conf.MaxHeartbeatsPerSecond = maxHPS 288 } 289 290 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 291 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 292 } 293 294 // Add the Consul and Vault configs 295 conf.ConsulConfig = agentConfig.Consul 296 conf.VaultConfig = agentConfig.Vault 297 298 // Set the TLS config 299 conf.TLSConfig = agentConfig.TLSConfig 300 301 // Setup telemetry related config 302 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 303 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 304 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 305 306 return conf, nil 307 } 308 309 // serverConfig is used to generate a new server configuration struct 310 // for initializing a nomad server. 311 func (a *Agent) serverConfig() (*nomad.Config, error) { 312 return convertServerConfig(a.config, a.logOutput) 313 } 314 315 // clientConfig is used to generate a new client configuration struct 316 // for initializing a Nomad client. 317 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 318 // Setup the configuration 319 conf := a.config.ClientConfig 320 if conf == nil { 321 conf = clientconfig.DefaultConfig() 322 } 323 324 // If we are running a server, append both its bind and advertise address so 325 // we are able to at least talk to the local server even if that isn't 326 // configured explicitly. This handles both running server and client on one 327 // host and -dev mode. 328 conf.Servers = a.config.Client.Servers 329 if a.server != nil { 330 if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" { 331 return nil, fmt.Errorf("AdvertiseAddrs is nil or empty") 332 } else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" { 333 return nil, fmt.Errorf("normalizedAddrs is nil or empty") 334 } 335 336 conf.Servers = append(conf.Servers, 337 a.config.normalizedAddrs.RPC, 338 a.config.AdvertiseAddrs.RPC) 339 } 340 341 conf.LogOutput = a.logOutput 342 conf.LogLevel = a.config.LogLevel 343 conf.DevMode = a.config.DevMode 344 if a.config.Region != "" { 345 conf.Region = a.config.Region 346 } 347 if a.config.DataDir != "" { 348 conf.StateDir = filepath.Join(a.config.DataDir, "client") 349 conf.AllocDir = filepath.Join(a.config.DataDir, "alloc") 350 } 351 if a.config.Client.StateDir != "" { 352 conf.StateDir = a.config.Client.StateDir 353 } 354 if a.config.Client.AllocDir != "" { 355 conf.AllocDir = a.config.Client.AllocDir 356 } 357 if a.config.Client.NetworkInterface != "" { 358 conf.NetworkInterface = a.config.Client.NetworkInterface 359 } 360 conf.ChrootEnv = a.config.Client.ChrootEnv 361 conf.Options = a.config.Client.Options 362 // Logging deprecation messages about consul related configuration in client 363 // options 364 var invalidConsulKeys []string 365 for key := range conf.Options { 366 if strings.HasPrefix(key, "consul") { 367 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 368 } 369 } 370 if len(invalidConsulKeys) > 0 { 371 a.logger.Printf("[WARN] agent: Invalid keys: %v", strings.Join(invalidConsulKeys, ",")) 372 a.logger.Printf(`Nomad client ignores consul related configuration in client options. 373 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 374 to configure Nomad to work with Consul.`) 375 } 376 377 if a.config.Client.NetworkSpeed != 0 { 378 conf.NetworkSpeed = a.config.Client.NetworkSpeed 379 } 380 if a.config.Client.CpuCompute != 0 { 381 conf.CpuCompute = a.config.Client.CpuCompute 382 } 383 if a.config.Client.MemoryMB != 0 { 384 conf.MemoryMB = a.config.Client.MemoryMB 385 } 386 if a.config.Client.MaxKillTimeout != "" { 387 dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout) 388 if err != nil { 389 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 390 } 391 conf.MaxKillTimeout = dur 392 } 393 conf.ClientMaxPort = uint(a.config.Client.ClientMaxPort) 394 conf.ClientMinPort = uint(a.config.Client.ClientMinPort) 395 396 // Setup the node 397 conf.Node = new(structs.Node) 398 conf.Node.Datacenter = a.config.Datacenter 399 conf.Node.Name = a.config.NodeName 400 conf.Node.Meta = a.config.Client.Meta 401 conf.Node.NodeClass = a.config.Client.NodeClass 402 403 // Set up the HTTP advertise address 404 conf.Node.HTTPAddr = a.config.AdvertiseAddrs.HTTP 405 406 // Reserve resources on the node. 407 r := conf.Node.Reserved 408 if r == nil { 409 r = new(structs.Resources) 410 conf.Node.Reserved = r 411 } 412 r.CPU = a.config.Client.Reserved.CPU 413 r.MemoryMB = a.config.Client.Reserved.MemoryMB 414 r.DiskMB = a.config.Client.Reserved.DiskMB 415 r.IOPS = a.config.Client.Reserved.IOPS 416 conf.GloballyReservedPorts = a.config.Client.Reserved.ParsedReservedPorts 417 418 conf.Version = a.config.Version 419 420 if *a.config.Consul.AutoAdvertise && a.config.Consul.ClientServiceName == "" { 421 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 422 } 423 424 conf.ConsulConfig = a.config.Consul 425 conf.VaultConfig = a.config.Vault 426 427 // Set up Telemetry configuration 428 conf.StatsCollectionInterval = a.config.Telemetry.collectionInterval 429 conf.PublishNodeMetrics = a.config.Telemetry.PublishNodeMetrics 430 conf.PublishAllocationMetrics = a.config.Telemetry.PublishAllocationMetrics 431 conf.DisableTaggedMetrics = a.config.Telemetry.DisableTaggedMetrics 432 conf.BackwardsCompatibleMetrics = a.config.Telemetry.BackwardsCompatibleMetrics 433 434 // Set the TLS related configs 435 conf.TLSConfig = a.config.TLSConfig 436 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 437 438 // Set the GC related configs 439 conf.GCInterval = a.config.Client.GCInterval 440 conf.GCParallelDestroys = a.config.Client.GCParallelDestroys 441 conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold 442 conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold 443 conf.GCMaxAllocs = a.config.Client.GCMaxAllocs 444 if a.config.Client.NoHostUUID != nil { 445 conf.NoHostUUID = *a.config.Client.NoHostUUID 446 } else { 447 // Default no_host_uuid to true 448 conf.NoHostUUID = true 449 } 450 451 // Setup the ACLs 452 conf.ACLEnabled = a.config.ACL.Enabled 453 conf.ACLTokenTTL = a.config.ACL.TokenTTL 454 conf.ACLPolicyTTL = a.config.ACL.PolicyTTL 455 456 return conf, nil 457 } 458 459 // setupServer is used to setup the server if enabled 460 func (a *Agent) setupServer() error { 461 if !a.config.Server.Enabled { 462 return nil 463 } 464 465 // Setup the configuration 466 conf, err := a.serverConfig() 467 if err != nil { 468 return fmt.Errorf("server config setup failed: %s", err) 469 } 470 471 // Generate a node ID and persist it if it is the first instance, otherwise 472 // read the persisted node ID. 473 if err := a.setupNodeID(conf); err != nil { 474 return fmt.Errorf("setting up server node ID failed: %s", err) 475 } 476 477 // Sets up the keyring for gossip encryption 478 if err := a.setupKeyrings(conf); err != nil { 479 return fmt.Errorf("failed to configure keyring: %v", err) 480 } 481 482 // Create the server 483 server, err := nomad.NewServer(conf, a.consulCatalog, a.logger) 484 if err != nil { 485 return fmt.Errorf("server setup failed: %v", err) 486 } 487 a.server = server 488 489 // Consul check addresses default to bind but can be toggled to use advertise 490 rpcCheckAddr := a.config.normalizedAddrs.RPC 491 serfCheckAddr := a.config.normalizedAddrs.Serf 492 if *a.config.Consul.ChecksUseAdvertise { 493 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 494 serfCheckAddr = a.config.AdvertiseAddrs.Serf 495 } 496 497 // Create the Nomad Server services for Consul 498 if *a.config.Consul.AutoAdvertise { 499 httpServ := &structs.Service{ 500 Name: a.config.Consul.ServerServiceName, 501 PortLabel: a.config.AdvertiseAddrs.HTTP, 502 Tags: []string{consul.ServiceTagHTTP}, 503 } 504 const isServer = true 505 if check := a.agentHTTPCheck(isServer); check != nil { 506 httpServ.Checks = []*structs.ServiceCheck{check} 507 } 508 rpcServ := &structs.Service{ 509 Name: a.config.Consul.ServerServiceName, 510 PortLabel: a.config.AdvertiseAddrs.RPC, 511 Tags: []string{consul.ServiceTagRPC}, 512 Checks: []*structs.ServiceCheck{ 513 { 514 Name: a.config.Consul.ServerRPCCheckName, 515 Type: "tcp", 516 Interval: serverRpcCheckInterval, 517 Timeout: serverRpcCheckTimeout, 518 PortLabel: rpcCheckAddr, 519 }, 520 }, 521 } 522 serfServ := &structs.Service{ 523 Name: a.config.Consul.ServerServiceName, 524 PortLabel: a.config.AdvertiseAddrs.Serf, 525 Tags: []string{consul.ServiceTagSerf}, 526 Checks: []*structs.ServiceCheck{ 527 { 528 Name: a.config.Consul.ServerSerfCheckName, 529 Type: "tcp", 530 Interval: serverSerfCheckInterval, 531 Timeout: serverSerfCheckTimeout, 532 PortLabel: serfCheckAddr, 533 }, 534 }, 535 } 536 537 // Add the http port check if TLS isn't enabled 538 consulServices := []*structs.Service{ 539 rpcServ, 540 serfServ, 541 httpServ, 542 } 543 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 544 return err 545 } 546 } 547 548 return nil 549 } 550 551 // setupNodeID will pull the persisted node ID, if any, or create a random one 552 // and persist it. 553 func (a *Agent) setupNodeID(config *nomad.Config) error { 554 // For dev mode we have no filesystem access so just make a node ID. 555 if a.config.DevMode { 556 config.NodeID = uuid.Generate() 557 return nil 558 } 559 560 // Load saved state, if any. Since a user could edit this, we also 561 // validate it. Saved state overwrites any configured node id 562 fileID := filepath.Join(config.DataDir, "node-id") 563 if _, err := os.Stat(fileID); err == nil { 564 rawID, err := ioutil.ReadFile(fileID) 565 if err != nil { 566 return err 567 } 568 569 nodeID := strings.TrimSpace(string(rawID)) 570 nodeID = strings.ToLower(nodeID) 571 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 572 return err 573 } 574 config.NodeID = nodeID 575 return nil 576 } 577 578 // If they've configured a node ID manually then just use that, as 579 // long as it's valid. 580 if config.NodeID != "" { 581 config.NodeID = strings.ToLower(config.NodeID) 582 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 583 return err 584 } 585 // Persist this configured nodeID to our data directory 586 if err := lib.EnsurePath(fileID, false); err != nil { 587 return err 588 } 589 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 590 return err 591 } 592 return nil 593 } 594 595 // If we still don't have a valid node ID, make one. 596 if config.NodeID == "" { 597 id := uuid.Generate() 598 if err := lib.EnsurePath(fileID, false); err != nil { 599 return err 600 } 601 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 602 return err 603 } 604 605 config.NodeID = id 606 } 607 return nil 608 } 609 610 // setupKeyrings is used to initialize and load keyrings during agent startup 611 func (a *Agent) setupKeyrings(config *nomad.Config) error { 612 file := filepath.Join(a.config.DataDir, serfKeyring) 613 614 if a.config.Server.EncryptKey == "" { 615 goto LOAD 616 } 617 if _, err := os.Stat(file); err != nil { 618 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 619 return err 620 } 621 } 622 623 LOAD: 624 if _, err := os.Stat(file); err == nil { 625 config.SerfConfig.KeyringFile = file 626 } 627 if err := loadKeyringFile(config.SerfConfig); err != nil { 628 return err 629 } 630 // Success! 631 return nil 632 } 633 634 // setupClient is used to setup the client if enabled 635 func (a *Agent) setupClient() error { 636 if !a.config.Client.Enabled { 637 return nil 638 } 639 640 // Setup the configuration 641 conf, err := a.clientConfig() 642 if err != nil { 643 return fmt.Errorf("client setup failed: %v", err) 644 } 645 646 // Reserve some ports for the plugins if we are on Windows 647 if runtime.GOOS == "windows" { 648 if err := a.reservePortsForClient(conf); err != nil { 649 return err 650 } 651 } 652 653 client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger) 654 if err != nil { 655 return fmt.Errorf("client setup failed: %v", err) 656 } 657 a.client = client 658 659 // Create the Nomad Client services for Consul 660 if *a.config.Consul.AutoAdvertise { 661 httpServ := &structs.Service{ 662 Name: a.config.Consul.ClientServiceName, 663 PortLabel: a.config.AdvertiseAddrs.HTTP, 664 Tags: []string{consul.ServiceTagHTTP}, 665 } 666 const isServer = false 667 if check := a.agentHTTPCheck(isServer); check != nil { 668 httpServ.Checks = []*structs.ServiceCheck{check} 669 } 670 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 671 return err 672 } 673 } 674 675 return nil 676 } 677 678 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 679 // If no HTTP health check can be supported nil is returned. 680 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 681 // Resolve the http check address 682 httpCheckAddr := a.config.normalizedAddrs.HTTP 683 if *a.config.Consul.ChecksUseAdvertise { 684 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 685 } 686 check := structs.ServiceCheck{ 687 Name: a.config.Consul.ClientHTTPCheckName, 688 Type: "http", 689 Path: "/v1/agent/health?type=client", 690 Protocol: "http", 691 Interval: agentHttpCheckInterval, 692 Timeout: agentHttpCheckTimeout, 693 PortLabel: httpCheckAddr, 694 } 695 // Switch to endpoint that doesn't require a leader for servers 696 if server { 697 check.Name = a.config.Consul.ServerHTTPCheckName 698 check.Path = "/v1/agent/health?type=server" 699 } 700 if !a.config.TLSConfig.EnableHTTP { 701 // No HTTPS, return a plain http check 702 return &check 703 } 704 if a.config.TLSConfig.VerifyHTTPSClient { 705 a.logger.Printf("[WARN] agent: not registering Nomad HTTPS Health Check because verify_https_client enabled") 706 return nil 707 } 708 709 // HTTPS enabled; skip verification 710 check.Protocol = "https" 711 check.TLSSkipVerify = true 712 return &check 713 } 714 715 // reservePortsForClient reserves a range of ports for the client to use when 716 // it creates various plugins for log collection, executors, drivers, etc 717 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 718 // finding the device name for loopback 719 deviceName, addr, mask, err := a.findLoopbackDevice() 720 if err != nil { 721 return fmt.Errorf("error finding the device name for loopback: %v", err) 722 } 723 724 // seeing if the user has already reserved some resources on this device 725 var nr *structs.NetworkResource 726 if conf.Node.Reserved == nil { 727 conf.Node.Reserved = &structs.Resources{} 728 } 729 for _, n := range conf.Node.Reserved.Networks { 730 if n.Device == deviceName { 731 nr = n 732 } 733 } 734 // If the user hasn't already created the device, we create it 735 if nr == nil { 736 nr = &structs.NetworkResource{ 737 Device: deviceName, 738 IP: addr, 739 CIDR: mask, 740 ReservedPorts: make([]structs.Port, 0), 741 } 742 } 743 // appending the port ranges we want to use for the client to the list of 744 // reserved ports for this device 745 for i := conf.ClientMinPort; i <= conf.ClientMaxPort; i++ { 746 nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: int(i)}) 747 } 748 conf.Node.Reserved.Networks = append(conf.Node.Reserved.Networks, nr) 749 return nil 750 } 751 752 // findLoopbackDevice iterates through all the interfaces on a machine and 753 // returns the ip addr, mask of the loopback device 754 func (a *Agent) findLoopbackDevice() (string, string, string, error) { 755 var ifcs []net.Interface 756 var err error 757 ifcs, err = net.Interfaces() 758 if err != nil { 759 return "", "", "", err 760 } 761 for _, ifc := range ifcs { 762 addrs, err := ifc.Addrs() 763 if err != nil { 764 return "", "", "", err 765 } 766 for _, addr := range addrs { 767 var ip net.IP 768 switch v := addr.(type) { 769 case *net.IPNet: 770 ip = v.IP 771 case *net.IPAddr: 772 ip = v.IP 773 } 774 if ip.IsLoopback() { 775 if ip.To4() == nil { 776 continue 777 } 778 return ifc.Name, ip.String(), addr.String(), nil 779 } 780 } 781 } 782 783 return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found") 784 } 785 786 // Leave is used gracefully exit. Clients will inform servers 787 // of their departure so that allocations can be rescheduled. 788 func (a *Agent) Leave() error { 789 if a.client != nil { 790 if err := a.client.Leave(); err != nil { 791 a.logger.Printf("[ERR] agent: client leave failed: %v", err) 792 } 793 } 794 if a.server != nil { 795 if err := a.server.Leave(); err != nil { 796 a.logger.Printf("[ERR] agent: server leave failed: %v", err) 797 } 798 } 799 return nil 800 } 801 802 // Shutdown is used to terminate the agent. 803 func (a *Agent) Shutdown() error { 804 a.shutdownLock.Lock() 805 defer a.shutdownLock.Unlock() 806 807 if a.shutdown { 808 return nil 809 } 810 811 a.logger.Println("[INFO] agent: requesting shutdown") 812 if a.client != nil { 813 if err := a.client.Shutdown(); err != nil { 814 a.logger.Printf("[ERR] agent: client shutdown failed: %v", err) 815 } 816 } 817 if a.server != nil { 818 if err := a.server.Shutdown(); err != nil { 819 a.logger.Printf("[ERR] agent: server shutdown failed: %v", err) 820 } 821 } 822 823 if err := a.consulService.Shutdown(); err != nil { 824 a.logger.Printf("[ERR] agent: shutting down Consul client failed: %v", err) 825 } 826 827 a.logger.Println("[INFO] agent: shutdown complete") 828 a.shutdown = true 829 close(a.shutdownCh) 830 return nil 831 } 832 833 // RPC is used to make an RPC call to the Nomad servers 834 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 835 if a.server != nil { 836 return a.server.RPC(method, args, reply) 837 } 838 return a.client.RPC(method, args, reply) 839 } 840 841 // Client returns the configured client or nil 842 func (a *Agent) Client() *client.Client { 843 return a.client 844 } 845 846 // Server returns the configured server or nil 847 func (a *Agent) Server() *nomad.Server { 848 return a.server 849 } 850 851 // Stats is used to return statistics for debugging and insight 852 // for various sub-systems 853 func (a *Agent) Stats() map[string]map[string]string { 854 stats := make(map[string]map[string]string) 855 if a.server != nil { 856 subStat := a.server.Stats() 857 for k, v := range subStat { 858 stats[k] = v 859 } 860 } 861 if a.client != nil { 862 subStat := a.client.Stats() 863 for k, v := range subStat { 864 stats[k] = v 865 } 866 } 867 return stats 868 } 869 870 // ShouldReload determines if we should reload the configuration and agent 871 // connections. If the TLS Configuration has not changed, we shouldn't reload. 872 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 873 a.configLock.Lock() 874 defer a.configLock.Unlock() 875 876 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 877 if err != nil { 878 a.logger.Printf("[INFO] agent: error when parsing TLS certificate %v", err) 879 return false, false 880 } else if !isEqual { 881 return true, true 882 } 883 884 // Allow the ability to only reload HTTP connections 885 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 886 http = true 887 agent = true 888 } 889 890 // Allow the ability to only reload HTTP connections 891 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 892 agent = true 893 } 894 895 return agent, http 896 } 897 898 // Reload handles configuration changes for the agent. Provides a method that 899 // is easier to unit test, as this action is invoked via SIGHUP. 900 func (a *Agent) Reload(newConfig *Config) error { 901 a.configLock.Lock() 902 defer a.configLock.Unlock() 903 904 if newConfig == nil || newConfig.TLSConfig == nil { 905 return fmt.Errorf("cannot reload agent with nil configuration") 906 } 907 908 // This is just a TLS configuration reload, we don't need to refresh 909 // existing network connections 910 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 911 912 // Reload the certificates on the keyloader and on success store the 913 // updated TLS config. It is important to reuse the same keyloader 914 // as this allows us to dynamically reload configurations not only 915 // on the Agent but on the Server and Client too (they are 916 // referencing the same keyloader). 917 keyloader := a.config.TLSConfig.GetKeyLoader() 918 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 919 if err != nil { 920 return err 921 } 922 a.config.TLSConfig = newConfig.TLSConfig 923 a.config.TLSConfig.KeyLoader = keyloader 924 return nil 925 } 926 927 // Completely reload the agent's TLS configuration (moving from non-TLS to 928 // TLS, or vice versa) 929 // This does not handle errors in loading the new TLS configuration 930 a.config.TLSConfig = newConfig.TLSConfig.Copy() 931 932 if newConfig.TLSConfig.IsEmpty() { 933 a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext") 934 } else { 935 a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS") 936 } 937 938 return nil 939 } 940 941 // GetConfig creates a locked reference to the agent's config 942 func (a *Agent) GetConfig() *Config { 943 a.configLock.Lock() 944 defer a.configLock.Unlock() 945 946 return a.config 947 } 948 949 // setupConsul creates the Consul client and starts its main Run loop. 950 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 951 apiConf, err := consulConfig.ApiConfig() 952 if err != nil { 953 return err 954 } 955 client, err := api.NewClient(apiConf) 956 if err != nil { 957 return err 958 } 959 960 // Determine version for TLSSkipVerify 961 962 // Create Consul Catalog client for service discovery. 963 a.consulCatalog = client.Catalog() 964 965 // Create Consul Service client for service advertisement and checks. 966 isClient := false 967 if a.config.Client != nil && a.config.Client.Enabled { 968 isClient = true 969 } 970 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 971 972 // Run the Consul service client's sync'ing main loop 973 go a.consulService.Run() 974 return nil 975 }