github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/command/agent/command.go

package agent

import (
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"os/signal"
	"path/filepath"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"syscall"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/armon/go-metrics/circonus"
	"github.com/armon/go-metrics/datadog"
	"github.com/armon/go-metrics/prometheus"
	"github.com/hashicorp/consul/lib"
	checkpoint "github.com/hashicorp/go-checkpoint"
	discover "github.com/hashicorp/go-discover"
	gsyslog "github.com/hashicorp/go-syslog"
	"github.com/hashicorp/logutils"
	flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
	gatedwriter "github.com/hashicorp/nomad/helper/gated-writer"
	"github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/version"
	"github.com/mitchellh/cli"
	"github.com/posener/complete"
)

// gracefulTimeout controls how long we wait before forcefully terminating
const gracefulTimeout = 5 * time.Second

// Command is a Command implementation that runs a Nomad agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
	Version    *version.VersionInfo
	Ui         cli.Ui
	ShutdownCh <-chan struct{}

	args           []string
	agent          *Agent
	httpServer     *HTTPServer
	logFilter      *logutils.LevelFilter
	logOutput      io.Writer
	retryJoinErrCh chan struct{}
}

func (c *Command) readConfig() *Config {
	var dev bool
	var configPath []string
	var servers string
	var meta []string

	// Make a new, empty config.
	cmdConfig := &Config{
		Client: &ClientConfig{},
		Consul: &config.ConsulConfig{},
		Ports:  &Ports{},
		Server: &ServerConfig{
			ServerJoin: &ServerJoin{},
		},
		Vault: &config.VaultConfig{},
		ACL:   &ACLConfig{},
	}

	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
	flags.Usage = func() { c.Ui.Error(c.Help()) }

	// Role options
	flags.BoolVar(&dev, "dev", false, "")
	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")

	// Server-only options
	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")
	flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "")
	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.StartJoin), "join", "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.RetryJoin), "retry-join", "")
	flags.IntVar(&cmdConfig.Server.ServerJoin.RetryMaxAttempts, "retry-max", 0, "")
	flags.Var((flaghelper.FuncDurationVar)(func(d time.Duration) error {
		cmdConfig.Server.ServerJoin.RetryInterval = d
		return nil
	}), "retry-interval", "")

	// Client-only options
	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
	flags.StringVar(&servers, "servers", "", "")
	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")

	// General options
	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
	flags.StringVar(&cmdConfig.Region, "region", "", "")
	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
	flags.StringVar(&cmdConfig.PluginDir, "plugin-dir", "", "")
	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
	flags.StringVar(&cmdConfig.NodeName, "node", "", "")

	// Consul options
	flags.StringVar(&cmdConfig.Consul.Auth, "consul-auth", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.AutoAdvertise = &b
		return nil
	}), "consul-auto-advertise", "")
	flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "")
	flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ChecksUseAdvertise = &b
		return nil
	}), "consul-checks-use-advertise", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ClientAutoJoin = &b
		return nil
	}), "consul-client-auto-join", "")
	flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ClientHTTPCheckName, "consul-client-http-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerHTTPCheckName, "consul-server-http-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerSerfCheckName, "consul-server-serf-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerRPCCheckName, "consul-server-rpc-check-name", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ServerAutoJoin = &b
		return nil
	}), "consul-server-auto-join", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.EnableSSL = &b
		return nil
	}), "consul-ssl", "")
	flags.StringVar(&cmdConfig.Consul.Token, "consul-token", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.VerifySSL = &b
		return nil
	}), "consul-verify-ssl", "")
	flags.StringVar(&cmdConfig.Consul.Addr, "consul-address", "", "")

	// Vault options
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.Enabled = &b
		return nil
	}), "vault-enabled", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.AllowUnauthenticated = &b
		return nil
	}), "vault-allow-unauthenticated", "")
	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
	flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.TLSSkipVerify = &b
		return nil
	}), "vault-tls-skip-verify", "")
	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")

	// ACL options
	flags.BoolVar(&cmdConfig.ACL.Enabled, "acl-enabled", false, "")
	flags.StringVar(&cmdConfig.ACL.ReplicationToken, "acl-replication-token", "", "")

	if err := flags.Parse(c.args); err != nil {
		return nil
	}

	// Split the servers.
	if servers != "" {
		cmdConfig.Client.Servers = strings.Split(servers, ",")
	}

	// Parse the meta flags.
	metaLength := len(meta)
	if metaLength != 0 {
		cmdConfig.Client.Meta = make(map[string]string, metaLength)
		for _, kv := range meta {
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) != 2 {
				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
				return nil
			}

			cmdConfig.Client.Meta[parts[0]] = parts[1]
		}
	}

	// Load the configuration
	var config *Config
	if dev {
		config = DevConfig()
	} else {
		config = DefaultConfig()
	}

	// Merge in the enterprise overlay
	config.Merge(DefaultEntConfig())

	for _, path := range configPath {
		current, err := LoadConfig(path)
		if err != nil {
			c.Ui.Error(fmt.Sprintf(
				"Error loading configuration from %s: %s", path, err))
			return nil
		}

		// The user asked us to load some config here but we didn't find any,
		// so we'll complain but continue.
		if current == nil || reflect.DeepEqual(current, &Config{}) {
			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
		}

		if config == nil {
			config = current
		} else {
			config = config.Merge(current)
		}
	}

	// Ensure the sub-structs at least exist
	if config.Client == nil {
		config.Client = &ClientConfig{}
	}
	if config.Server == nil {
		config.Server = &ServerConfig{}
	}

	// Merge any CLI options over config file options
	config = config.Merge(cmdConfig)

	// Set the version info
	config.Version = c.Version

	// Normalize binds, ports, addresses, and advertise
	if err := config.normalizeAddrs(); err != nil {
		c.Ui.Error(err.Error())
		return nil
	}

	// Check to see if we should read the Vault token from the environment
	if config.Vault.Token == "" {
		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
			config.Vault.Token = token
		}
	}

	// Set up the TLS configuration properly if we have one.
	// XXX chelseakomlo: set up a TLSConfig New method which would wrap
	// constructor-type actions like this.
	if config.TLSConfig != nil && !config.TLSConfig.IsEmpty() {
		if err := config.TLSConfig.SetChecksum(); err != nil {
			c.Ui.Error(fmt.Sprintf("WARNING: Error when parsing TLS configuration: %v", err))
		}
	}

	// Default the plugin directory to be under that of the data directory if it
	// isn't explicitly specified.
	if config.PluginDir == "" && config.DataDir != "" {
		config.PluginDir = filepath.Join(config.DataDir, "plugins")
	}

	if dev {
		// Skip validation for dev mode
		return config
	}

	if config.Server.EncryptKey != "" {
		if _, err := config.Server.EncryptBytes(); err != nil {
			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
			return nil
		}
		keyfile := filepath.Join(config.DataDir, serfKeyring)
		if _, err := os.Stat(keyfile); err == nil {
			c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring")
		}
	}

	// Check that the server is running in at least one mode.
	if !(config.Server.Enabled || config.Client.Enabled) {
		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
		return nil
	}

	// Verify the paths are absolute.
	dirs := map[string]string{
		"data-dir":   config.DataDir,
		"plugin-dir": config.PluginDir,
		"alloc-dir":  config.Client.AllocDir,
		"state-dir":  config.Client.StateDir,
	}
	for k, dir := range dirs {
		if dir == "" {
			continue
		}

		if !filepath.IsAbs(dir) {
			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
			return nil
		}
	}

	// Ensure that we have the directories we need to run.
	if config.Server.Enabled && config.DataDir == "" {
		c.Ui.Error("Must specify data directory")
		return nil
	}

	// The config is valid if the top-level data-dir is set or if both
	// alloc-dir and state-dir are set.
	if config.Client.Enabled && config.DataDir == "" {
		if config.Client.AllocDir == "" || config.Client.StateDir == "" || config.PluginDir == "" {
			c.Ui.Error("Must specify the state, alloc dir, and plugin dir if data-dir is omitted.")
			return nil
		}
	}

	// Check the bootstrap flags
	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
		c.Ui.Error("Bootstrap requires server mode to be enabled")
		return nil
	}
	if config.Server.BootstrapExpect == 1 {
		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
	}

	return config
}
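
// The -meta handling in readConfig above accepts repeated KEY=VALUE pairs and
// splits each one on the first "=". A minimal standalone sketch of that
// parsing rule; the helper name is illustrative and not part of the upstream
// package.
func parseMetaFlagsExample(meta []string) (map[string]string, error) {
	parsed := make(map[string]string, len(meta))
	for _, kv := range meta {
		// Split on the first "=" only, so values may themselves contain "=".
		parts := strings.SplitN(kv, "=", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("expected KEY=VALUE, got %q", kv)
		}
		parsed[parts[0]] = parts[1]
	}
	return parsed, nil
}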

// setupLoggers is used to setup the logGate, logWriter, and our logOutput
func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
	// Setup logging. First create the gated log writer, which will
	// store logs until we're ready to show them. Then create the level
	// filter, filtering logs of the specified level.
	logGate := &gatedwriter.Writer{
		Writer: &cli.UiWriter{Ui: c.Ui},
	}

	c.logFilter = LevelFilter()
	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
	c.logFilter.Writer = logGate
	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
		c.Ui.Error(fmt.Sprintf(
			"Invalid log level: %s. Valid log levels are: %v",
			c.logFilter.MinLevel, c.logFilter.Levels))
		return nil, nil, nil
	}

	// Check if syslog is enabled
	var syslog io.Writer
	if config.EnableSyslog {
		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
			return nil, nil, nil
		}
		syslog = &SyslogWrapper{l, c.logFilter}
	}

	// Create a log writer, and wrap a logOutput around it
	logWriter := NewLogWriter(512)
	var logOutput io.Writer
	if syslog != nil {
		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
	} else {
		logOutput = io.MultiWriter(c.logFilter, logWriter)
	}
	c.logOutput = logOutput
	log.SetOutput(logOutput)
	return logGate, logWriter, logOutput
}

// setupAgent is used to start the agent and various interfaces
func (c *Command) setupAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) error {
	c.Ui.Output("Starting Nomad agent...")
	agent, err := NewAgent(config, logOutput, inmem)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
		return err
	}
	c.agent = agent

	// Setup the HTTP server
	http, err := NewHTTPServer(agent, config)
	if err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
		return err
	}
	c.httpServer = http

	// If DisableUpdateCheck is not enabled, set up update checking
	// (DisableUpdateCheck is false by default)
	if config.DisableUpdateCheck != nil && !*config.DisableUpdateCheck {
		version := config.Version.Version
		if config.Version.VersionPrerelease != "" {
			version += fmt.Sprintf("-%s", config.Version.VersionPrerelease)
		}
		updateParams := &checkpoint.CheckParams{
			Product: "nomad",
			Version: version,
		}
		if !config.DisableAnonymousSignature {
			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
		}

		// Schedule a periodic check with expected interval of 24 hours
		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)

		// Do an immediate check within the next 30 seconds
		go func() {
			time.Sleep(lib.RandomStagger(30 * time.Second))
			c.checkpointResults(checkpoint.Check(updateParams))
		}()
	}

	return nil
}

// checkpointResults is used to handle periodic results from our update checker
func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
		return
	}
	if results.Outdated {
		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, c.Version.VersionNumber()))
	}
	for _, alert := range results.Alerts {
		switch alert.Level {
		case "info":
			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
		default:
			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
		}
	}
}

func (c *Command) AutocompleteFlags() complete.Flags {
	configFilePredictor := complete.PredictOr(
		complete.PredictFiles("*.json"),
		complete.PredictFiles("*.hcl"))

	return map[string]complete.Predictor{
		"-config": configFilePredictor,
	}
}

func (c *Command) AutocompleteArgs() complete.Predictor {
	return nil
}
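
// setupLoggers above layers the log pipeline as: gated writer (buffers output
// until startup finishes) -> level filter -> optional syslog, fanned out with
// io.MultiWriter. A compressed sketch of the same wiring, assuming os.Stderr
// as the destination and the level names used elsewhere in this package; the
// function name is illustrative only.
func exampleLogPipeline(minLevel string) io.Writer {
	filter := &logutils.LevelFilter{
		Levels:   []logutils.LogLevel{"TRACE", "DEBUG", "INFO", "WARN", "ERR"},
		MinLevel: logutils.LogLevel(strings.ToUpper(minLevel)),
		Writer:   os.Stderr,
	}
	// Anything written to the returned writer is level-filtered; additional
	// sinks (e.g. syslog) would be added as extra io.MultiWriter arguments.
	return io.MultiWriter(filter)
}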

func (c *Command) Run(args []string) int {
	c.Ui = &cli.PrefixedUi{
		OutputPrefix: "==> ",
		InfoPrefix:   "    ",
		ErrorPrefix:  "==> ",
		Ui:           c.Ui,
	}

	// Parse our configs
	c.args = args
	config := c.readConfig()
	if config == nil {
		return 1
	}

	// Setup the log outputs
	logGate, _, logOutput := c.setupLoggers(config)
	if logGate == nil {
		return 1
	}

	// Log config files
	if len(config.Files) > 0 {
		c.Ui.Output(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
	} else {
		c.Ui.Output("No configuration files loaded")
	}

	// Initialize the telemetry
	inmem, err := c.setupTelemetry(config)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
		return 1
	}

	// Create the agent
	if err := c.setupAgent(config, logOutput, inmem); err != nil {
		logGate.Flush()
		return 1
	}
	defer c.agent.Shutdown()

	// Shutdown the HTTP server at the end
	defer func() {
		if c.httpServer != nil {
			c.httpServer.Shutdown()
		}
	}()

	// Join startup nodes if specified
	if err := c.startupJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Compile agent information for output later
	info := make(map[string]string)
	info["version"] = config.Version.VersionNumber()
	info["client"] = strconv.FormatBool(config.Client.Enabled)
	info["log level"] = config.LogLevel
	info["server"] = strconv.FormatBool(config.Server.Enabled)
	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)
	info["bind addrs"] = c.getBindAddrSynopsis()
	info["advertise addrs"] = c.getAdvertiseAddrSynopsis()

	// Sort the keys for output
	infoKeys := make([]string, 0, len(info))
	for key := range info {
		infoKeys = append(infoKeys, key)
	}
	sort.Strings(infoKeys)

	// Agent configuration output
	padding := 18
	c.Ui.Output("Nomad agent configuration:\n")
	for _, k := range infoKeys {
		c.Ui.Info(fmt.Sprintf(
			"%s%s: %s",
			strings.Repeat(" ", padding-len(k)),
			strings.Title(k),
			info[k]))
	}
	c.Ui.Output("")

	// Output the header that the server has started
	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")

	// Enable log streaming
	logGate.Flush()

	// Start retry join process
	if err := c.handleRetryJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Wait for exit
	return c.handleSignals()
}
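
// Run above implements the cli.Command interface and is normally driven by a
// mitchellh/cli CLI rather than called directly. A hedged sketch of how an
// embedding binary might wire it up; the surrounding main function and the
// version.GetVersion helper are assumptions, not something this file defines:
//
//	shutdownCh := make(chan struct{})
//	ui := &cli.BasicUi{Reader: os.Stdin, Writer: os.Stdout, ErrorWriter: os.Stderr}
//	cliApp := cli.NewCLI("nomad", "0.8.5")
//	cliApp.Args = os.Args[1:]
//	cliApp.Commands = map[string]cli.CommandFactory{
//		"agent": func() (cli.Command, error) {
//			return &Command{
//				Version:    version.GetVersion(), // assumed helper from the version package
//				Ui:         ui,
//				ShutdownCh: shutdownCh,
//			}, nil
//		},
//	}
//	exitCode, err := cliApp.Run()
//	if err != nil {
//		ui.Error(err.Error())
//	}
//	os.Exit(exitCode)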

// handleRetryJoin is used to start retry joining if it is configured.
func (c *Command) handleRetryJoin(config *Config) error {
	c.retryJoinErrCh = make(chan struct{})

	if config.Server.Enabled && len(config.Server.RetryJoin) != 0 {
		joiner := retryJoiner{
			discover:      &discover.Discover{},
			errCh:         c.retryJoinErrCh,
			logger:        c.agent.logger,
			serverJoin:    c.agent.server.Join,
			serverEnabled: true,
		}

		if err := joiner.Validate(config); err != nil {
			return err
		}

		// Remove the duplicate fields
		if len(config.Server.RetryJoin) != 0 {
			config.Server.ServerJoin.RetryJoin = config.Server.RetryJoin
			config.Server.RetryJoin = nil
		}
		if config.Server.RetryMaxAttempts != 0 {
			config.Server.ServerJoin.RetryMaxAttempts = config.Server.RetryMaxAttempts
			config.Server.RetryMaxAttempts = 0
		}
		if config.Server.RetryInterval != 0 {
			config.Server.ServerJoin.RetryInterval = config.Server.RetryInterval
			config.Server.RetryInterval = 0
		}

		c.agent.logger.Printf("[WARN] agent: Using deprecated retry_join fields. Upgrade configuration to use server_join")
	}

	if config.Server.Enabled &&
		config.Server.ServerJoin != nil &&
		len(config.Server.ServerJoin.RetryJoin) != 0 {

		joiner := retryJoiner{
			discover:      &discover.Discover{},
			errCh:         c.retryJoinErrCh,
			logger:        c.agent.logger,
			serverJoin:    c.agent.server.Join,
			serverEnabled: true,
		}

		if err := joiner.Validate(config); err != nil {
			return err
		}

		go joiner.RetryJoin(config.Server.ServerJoin)
	}

	if config.Client.Enabled &&
		config.Client.ServerJoin != nil &&
		len(config.Client.ServerJoin.RetryJoin) != 0 {
		joiner := retryJoiner{
			discover:      &discover.Discover{},
			errCh:         c.retryJoinErrCh,
			logger:        c.agent.logger,
			clientJoin:    c.agent.client.SetServers,
			clientEnabled: true,
		}

		if err := joiner.Validate(config); err != nil {
			return err
		}

		go joiner.RetryJoin(config.Client.ServerJoin)
	}

	return nil
}

// handleSignals blocks until we get an exit-causing signal
func (c *Command) handleSignals() int {
	signalCh := make(chan os.Signal, 4)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

	// Wait for a signal
WAIT:
	var sig os.Signal
	select {
	case s := <-signalCh:
		sig = s
	case <-c.ShutdownCh:
		sig = os.Interrupt
	case <-c.retryJoinErrCh:
		return 1
	}

	// Skip any SIGPIPE signal and don't try to log it (See issues #1798, #3554)
	if sig == syscall.SIGPIPE {
		goto WAIT
	}

	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))

	// Check if this is a SIGHUP
	if sig == syscall.SIGHUP {
		c.handleReload()
		goto WAIT
	}

	// Check if we should do a graceful leave
	graceful := false
	if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt {
		graceful = true
	} else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm {
		graceful = true
	}

	// Bail fast if not doing a graceful leave
	if !graceful {
		return 1
	}

	// Attempt a graceful leave
	gracefulCh := make(chan struct{})
	c.Ui.Output("Gracefully shutting down agent...")
	go func() {
		if err := c.agent.Leave(); err != nil {
			c.Ui.Error(fmt.Sprintf("Error: %s", err))
			return
		}
		close(gracefulCh)
	}()

	// Wait for leave or another signal
	select {
	case <-signalCh:
		return 1
	case <-time.After(gracefulTimeout):
		return 1
	case <-gracefulCh:
		return 0
	}
}
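
// handleSignals above bounds a graceful leave with gracefulTimeout: the leave
// runs in a goroutine and the caller returns non-zero if it does not finish in
// time or another signal arrives first. The same pattern in isolation; the
// function name is illustrative only.
func exampleBoundedLeave(leave func() error) int {
	done := make(chan struct{})
	go func() {
		// Mirror handleSignals: only signal success. A failed leave never
		// closes the channel, so the timeout below fires instead.
		if err := leave(); err == nil {
			close(done)
		}
	}()
	select {
	case <-done:
		return 0
	case <-time.After(gracefulTimeout):
		return 1
	}
}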

// reloadHTTPServer shuts down the existing HTTP server and restarts it. This
// is helpful when reloading the agent configuration.
func (c *Command) reloadHTTPServer() error {
	c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration")

	c.httpServer.Shutdown()

	http, err := NewHTTPServer(c.agent, c.agent.config)
	if err != nil {
		return err
	}
	c.httpServer = http

	return nil
}

// handleReload is invoked when we should reload our configs, e.g. SIGHUP
func (c *Command) handleReload() {
	c.Ui.Output("Reloading configuration...")
	newConf := c.readConfig()
	if newConf == nil {
		c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
		return
	}

	// Change the log level
	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
	if ValidateLevelFilter(minLevel, c.logFilter) {
		c.logFilter.SetMinLevel(minLevel)
	} else {
		c.Ui.Error(fmt.Sprintf(
			"Invalid log level: %s. Valid log levels are: %v",
			minLevel, c.logFilter.Levels))

		// Keep the current log level
		newConf.LogLevel = c.agent.GetConfig().LogLevel
	}

	shouldReloadAgent, shouldReloadHTTP := c.agent.ShouldReload(newConf)
	if shouldReloadAgent {
		c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config")
		err := c.agent.Reload(newConf)
		if err != nil {
			c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
			return
		}
	}

	if s := c.agent.Server(); s != nil {
		c.agent.logger.Printf("[DEBUG] agent: starting reload of server config")
		sconf, err := convertServerConfig(newConf, c.logOutput)
		if err != nil {
			c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err)
			return
		} else {
			if err := s.Reload(sconf); err != nil {
				c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err)
				return
			}
		}
	}

	if s := c.agent.Client(); s != nil {
		clientConfig, err := c.agent.clientConfig()
		c.agent.logger.Printf("[DEBUG] agent: starting reload of client config")
		if err != nil {
			c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err)
			return
		}
		if err := c.agent.Client().Reload(clientConfig); err != nil {
			c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err)
			return
		}
	}

	// reload HTTP server after we have reloaded both client and server, in case
	// we error in either of the above cases. For example, reloading the http
	// server to a TLS connection could succeed, while reloading the server's rpc
	// connections could fail.
	if shouldReloadHTTP {
		err := c.reloadHTTPServer()
		if err != nil {
			c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err)
			return
		}
	}
}
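
// handleReload above is only reachable through the SIGHUP case in
// handleSignals, so a running agent's configuration reload can be exercised by
// signalling the process itself. A minimal sketch, assuming a POSIX platform:
//
//	if p, err := os.FindProcess(os.Getpid()); err == nil {
//		p.Signal(syscall.SIGHUP) // picked up by handleSignals, which calls handleReload
//	}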

// setupTelemetry is used to setup the telemetry sub-systems
func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
	/* Setup telemetry
	Aggregate on 10 second intervals for 1 minute. Expose the
	metrics over stderr when there is a SIGUSR1 received.
	*/
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	var telConfig *Telemetry
	if config.Telemetry == nil {
		telConfig = &Telemetry{}
	} else {
		telConfig = config.Telemetry
	}

	metricsConf := metrics.DefaultConfig("nomad")
	metricsConf.EnableHostname = !telConfig.DisableHostname

	// Prefer the hostname as a label.
	metricsConf.EnableHostnameLabel = !telConfig.DisableHostname &&
		!telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics

	if telConfig.UseNodeName {
		metricsConf.HostName = config.NodeName
		metricsConf.EnableHostname = true
	}

	// Configure the statsite sink
	var fanout metrics.FanoutSink
	if telConfig.StatsiteAddr != "" {
		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, sink)
	}

	// Configure the statsd sink
	if telConfig.StatsdAddr != "" {
		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, sink)
	}

	// Configure the prometheus sink
	if telConfig.PrometheusMetrics {
		promSink, err := prometheus.NewPrometheusSink()
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, promSink)
	}

	// Configure the datadog sink
	if telConfig.DataDogAddr != "" {
		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
		if err != nil {
			return inm, err
		}
		sink.SetTags(telConfig.DataDogTags)
		fanout = append(fanout, sink)
	}

	// Configure the Circonus sink
	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
		cfg := &circonus.Config{}
		cfg.Interval = telConfig.CirconusSubmissionInterval
		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag

		if cfg.CheckManager.Check.DisplayName == "" {
			cfg.CheckManager.Check.DisplayName = "Nomad"
		}

		if cfg.CheckManager.API.TokenApp == "" {
			cfg.CheckManager.API.TokenApp = "nomad"
		}

		if cfg.CheckManager.Check.SearchTag == "" {
			cfg.CheckManager.Check.SearchTag = "service:nomad"
		}

		sink, err := circonus.NewCirconusSink(cfg)
		if err != nil {
			return inm, err
		}
		sink.Start()
		fanout = append(fanout, sink)
	}

	// Initialize the global sink
	if len(fanout) > 0 {
		fanout = append(fanout, inm)
		metrics.NewGlobal(metricsConf, fanout)
	} else {
		metricsConf.EnableHostname = false
		metrics.NewGlobal(metricsConf, inm)
	}
	return inm, nil
}
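
// setupTelemetry above always creates the in-memory sink (dumped on SIGUSR1 by
// DefaultInmemSignal) and only builds a FanoutSink when at least one remote
// sink is configured. The core wiring in isolation, assuming only a statsd
// sink; the function name is illustrative only.
func exampleTelemetry(statsdAddr string) (*metrics.InmemSink, error) {
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	conf := metrics.DefaultConfig("nomad")
	if statsdAddr == "" {
		// No remote sink configured: report straight to the in-memory sink.
		_, err := metrics.NewGlobal(conf, inm)
		return inm, err
	}

	sink, err := metrics.NewStatsdSink(statsdAddr)
	if err != nil {
		return inm, err
	}
	// Fan metrics out to both the remote sink and the in-memory sink.
	_, err = metrics.NewGlobal(conf, metrics.FanoutSink{sink, inm})
	return inm, err
}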

func (c *Command) startupJoin(config *Config) error {
	// Nothing to do
	if !config.Server.Enabled {
		return nil
	}

	// Validate both old and new aren't being set
	old := len(config.Server.StartJoin)
	var new int
	if config.Server.ServerJoin != nil {
		new = len(config.Server.ServerJoin.StartJoin)
	}
	if old != 0 && new != 0 {
		return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
	}

	// Nothing to do
	if old+new == 0 {
		return nil
	}

	// Combine the lists and join
	joining := config.Server.StartJoin
	if new != 0 {
		joining = append(joining, config.Server.ServerJoin.StartJoin...)
	}

	c.Ui.Output("Joining cluster...")
	n, err := c.agent.server.Join(joining)
	if err != nil {
		return err
	}

	c.Ui.Output(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
	return nil
}

// getBindAddrSynopsis returns a string that describes the addresses the agent
// is bound to.
func (c *Command) getBindAddrSynopsis() string {
	if c == nil || c.agent == nil || c.agent.config == nil || c.agent.config.normalizedAddrs == nil {
		return ""
	}

	b := new(strings.Builder)
	fmt.Fprintf(b, "HTTP: %s", c.agent.config.normalizedAddrs.HTTP)

	if c.agent.server != nil {
		if c.agent.config.normalizedAddrs.RPC != "" {
			fmt.Fprintf(b, "; RPC: %s", c.agent.config.normalizedAddrs.RPC)
		}
		if c.agent.config.normalizedAddrs.Serf != "" {
			fmt.Fprintf(b, "; Serf: %s", c.agent.config.normalizedAddrs.Serf)
		}
	}

	return b.String()
}

// getAdvertiseAddrSynopsis returns a string that describes the addresses the agent
// is advertising.
func (c *Command) getAdvertiseAddrSynopsis() string {
	if c == nil || c.agent == nil || c.agent.config == nil || c.agent.config.AdvertiseAddrs == nil {
		return ""
	}

	b := new(strings.Builder)
	fmt.Fprintf(b, "HTTP: %s", c.agent.config.AdvertiseAddrs.HTTP)

	if c.agent.server != nil {
		if c.agent.config.AdvertiseAddrs.RPC != "" {
			fmt.Fprintf(b, "; RPC: %s", c.agent.config.AdvertiseAddrs.RPC)
		}
		if c.agent.config.AdvertiseAddrs.Serf != "" {
			fmt.Fprintf(b, "; Serf: %s", c.agent.config.AdvertiseAddrs.Serf)
		}
	}

	return b.String()
}

func (c *Command) Synopsis() string {
	return "Runs a Nomad agent"
}

func (c *Command) Help() string {
	helpText := `
Usage: nomad agent [options]

  Starts the Nomad agent and runs until an interrupt is received.
  The agent may be a client and/or server.

  The Nomad agent's configuration primarily comes from the config
  files used, but a subset of the options may also be passed directly
  as CLI arguments, listed below.

General Options (clients and servers):

  -bind=<addr>
    The address the agent will bind to for all of its various network
    services. The individual services that run bind to individual
    ports on this address. Defaults to the loopback 127.0.0.1.

  -config=<path>
    The path to either a single config file or a directory of config
    files to use for configuring the Nomad agent. This option may be
    specified multiple times. If multiple config files are used, the
    values from each will be merged together. During merging, values
    from files found later in the list are merged over values from
    previously parsed files.

  -data-dir=<path>
    The data directory used to store state and other persistent data.
    On client machines this is used to house allocation data such as
    downloaded artifacts used by drivers. On server nodes, the data
    dir is also used to store the replicated log.

  -plugin-dir=<path>
    The plugin directory is used to discover Nomad plugins. If not specified,
    the plugin directory defaults to <data-dir>/plugins/.

  -dc=<datacenter>
    The name of the datacenter this Nomad agent is a member of. By
    default this is set to "dc1".

  -log-level=<level>
    Specify the verbosity level of Nomad's logs. Valid values include
    DEBUG, INFO, and WARN, in decreasing order of verbosity. The
    default is INFO.

  -node=<name>
    The name of the local agent. This name is used to identify the node
    in the cluster. The name must be unique per region. The default is
    the current hostname of the machine.

  -region=<region>
    Name of the region the Nomad agent will be a member of. By default
    this value is set to "global".

  -dev
    Start the agent in development mode. This enables a pre-configured
    dual-role agent (client + server) which is useful for developing
    or testing Nomad. No other configuration is required to start the
    agent in this mode.

Server Options:

  -server
    Enable server mode for the agent. Agents in server mode are
    clustered together and handle the additional responsibility of
    leader election, data replication, and scheduling work onto
    eligible client nodes.

  -bootstrap-expect=<num>
    Configures the expected number of server nodes to wait for before
    bootstrapping the cluster. Once <num> servers have joined each other,
    Nomad initiates the bootstrap process.

  -encrypt=<key>
    Provides the gossip encryption key.

  -join=<address>
    Address of an agent to join at start time. Can be specified
    multiple times.

  -raft-protocol=<num>
    The Raft protocol version to use. Used for enabling certain Autopilot
    features. Defaults to 2.

  -retry-join=<address>
    Address of an agent to join at start time with retries enabled.
    Can be specified multiple times.

  -retry-max=<num>
    Maximum number of join attempts. Defaults to 0, which will retry
    indefinitely.

  -retry-interval=<dur>
    Time to wait between join attempts.

  -rejoin
    Ignore a previous leave and attempt to rejoin the cluster.

Client Options:

  -client
    Enable client mode for the agent. Client mode enables a given node to be
    evaluated for allocations. If client mode is not enabled, no work will be
    scheduled to the agent.

  -state-dir
    The directory used to store state and other persistent data. If not
    specified a subdirectory under the "-data-dir" will be used.

  -alloc-dir
    The directory used to store allocation data such as downloaded artifacts as
    well as data produced by tasks. If not specified, a subdirectory under the
    "-data-dir" will be used.

  -servers
    A list of known server addresses to connect to given as "host:port" and
    delimited by commas.

  -node-class
    Mark this node as a member of a node-class. This can be used to label
    similar node types.

  -meta
    User specified metadata to associate with the node. Each instance of -meta
    parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
    to be added.

  -network-interface
    Forces the network fingerprinter to use the specified network interface.

  -network-speed
    The default speed for network interfaces in MBits if the link speed can not
    be determined dynamically.

ACL Options:

  -acl-enabled
    Specifies whether the agent should enable ACLs.

  -acl-replication-token
    The replication token for servers to use when replicating from the
    authoritative region. The token must be a valid management token from the
    authoritative region.

Consul Options:

  -consul-address=<addr>
    Specifies the address to the local Consul agent, given in the format host:port.
    Supports Unix sockets with the format: unix:///tmp/consul/consul.sock

  -consul-auth=<auth>
    Specifies the HTTP Basic Authentication information to use for access to the
    Consul Agent, given in the format username:password.

  -consul-auto-advertise
    Specifies if Nomad should advertise its services in Consul. The services
    are named according to server_service_name and client_service_name. Nomad
    servers and clients advertise their respective services, each tagged
    appropriately with either http or rpc tag. Nomad servers also advertise a
    serf tagged service.

  -consul-ca-file=<path>
    Specifies an optional path to the CA certificate used for Consul communication.
    This defaults to the system bundle if unspecified.

  -consul-cert-file=<path>
    Specifies the path to the certificate used for Consul communication. If this
    is set then you need to also set key_file.

  -consul-checks-use-advertise
    Specifies if Consul health checks should bind to the advertise address. By
    default, this is the bind address.

  -consul-client-auto-join
    Specifies if the Nomad clients should automatically discover servers in the
    same region by searching for the Consul service name defined in the
    server_service_name option.

  -consul-client-service-name=<name>
    Specifies the name of the service in Consul for the Nomad clients.

  -consul-client-http-check-name=<name>
    Specifies the HTTP health check name in Consul for the Nomad clients.

  -consul-key-file=<path>
    Specifies the path to the private key used for Consul communication. If this
    is set then you need to also set cert_file.

  -consul-server-service-name=<name>
    Specifies the name of the service in Consul for the Nomad servers.

  -consul-server-http-check-name=<name>
    Specifies the HTTP health check name in Consul for the Nomad servers.

  -consul-server-serf-check-name=<name>
    Specifies the Serf health check name in Consul for the Nomad servers.

  -consul-server-rpc-check-name=<name>
    Specifies the RPC health check name in Consul for the Nomad servers.

  -consul-server-auto-join
    Specifies if the Nomad servers should automatically discover and join other
    Nomad servers by searching for the Consul service name defined in the
    server_service_name option. This search only happens if the server does not
    have a leader.

  -consul-ssl
    Specifies if the transport scheme should use HTTPS to communicate with the
    Consul agent.

  -consul-token=<token>
    Specifies the token used to provide a per-request ACL token.

  -consul-verify-ssl
    Specifies if SSL peer verification should be used when communicating to the
    Consul API client over HTTPS.

Vault Options:

  -vault-enabled
    Whether to enable or disable Vault integration.

  -vault-address=<addr>
    The address to communicate with Vault. This should be provided with the http://
    or https:// prefix.

  -vault-token=<token>
    The Vault token used to derive tokens from Vault on behalf of clients.
    This only needs to be set on Servers. Overrides the Vault token read from
    the VAULT_TOKEN environment variable.

  -vault-create-from-role=<role>
    The role name to create tokens for tasks from.

  -vault-allow-unauthenticated
    Whether to allow jobs to be submitted that request Vault Tokens but do not
    authenticate. The flag only applies to Servers.

  -vault-ca-file=<path>
    The path to a PEM-encoded CA cert file to use to verify the Vault server SSL
    certificate.

  -vault-ca-path=<path>
    The path to a directory of PEM-encoded CA cert files to verify the Vault server
    certificate.

  -vault-cert-file=<path>
    The path to the certificate for Vault communication.

  -vault-key-file=<path>
    The path to the private key for Vault communication.

  -vault-tls-skip-verify
    Enables or disables SSL certificate verification.

  -vault-tls-server-name=<name>
    Used to set the SNI host when connecting over TLS.
`
	return strings.TrimSpace(helpText)
}
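
// The help text above mirrors the flags registered in readConfig. Two
// representative invocations, shown for illustration only (the paths and the
// bootstrap count are placeholders, not defaults):
//
//	nomad agent -dev
//	nomad agent -config=/etc/nomad.d -data-dir=/opt/nomad/data -server -bootstrap-expect=3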