github.com/djenriquez/nomad-1@v0.8.1/command/agent/command.go (about) 1 package agent 2 3 import ( 4 "flag" 5 "fmt" 6 "io" 7 "log" 8 "os" 9 "os/signal" 10 "path/filepath" 11 "reflect" 12 "sort" 13 "strconv" 14 "strings" 15 "syscall" 16 "time" 17 18 metrics "github.com/armon/go-metrics" 19 "github.com/armon/go-metrics/circonus" 20 "github.com/armon/go-metrics/datadog" 21 "github.com/armon/go-metrics/prometheus" 22 "github.com/hashicorp/consul/lib" 23 checkpoint "github.com/hashicorp/go-checkpoint" 24 gsyslog "github.com/hashicorp/go-syslog" 25 "github.com/hashicorp/logutils" 26 flaghelper "github.com/hashicorp/nomad/helper/flag-helpers" 27 gatedwriter "github.com/hashicorp/nomad/helper/gated-writer" 28 "github.com/hashicorp/nomad/nomad/structs/config" 29 "github.com/hashicorp/nomad/version" 30 "github.com/mitchellh/cli" 31 "github.com/posener/complete" 32 ) 33 34 // gracefulTimeout controls how long we wait before forcefully terminating 35 const gracefulTimeout = 5 * time.Second 36 37 // Command is a Command implementation that runs a Nomad agent. 38 // The command will not end unless a shutdown message is sent on the 39 // ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly 40 // exit. 41 type Command struct { 42 Version *version.VersionInfo 43 Ui cli.Ui 44 ShutdownCh <-chan struct{} 45 46 args []string 47 agent *Agent 48 httpServer *HTTPServer 49 logFilter *logutils.LevelFilter 50 logOutput io.Writer 51 retryJoinErrCh chan struct{} 52 } 53 54 func (c *Command) readConfig() *Config { 55 var dev bool 56 var configPath []string 57 var servers string 58 var meta []string 59 60 // Make a new, empty config. 
61 cmdConfig := &Config{ 62 Client: &ClientConfig{}, 63 Consul: &config.ConsulConfig{}, 64 Ports: &Ports{}, 65 Server: &ServerConfig{}, 66 Vault: &config.VaultConfig{}, 67 ACL: &ACLConfig{}, 68 } 69 70 flags := flag.NewFlagSet("agent", flag.ContinueOnError) 71 flags.Usage = func() { c.Ui.Error(c.Help()) } 72 73 // Role options 74 flags.BoolVar(&dev, "dev", false, "") 75 flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "") 76 flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "") 77 78 // Server-only options 79 flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "") 80 flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "") 81 flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "") 82 flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "") 83 flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "") 84 flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "") 85 flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key") 86 flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "") 87 88 // Client-only options 89 flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "") 90 flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "") 91 flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "") 92 flags.StringVar(&servers, "servers", "", "") 93 flags.Var((*flaghelper.StringFlag)(&meta), "meta", "") 94 flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "") 95 flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "") 96 97 // General options 98 flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config") 99 flags.StringVar(&cmdConfig.BindAddr, "bind", "", "") 100 flags.StringVar(&cmdConfig.Region, "region", "", "") 101 flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "") 102 flags.StringVar(&cmdConfig.Datacenter, "dc", "", "") 103 
flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "") 104 flags.StringVar(&cmdConfig.NodeName, "node", "", "") 105 106 // Consul options 107 flags.StringVar(&cmdConfig.Consul.Auth, "consul-auth", "", "") 108 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 109 cmdConfig.Consul.AutoAdvertise = &b 110 return nil 111 }), "consul-auto-advertise", "") 112 flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "") 113 flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "") 114 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 115 cmdConfig.Consul.ChecksUseAdvertise = &b 116 return nil 117 }), "consul-checks-use-advertise", "") 118 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 119 cmdConfig.Consul.ClientAutoJoin = &b 120 return nil 121 }), "consul-client-auto-join", "") 122 flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "") 123 flags.StringVar(&cmdConfig.Consul.ClientHTTPCheckName, "consul-client-http-check-name", "", "") 124 flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "") 125 flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "") 126 flags.StringVar(&cmdConfig.Consul.ServerHTTPCheckName, "consul-server-http-check-name", "", "") 127 flags.StringVar(&cmdConfig.Consul.ServerSerfCheckName, "consul-server-serf-check-name", "", "") 128 flags.StringVar(&cmdConfig.Consul.ServerRPCCheckName, "consul-server-rpc-check-name", "", "") 129 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 130 cmdConfig.Consul.ServerAutoJoin = &b 131 return nil 132 }), "consul-server-auto-join", "") 133 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 134 cmdConfig.Consul.EnableSSL = &b 135 return nil 136 }), "consul-ssl", "") 137 flags.StringVar(&cmdConfig.Consul.Token, "consul-token", "", "") 138 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 139 cmdConfig.Consul.VerifySSL = &b 140 return nil 141 }), "consul-verify-ssl", "") 142 
flags.StringVar(&cmdConfig.Consul.Addr, "consul-address", "", "") 143 144 // Vault options 145 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 146 cmdConfig.Vault.Enabled = &b 147 return nil 148 }), "vault-enabled", "") 149 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 150 cmdConfig.Vault.AllowUnauthenticated = &b 151 return nil 152 }), "vault-allow-unauthenticated", "") 153 flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "") 154 flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "") 155 flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "") 156 flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "") 157 flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "") 158 flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "") 159 flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "") 160 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 161 cmdConfig.Vault.TLSSkipVerify = &b 162 return nil 163 }), "vault-tls-skip-verify", "") 164 flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "") 165 166 // ACL options 167 flags.BoolVar(&cmdConfig.ACL.Enabled, "acl-enabled", false, "") 168 flags.StringVar(&cmdConfig.ACL.ReplicationToken, "acl-replication-token", "", "") 169 170 if err := flags.Parse(c.args); err != nil { 171 return nil 172 } 173 174 // Split the servers. 175 if servers != "" { 176 cmdConfig.Client.Servers = strings.Split(servers, ",") 177 } 178 179 // Parse the meta flags. 
180 metaLength := len(meta) 181 if metaLength != 0 { 182 cmdConfig.Client.Meta = make(map[string]string, metaLength) 183 for _, kv := range meta { 184 parts := strings.SplitN(kv, "=", 2) 185 if len(parts) != 2 { 186 c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv)) 187 return nil 188 } 189 190 cmdConfig.Client.Meta[parts[0]] = parts[1] 191 } 192 } 193 194 // Load the configuration 195 var config *Config 196 if dev { 197 config = DevConfig() 198 } else { 199 config = DefaultConfig() 200 } 201 202 // Merge in the enterprise overlay 203 config.Merge(DefaultEntConfig()) 204 205 for _, path := range configPath { 206 current, err := LoadConfig(path) 207 if err != nil { 208 c.Ui.Error(fmt.Sprintf( 209 "Error loading configuration from %s: %s", path, err)) 210 return nil 211 } 212 213 // The user asked us to load some config here but we didn't find any, 214 // so we'll complain but continue. 215 if current == nil || reflect.DeepEqual(current, &Config{}) { 216 c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path)) 217 } 218 219 if config == nil { 220 config = current 221 } else { 222 config = config.Merge(current) 223 } 224 } 225 226 // Ensure the sub-structs at least exist 227 if config.Client == nil { 228 config.Client = &ClientConfig{} 229 } 230 if config.Server == nil { 231 config.Server = &ServerConfig{} 232 } 233 234 // Merge any CLI options over config file options 235 config = config.Merge(cmdConfig) 236 237 // Set the version info 238 config.Version = c.Version 239 240 // Normalize binds, ports, addresses, and advertise 241 if err := config.normalizeAddrs(); err != nil { 242 c.Ui.Error(err.Error()) 243 return nil 244 } 245 246 // Check to see if we should read the Vault token from the environment 247 if config.Vault.Token == "" { 248 if token, ok := os.LookupEnv("VAULT_TOKEN"); ok { 249 config.Vault.Token = token 250 } 251 } 252 253 if dev { 254 // Skip validation for dev mode 255 return config 256 } 257 258 if config.Server.EncryptKey != 
"" { 259 if _, err := config.Server.EncryptBytes(); err != nil { 260 c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err)) 261 return nil 262 } 263 keyfile := filepath.Join(config.DataDir, serfKeyring) 264 if _, err := os.Stat(keyfile); err == nil { 265 c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring") 266 } 267 } 268 269 // Parse the RetryInterval. 270 dur, err := time.ParseDuration(config.Server.RetryInterval) 271 if err != nil { 272 c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err)) 273 return nil 274 } 275 config.Server.retryInterval = dur 276 277 // Check that the server is running in at least one mode. 278 if !(config.Server.Enabled || config.Client.Enabled) { 279 c.Ui.Error("Must specify either server, client or dev mode for the agent.") 280 return nil 281 } 282 283 // Verify the paths are absolute. 284 dirs := map[string]string{ 285 "data-dir": config.DataDir, 286 "alloc-dir": config.Client.AllocDir, 287 "state-dir": config.Client.StateDir, 288 } 289 for k, dir := range dirs { 290 if dir == "" { 291 continue 292 } 293 294 if !filepath.IsAbs(dir) { 295 c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir)) 296 return nil 297 } 298 } 299 300 // Ensure that we have the directories we neet to run. 301 if config.Server.Enabled && config.DataDir == "" { 302 c.Ui.Error("Must specify data directory") 303 return nil 304 } 305 306 // The config is valid if the top-level data-dir is set or if both 307 // alloc-dir and state-dir are set. 
308 if config.Client.Enabled && config.DataDir == "" { 309 if config.Client.AllocDir == "" || config.Client.StateDir == "" { 310 c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.") 311 return nil 312 } 313 } 314 315 // Check the bootstrap flags 316 if config.Server.BootstrapExpect > 0 && !config.Server.Enabled { 317 c.Ui.Error("Bootstrap requires server mode to be enabled") 318 return nil 319 } 320 if config.Server.BootstrapExpect == 1 { 321 c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.") 322 } 323 324 // Set up the TLS configuration properly if we have one. 325 // XXX chelseakomlo: set up a TLSConfig New method which would wrap 326 // constructor-type actions like this. 327 if config.TLSConfig != nil && !config.TLSConfig.IsEmpty() { 328 if err := config.TLSConfig.SetChecksum(); err != nil { 329 c.Ui.Error(fmt.Sprintf("WARNING: Error when parsing TLS configuration: %v", err)) 330 } 331 } 332 333 return config 334 } 335 336 // setupLoggers is used to setup the logGate, logWriter, and our logOutput 337 func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) { 338 // Setup logging. First create the gated log writer, which will 339 // store logs until we're ready to show them. Then create the level 340 // filter, filtering logs of the specified level. 341 logGate := &gatedwriter.Writer{ 342 Writer: &cli.UiWriter{Ui: c.Ui}, 343 } 344 345 c.logFilter = LevelFilter() 346 c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel)) 347 c.logFilter.Writer = logGate 348 if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) { 349 c.Ui.Error(fmt.Sprintf( 350 "Invalid log level: %s. 
Valid log levels are: %v", 351 c.logFilter.MinLevel, c.logFilter.Levels)) 352 return nil, nil, nil 353 } 354 355 // Check if syslog is enabled 356 var syslog io.Writer 357 if config.EnableSyslog { 358 l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad") 359 if err != nil { 360 c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err)) 361 return nil, nil, nil 362 } 363 syslog = &SyslogWrapper{l, c.logFilter} 364 } 365 366 // Create a log writer, and wrap a logOutput around it 367 logWriter := NewLogWriter(512) 368 var logOutput io.Writer 369 if syslog != nil { 370 logOutput = io.MultiWriter(c.logFilter, logWriter, syslog) 371 } else { 372 logOutput = io.MultiWriter(c.logFilter, logWriter) 373 } 374 c.logOutput = logOutput 375 log.SetOutput(logOutput) 376 return logGate, logWriter, logOutput 377 } 378 379 // setupAgent is used to start the agent and various interfaces 380 func (c *Command) setupAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) error { 381 c.Ui.Output("Starting Nomad agent...") 382 agent, err := NewAgent(config, logOutput, inmem) 383 if err != nil { 384 c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err)) 385 return err 386 } 387 c.agent = agent 388 389 // Setup the HTTP server 390 http, err := NewHTTPServer(agent, config) 391 if err != nil { 392 agent.Shutdown() 393 c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err)) 394 return err 395 } 396 c.httpServer = http 397 398 // Setup update checking 399 if config.DisableUpdateCheck != nil && *config.DisableUpdateCheck { 400 version := config.Version.Version 401 if config.Version.VersionPrerelease != "" { 402 version += fmt.Sprintf("-%s", config.Version.VersionPrerelease) 403 } 404 updateParams := &checkpoint.CheckParams{ 405 Product: "nomad", 406 Version: version, 407 } 408 if !config.DisableAnonymousSignature { 409 updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature") 410 } 411 412 // Schedule a periodic check with 
expected interval of 24 hours 413 checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults) 414 415 // Do an immediate check within the next 30 seconds 416 go func() { 417 time.Sleep(lib.RandomStagger(30 * time.Second)) 418 c.checkpointResults(checkpoint.Check(updateParams)) 419 }() 420 } 421 422 return nil 423 } 424 425 // checkpointResults is used to handler periodic results from our update checker 426 func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) { 427 if err != nil { 428 c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err)) 429 return 430 } 431 if results.Outdated { 432 c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, c.Version.VersionNumber())) 433 } 434 for _, alert := range results.Alerts { 435 switch alert.Level { 436 case "info": 437 c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL)) 438 default: 439 c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL)) 440 } 441 } 442 } 443 444 func (c *Command) AutocompleteFlags() complete.Flags { 445 configFilePredictor := complete.PredictOr( 446 complete.PredictFiles("*.json"), 447 complete.PredictFiles("*.hcl")) 448 449 return map[string]complete.Predictor{ 450 "-config": configFilePredictor, 451 } 452 } 453 454 func (c *Command) AutocompleteArgs() complete.Predictor { 455 return nil 456 } 457 458 func (c *Command) Run(args []string) int { 459 c.Ui = &cli.PrefixedUi{ 460 OutputPrefix: "==> ", 461 InfoPrefix: " ", 462 ErrorPrefix: "==> ", 463 Ui: c.Ui, 464 } 465 466 // Parse our configs 467 c.args = args 468 config := c.readConfig() 469 if config == nil { 470 return 1 471 } 472 473 // Setup the log outputs 474 logGate, _, logOutput := c.setupLoggers(config) 475 if logGate == nil { 476 return 1 477 } 478 479 // Log config files 480 if len(config.Files) > 0 { 481 c.Ui.Output(fmt.Sprintf("Loaded configuration from %s", 
strings.Join(config.Files, ", "))) 482 } else { 483 c.Ui.Output("No configuration files loaded") 484 } 485 486 // Initialize the telemetry 487 inmem, err := c.setupTelemetry(config) 488 if err != nil { 489 c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err)) 490 return 1 491 } 492 493 // Create the agent 494 if err := c.setupAgent(config, logOutput, inmem); err != nil { 495 logGate.Flush() 496 return 1 497 } 498 defer c.agent.Shutdown() 499 500 // Shutdown the HTTP server at the end 501 defer func() { 502 if c.httpServer != nil { 503 c.httpServer.Shutdown() 504 } 505 }() 506 507 // Join startup nodes if specified 508 if err := c.startupJoin(config); err != nil { 509 c.Ui.Error(err.Error()) 510 return 1 511 } 512 513 // Compile agent information for output later 514 info := make(map[string]string) 515 info["version"] = config.Version.VersionNumber() 516 info["client"] = strconv.FormatBool(config.Client.Enabled) 517 info["log level"] = config.LogLevel 518 info["server"] = strconv.FormatBool(config.Server.Enabled) 519 info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter) 520 521 // Sort the keys for output 522 infoKeys := make([]string, 0, len(info)) 523 for key := range info { 524 infoKeys = append(infoKeys, key) 525 } 526 sort.Strings(infoKeys) 527 528 // Agent configuration output 529 padding := 18 530 c.Ui.Output("Nomad agent configuration:\n") 531 for _, k := range infoKeys { 532 c.Ui.Info(fmt.Sprintf( 533 "%s%s: %s", 534 strings.Repeat(" ", padding-len(k)), 535 strings.Title(k), 536 info[k])) 537 } 538 c.Ui.Output("") 539 540 // Output the header that the server has started 541 c.Ui.Output("Nomad agent started! 
Log data will stream in below:\n") 542 543 // Enable log streaming 544 logGate.Flush() 545 546 // Start retry join process 547 c.retryJoinErrCh = make(chan struct{}) 548 go c.retryJoin(config) 549 550 // Wait for exit 551 return c.handleSignals() 552 } 553 554 // handleSignals blocks until we get an exit-causing signal 555 func (c *Command) handleSignals() int { 556 signalCh := make(chan os.Signal, 4) 557 signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE) 558 559 // Wait for a signal 560 WAIT: 561 var sig os.Signal 562 select { 563 case s := <-signalCh: 564 sig = s 565 case <-c.ShutdownCh: 566 sig = os.Interrupt 567 case <-c.retryJoinErrCh: 568 return 1 569 } 570 571 // Skip any SIGPIPE signal and don't try to log it (See issues #1798, #3554) 572 if sig == syscall.SIGPIPE { 573 goto WAIT 574 } 575 576 c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig)) 577 578 // Check if this is a SIGHUP 579 if sig == syscall.SIGHUP { 580 c.handleReload() 581 goto WAIT 582 } 583 584 // Check if we should do a graceful leave 585 graceful := false 586 if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt { 587 graceful = true 588 } else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm { 589 graceful = true 590 } 591 592 // Bail fast if not doing a graceful leave 593 if !graceful { 594 return 1 595 } 596 597 // Attempt a graceful leave 598 gracefulCh := make(chan struct{}) 599 c.Ui.Output("Gracefully shutting down agent...") 600 go func() { 601 if err := c.agent.Leave(); err != nil { 602 c.Ui.Error(fmt.Sprintf("Error: %s", err)) 603 return 604 } 605 close(gracefulCh) 606 }() 607 608 // Wait for leave or another signal 609 select { 610 case <-signalCh: 611 return 1 612 case <-time.After(gracefulTimeout): 613 return 1 614 case <-gracefulCh: 615 return 0 616 } 617 } 618 619 // reloadHTTPServer shuts down the existing HTTP server and restarts it. This 620 // is helpful when reloading the agent configuration. 
621 func (c *Command) reloadHTTPServer() error { 622 c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration") 623 624 c.httpServer.Shutdown() 625 626 http, err := NewHTTPServer(c.agent, c.agent.config) 627 if err != nil { 628 return err 629 } 630 c.httpServer = http 631 632 return nil 633 } 634 635 // handleReload is invoked when we should reload our configs, e.g. SIGHUP 636 func (c *Command) handleReload() { 637 c.Ui.Output("Reloading configuration...") 638 newConf := c.readConfig() 639 if newConf == nil { 640 c.Ui.Error(fmt.Sprintf("Failed to reload configs")) 641 return 642 } 643 644 // Change the log level 645 minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel)) 646 if ValidateLevelFilter(minLevel, c.logFilter) { 647 c.logFilter.SetMinLevel(minLevel) 648 } else { 649 c.Ui.Error(fmt.Sprintf( 650 "Invalid log level: %s. Valid log levels are: %v", 651 minLevel, c.logFilter.Levels)) 652 653 // Keep the current log level 654 newConf.LogLevel = c.agent.GetConfig().LogLevel 655 } 656 657 shouldReloadAgent, shouldReloadHTTP, shouldReloadRPC := c.agent.ShouldReload(newConf) 658 if shouldReloadAgent { 659 c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config") 660 err := c.agent.Reload(newConf) 661 if err != nil { 662 c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) 663 return 664 } 665 } 666 667 if shouldReloadRPC { 668 if s := c.agent.Server(); s != nil { 669 sconf, err := convertServerConfig(newConf, c.logOutput) 670 c.agent.logger.Printf("[DEBUG] agent: starting reload of server config") 671 if err != nil { 672 c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err) 673 return 674 } else { 675 if err := s.Reload(sconf); err != nil { 676 c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err) 677 return 678 } 679 } 680 } 681 682 if s := c.agent.Client(); s != nil { 683 clientConfig, err := c.agent.clientConfig() 684 
c.agent.logger.Printf("[DEBUG] agent: starting reload of client config") 685 if err != nil { 686 c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) 687 return 688 } 689 if err := c.agent.Client().Reload(clientConfig); err != nil { 690 c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) 691 return 692 } 693 } 694 } 695 696 // reload HTTP server after we have reloaded both client and server, in case 697 // we error in either of the above cases. For example, reloading the http 698 // server to a TLS connection could succeed, while reloading the server's rpc 699 // connections could fail. 700 if shouldReloadHTTP { 701 err := c.reloadHTTPServer() 702 if err != nil { 703 c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) 704 return 705 } 706 } 707 } 708 709 // setupTelemetry is used ot setup the telemetry sub-systems 710 func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) { 711 /* Setup telemetry 712 Aggregate on 10 second intervals for 1 minute. Expose the 713 metrics over stderr when there is a SIGUSR1 received. 714 */ 715 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 716 metrics.DefaultInmemSignal(inm) 717 718 var telConfig *Telemetry 719 if config.Telemetry == nil { 720 telConfig = &Telemetry{} 721 } else { 722 telConfig = config.Telemetry 723 } 724 725 metricsConf := metrics.DefaultConfig("nomad") 726 metricsConf.EnableHostname = !telConfig.DisableHostname 727 728 // Prefer the hostname as a label. 
729 metricsConf.EnableHostnameLabel = !telConfig.DisableHostname && 730 !telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics 731 732 if telConfig.UseNodeName { 733 metricsConf.HostName = config.NodeName 734 metricsConf.EnableHostname = true 735 } 736 737 // Configure the statsite sink 738 var fanout metrics.FanoutSink 739 if telConfig.StatsiteAddr != "" { 740 sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr) 741 if err != nil { 742 return inm, err 743 } 744 fanout = append(fanout, sink) 745 } 746 747 // Configure the statsd sink 748 if telConfig.StatsdAddr != "" { 749 sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr) 750 if err != nil { 751 return inm, err 752 } 753 fanout = append(fanout, sink) 754 } 755 756 // Configure the prometheus sink 757 if telConfig.PrometheusMetrics { 758 promSink, err := prometheus.NewPrometheusSink() 759 if err != nil { 760 return inm, err 761 } 762 fanout = append(fanout, promSink) 763 } 764 765 // Configure the datadog sink 766 if telConfig.DataDogAddr != "" { 767 sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName) 768 if err != nil { 769 return inm, err 770 } 771 sink.SetTags(telConfig.DataDogTags) 772 fanout = append(fanout, sink) 773 } 774 775 // Configure the Circonus sink 776 if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" { 777 cfg := &circonus.Config{} 778 cfg.Interval = telConfig.CirconusSubmissionInterval 779 cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken 780 cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp 781 cfg.CheckManager.API.URL = telConfig.CirconusAPIURL 782 cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL 783 cfg.CheckManager.Check.ID = telConfig.CirconusCheckID 784 cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation 785 cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID 786 cfg.CheckManager.Check.SearchTag = 
telConfig.CirconusCheckSearchTag 787 cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags 788 cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName 789 cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID 790 cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag 791 792 if cfg.CheckManager.Check.DisplayName == "" { 793 cfg.CheckManager.Check.DisplayName = "Nomad" 794 } 795 796 if cfg.CheckManager.API.TokenApp == "" { 797 cfg.CheckManager.API.TokenApp = "nomad" 798 } 799 800 if cfg.CheckManager.Check.SearchTag == "" { 801 cfg.CheckManager.Check.SearchTag = "service:nomad" 802 } 803 804 sink, err := circonus.NewCirconusSink(cfg) 805 if err != nil { 806 return inm, err 807 } 808 sink.Start() 809 fanout = append(fanout, sink) 810 } 811 812 // Initialize the global sink 813 if len(fanout) > 0 { 814 fanout = append(fanout, inm) 815 metrics.NewGlobal(metricsConf, fanout) 816 } else { 817 metricsConf.EnableHostname = false 818 metrics.NewGlobal(metricsConf, inm) 819 } 820 return inm, nil 821 } 822 823 func (c *Command) startupJoin(config *Config) error { 824 if len(config.Server.StartJoin) == 0 || !config.Server.Enabled { 825 return nil 826 } 827 828 c.Ui.Output("Joining cluster...") 829 n, err := c.agent.server.Join(config.Server.StartJoin) 830 if err != nil { 831 return err 832 } 833 834 c.Ui.Output(fmt.Sprintf("Join completed. Synced with %d initial agents", n)) 835 return nil 836 } 837 838 // retryJoin is used to handle retrying a join until it succeeds or all retries 839 // are exhausted. 840 func (c *Command) retryJoin(config *Config) { 841 if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled { 842 return 843 } 844 845 logger := c.agent.logger 846 logger.Printf("[INFO] agent: Joining cluster...") 847 848 attempt := 0 849 for { 850 n, err := c.agent.server.Join(config.Server.RetryJoin) 851 if err == nil { 852 logger.Printf("[INFO] agent: Join completed. 
Synced with %d initial agents", n) 853 return 854 } 855 856 attempt++ 857 if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts { 858 logger.Printf("[ERR] agent: max join retry exhausted, exiting") 859 close(c.retryJoinErrCh) 860 return 861 } 862 863 logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err, 864 config.Server.RetryInterval) 865 time.Sleep(config.Server.retryInterval) 866 } 867 } 868 869 func (c *Command) Synopsis() string { 870 return "Runs a Nomad agent" 871 } 872 873 func (c *Command) Help() string { 874 helpText := ` 875 Usage: nomad agent [options] 876 877 Starts the Nomad agent and runs until an interrupt is received. 878 The agent may be a client and/or server. 879 880 The Nomad agent's configuration primarily comes from the config 881 files used, but a subset of the options may also be passed directly 882 as CLI arguments, listed below. 883 884 General Options (clients and servers): 885 886 -bind=<addr> 887 The address the agent will bind to for all of its various network 888 services. The individual services that run bind to individual 889 ports on this address. Defaults to the loopback 127.0.0.1. 890 891 -config=<path> 892 The path to either a single config file or a directory of config 893 files to use for configuring the Nomad agent. This option may be 894 specified multiple times. If multiple config files are used, the 895 values from each will be merged together. During merging, values 896 from files found later in the list are merged over values from 897 previously parsed files. 898 899 -data-dir=<path> 900 The data directory used to store state and other persistent data. 901 On client machines this is used to house allocation data such as 902 downloaded artifacts used by drivers. On server nodes, the data 903 dir is also used to store the replicated log. 904 905 -dc=<datacenter> 906 The name of the datacenter this Nomad agent is a member of. By 907 default this is set to "dc1". 
908 909 -log-level=<level> 910 Specify the verbosity level of Nomad's logs. Valid values include 911 DEBUG, INFO, and WARN, in decreasing order of verbosity. The 912 default is INFO. 913 914 -node=<name> 915 The name of the local agent. This name is used to identify the node 916 in the cluster. The name must be unique per region. The default is 917 the current hostname of the machine. 918 919 -region=<region> 920 Name of the region the Nomad agent will be a member of. By default 921 this value is set to "global". 922 923 -dev 924 Start the agent in development mode. This enables a pre-configured 925 dual-role agent (client + server) which is useful for developing 926 or testing Nomad. No other configuration is required to start the 927 agent in this mode. 928 929 Server Options: 930 931 -server 932 Enable server mode for the agent. Agents in server mode are 933 clustered together and handle the additional responsibility of 934 leader election, data replication, and scheduling work onto 935 eligible client nodes. 936 937 -bootstrap-expect=<num> 938 Configures the expected number of servers nodes to wait for before 939 bootstrapping the cluster. Once <num> servers have joined each other, 940 Nomad initiates the bootstrap process. 941 942 -encrypt=<key> 943 Provides the gossip encryption key 944 945 -join=<address> 946 Address of an agent to join at start time. Can be specified 947 multiple times. 948 949 -raft-protocol=<num> 950 The Raft protocol version to use. Used for enabling certain Autopilot 951 features. Defaults to 2. 952 953 -retry-join=<address> 954 Address of an agent to join at start time with retries enabled. 955 Can be specified multiple times. 956 957 -retry-max=<num> 958 Maximum number of join attempts. Defaults to 0, which will retry 959 indefinitely. 960 961 -retry-interval=<dur> 962 Time to wait between join attempts. 963 964 -rejoin 965 Ignore a previous leave and attempts to rejoin the cluster. 
966 967 Client Options: 968 969 -client 970 Enable client mode for the agent. Client mode enables a given node to be 971 evaluated for allocations. If client mode is not enabled, no work will be 972 scheduled to the agent. 973 974 -state-dir 975 The directory used to store state and other persistent data. If not 976 specified a subdirectory under the "-data-dir" will be used. 977 978 -alloc-dir 979 The directory used to store allocation data such as downloaded artifacts as 980 well as data produced by tasks. If not specified, a subdirectory under the 981 "-data-dir" will be used. 982 983 -servers 984 A list of known server addresses to connect to given as "host:port" and 985 delimited by commas. 986 987 -node-class 988 Mark this node as a member of a node-class. This can be used to label 989 similar node types. 990 991 -meta 992 User specified metadata to associated with the node. Each instance of -meta 993 parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair 994 to be added. 995 996 -network-interface 997 Forces the network fingerprinter to use the specified network interface. 998 999 -network-speed 1000 The default speed for network interfaces in MBits if the link speed can not 1001 be determined dynamically. 1002 1003 ACL Options: 1004 1005 -acl-enabled 1006 Specifies whether the agent should enable ACLs. 1007 1008 -acl-replication-token 1009 The replication token for servers to use when replicating from the 1010 authoritative region. The token must be a valid management token from the 1011 authoritative region. 1012 1013 Consul Options: 1014 1015 -consul-address=<addr> 1016 Specifies the address to the local Consul agent, given in the format host:port. 1017 Supports Unix sockets with the format: unix:///tmp/consul/consul.sock 1018 1019 -consul-auth=<auth> 1020 Specifies the HTTP Basic Authentication information to use for access to the 1021 Consul Agent, given in the format username:password. 
1022 1023 -consul-auto-advertise 1024 Specifies if Nomad should advertise its services in Consul. The services 1025 are named according to server_service_name and client_service_name. Nomad 1026 servers and clients advertise their respective services, each tagged 1027 appropriately with either http or rpc tag. Nomad servers also advertise a 1028 serf tagged service. 1029 1030 -consul-ca-file=<path> 1031 Specifies an optional path to the CA certificate used for Consul communication. 1032 This defaults to the system bundle if unspecified. 1033 1034 -consul-cert-file=<path> 1035 Specifies the path to the certificate used for Consul communication. If this 1036 is set then you need to also set key_file. 1037 1038 -consul-checks-use-advertise 1039 Specifies if Consul health checks should bind to the advertise address. By 1040 default, this is the bind address. 1041 1042 -consul-client-auto-join 1043 Specifies if the Nomad clients should automatically discover servers in the 1044 same region by searching for the Consul service name defined in the 1045 server_service_name option. 1046 1047 -consul-client-service-name=<name> 1048 Specifies the name of the service in Consul for the Nomad clients. 1049 1050 -consul-client-http-check-name=<name> 1051 Specifies the HTTP health check name in Consul for the Nomad clients. 1052 1053 -consul-key-file=<path> 1054 Specifies the path to the private key used for Consul communication. If this 1055 is set then you need to also set cert_file. 1056 1057 -consul-server-service-name=<name> 1058 Specifies the name of the service in Consul for the Nomad servers. 1059 1060 -consul-server-http-check-name=<name> 1061 Specifies the HTTP health check name in Consul for the Nomad servers. 1062 1063 -consul-server-serf-check-name=<name> 1064 Specifies the Serf health check name in Consul for the Nomad servers. 1065 1066 -consul-server-rpc-check-name=<name> 1067 Specifies the RPC health check name in Consul for the Nomad servers.
1068 1069 -consul-server-auto-join 1070 Specifies if the Nomad servers should automatically discover and join other 1071 Nomad servers by searching for the Consul service name defined in the 1072 server_service_name option. This search only happens if the server does not 1073 have a leader. 1074 1075 -consul-ssl 1076 Specifies if the transport scheme should use HTTPS to communicate with the 1077 Consul agent. 1078 1079 -consul-token=<token> 1080 Specifies the token used to provide a per-request ACL token. 1081 1082 -consul-verify-ssl 1083 Specifies if SSL peer verification should be used when communicating to the 1084 Consul API client over HTTPS. 1085 1086 Vault Options: 1087 1088 -vault-enabled 1089 Whether to enable or disable Vault integration. 1090 1091 -vault-address=<addr> 1092 The address to communicate with Vault. This should be provided with the http:// 1093 or https:// prefix. 1094 1095 -vault-token=<token> 1096 The Vault token used to derive tokens from Vault on behalf of clients. 1097 This only needs to be set on Servers. Overrides the Vault token read from 1098 the VAULT_TOKEN environment variable. 1099 1100 -vault-create-from-role=<role> 1101 The role name to create tokens for tasks from. 1102 1103 -vault-allow-unauthenticated 1104 Whether to allow jobs to be submitted that request Vault Tokens but do not 1105 authenticate. The flag only applies to Servers. 1106 1107 -vault-ca-file=<path> 1108 The path to a PEM-encoded CA cert file to use to verify the Vault server SSL 1109 certificate. 1110 1111 -vault-ca-path=<path> 1112 The path to a directory of PEM-encoded CA cert files to verify the Vault server 1113 certificate. 1114 1115 -vault-cert-file=<path> 1116 The path to the certificate for Vault communication. 1117 1118 -vault-key-file=<path> 1119 The path to the private key for Vault communication. 1120 1121 -vault-tls-skip-verify=<bool> 1122 Enables or disables SSL certificate verification.
1123 1124 -vault-tls-server-name=<name> 1125 Used to set the SNI host when connecting over TLS. 1126 ` 1127 return strings.TrimSpace(helpText) 1128 }