github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/command/agent/command.go (about) 1 package agent 2 3 import ( 4 "flag" 5 "fmt" 6 "io" 7 "log" 8 "os" 9 "os/signal" 10 "path/filepath" 11 "reflect" 12 "sort" 13 "strconv" 14 "strings" 15 "syscall" 16 "time" 17 18 metrics "github.com/armon/go-metrics" 19 "github.com/armon/go-metrics/circonus" 20 "github.com/armon/go-metrics/datadog" 21 "github.com/armon/go-metrics/prometheus" 22 "github.com/hashicorp/consul/lib" 23 checkpoint "github.com/hashicorp/go-checkpoint" 24 gsyslog "github.com/hashicorp/go-syslog" 25 "github.com/hashicorp/logutils" 26 flaghelper "github.com/hashicorp/nomad/helper/flag-helpers" 27 gatedwriter "github.com/hashicorp/nomad/helper/gated-writer" 28 "github.com/hashicorp/nomad/nomad/structs/config" 29 "github.com/hashicorp/nomad/version" 30 "github.com/mitchellh/cli" 31 "github.com/posener/complete" 32 ) 33 34 // gracefulTimeout controls how long we wait before forcefully terminating 35 const gracefulTimeout = 5 * time.Second 36 37 // Command is a Command implementation that runs a Nomad agent. 38 // The command will not end unless a shutdown message is sent on the 39 // ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly 40 // exit. 41 type Command struct { 42 Version *version.VersionInfo 43 Ui cli.Ui 44 ShutdownCh <-chan struct{} 45 46 args []string 47 agent *Agent 48 httpServer *HTTPServer 49 logFilter *logutils.LevelFilter 50 logOutput io.Writer 51 retryJoinErrCh chan struct{} 52 } 53 54 func (c *Command) readConfig() *Config { 55 var dev bool 56 var configPath []string 57 var servers string 58 var meta []string 59 60 // Make a new, empty config. 61 cmdConfig := &Config{ 62 Client: &ClientConfig{}, 63 Consul: &config.ConsulConfig{}, 64 Ports: &Ports{}, 65 Server: &ServerConfig{}, 66 Vault: &config.VaultConfig{}, 67 ACL: &ACLConfig{}, 68 } 69 70 flags := flag.NewFlagSet("agent", flag.ContinueOnError) 71 flags.Usage = func() { c.Ui.Error(c.Help()) } 72 73 // Role options 74 flags.BoolVar(&dev, "dev", false, "") 75 flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "") 76 flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "") 77 78 // Server-only options 79 flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "") 80 flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "") 81 flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "") 82 flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "") 83 flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "") 84 flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "") 85 flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key") 86 flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "") 87 88 // Client-only options 89 flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "") 90 flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "") 91 flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "") 92 flags.StringVar(&servers, "servers", "", "") 93 flags.Var((*flaghelper.StringFlag)(&meta), "meta", "") 94 flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "") 95 flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "") 96 97 // General options 98 flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config") 99 flags.StringVar(&cmdConfig.BindAddr, "bind", "", "") 100 flags.StringVar(&cmdConfig.Region, "region", "", "") 101 flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "") 102 flags.StringVar(&cmdConfig.Datacenter, "dc", "", "") 103 flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "") 104 flags.StringVar(&cmdConfig.NodeName, "node", "", "") 105 106 // Consul options 107 flags.StringVar(&cmdConfig.Consul.Auth, "consul-auth", "", "") 108 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 109 cmdConfig.Consul.AutoAdvertise = &b 110 return nil 111 }), "consul-auto-advertise", "") 112 flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "") 113 flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "") 114 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 115 cmdConfig.Consul.ChecksUseAdvertise = &b 116 return nil 117 }), "consul-checks-use-advertise", "") 118 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 119 cmdConfig.Consul.ClientAutoJoin = &b 120 return nil 121 }), "consul-client-auto-join", "") 122 flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "") 123 flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "") 124 flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "") 125 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 126 cmdConfig.Consul.ServerAutoJoin = &b 127 return nil 128 }), "consul-server-auto-join", "") 129 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 130 cmdConfig.Consul.EnableSSL = &b 131 return nil 132 }), "consul-ssl", "") 133 flags.StringVar(&cmdConfig.Consul.Token, "consul-token", "", "") 134 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 135 cmdConfig.Consul.VerifySSL = &b 136 return nil 137 }), "consul-verify-ssl", "") 138 flags.StringVar(&cmdConfig.Consul.Addr, "consul-address", "", "") 139 140 // Vault options 141 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 142 cmdConfig.Vault.Enabled = &b 143 return nil 144 }), "vault-enabled", "") 145 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 146 cmdConfig.Vault.AllowUnauthenticated = &b 147 return nil 148 }), "vault-allow-unauthenticated", "") 149 flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "") 150 flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "") 151 flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "") 152 flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "") 153 flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "") 154 flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "") 155 flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "") 156 flags.Var((flaghelper.FuncBoolVar)(func(b bool) error { 157 cmdConfig.Vault.TLSSkipVerify = &b 158 return nil 159 }), "vault-tls-skip-verify", "") 160 flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "") 161 162 // ACL options 163 flags.BoolVar(&cmdConfig.ACL.Enabled, "acl-enabled", false, "") 164 flags.StringVar(&cmdConfig.ACL.ReplicationToken, "acl-replication-token", "", "") 165 166 if err := flags.Parse(c.args); err != nil { 167 return nil 168 } 169 170 // Split the servers. 171 if servers != "" { 172 cmdConfig.Client.Servers = strings.Split(servers, ",") 173 } 174 175 // Parse the meta flags. 176 metaLength := len(meta) 177 if metaLength != 0 { 178 cmdConfig.Client.Meta = make(map[string]string, metaLength) 179 for _, kv := range meta { 180 parts := strings.SplitN(kv, "=", 2) 181 if len(parts) != 2 { 182 c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv)) 183 return nil 184 } 185 186 cmdConfig.Client.Meta[parts[0]] = parts[1] 187 } 188 } 189 190 // Load the configuration 191 var config *Config 192 if dev { 193 config = DevConfig() 194 } else { 195 config = DefaultConfig() 196 } 197 for _, path := range configPath { 198 current, err := LoadConfig(path) 199 if err != nil { 200 c.Ui.Error(fmt.Sprintf( 201 "Error loading configuration from %s: %s", path, err)) 202 return nil 203 } 204 205 // The user asked us to load some config here but we didn't find any, 206 // so we'll complain but continue. 207 if current == nil || reflect.DeepEqual(current, &Config{}) { 208 c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path)) 209 } 210 211 if config == nil { 212 config = current 213 } else { 214 config = config.Merge(current) 215 } 216 } 217 218 // Ensure the sub-structs at least exist 219 if config.Client == nil { 220 config.Client = &ClientConfig{} 221 } 222 if config.Server == nil { 223 config.Server = &ServerConfig{} 224 } 225 226 // Merge any CLI options over config file options 227 config = config.Merge(cmdConfig) 228 229 // Set the version info 230 config.Version = c.Version 231 232 // Normalize binds, ports, addresses, and advertise 233 if err := config.normalizeAddrs(); err != nil { 234 c.Ui.Error(err.Error()) 235 return nil 236 } 237 238 // Check to see if we should read the Vault token from the environment 239 if config.Vault.Token == "" { 240 if token, ok := os.LookupEnv("VAULT_TOKEN"); ok { 241 config.Vault.Token = token 242 } 243 } 244 245 if dev { 246 // Skip validation for dev mode 247 return config 248 } 249 250 if config.Server.EncryptKey != "" { 251 if _, err := config.Server.EncryptBytes(); err != nil { 252 c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err)) 253 return nil 254 } 255 keyfile := filepath.Join(config.DataDir, serfKeyring) 256 if _, err := os.Stat(keyfile); err == nil { 257 c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring") 258 } 259 } 260 261 // Parse the RetryInterval. 262 dur, err := time.ParseDuration(config.Server.RetryInterval) 263 if err != nil { 264 c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err)) 265 return nil 266 } 267 config.Server.retryInterval = dur 268 269 // Check that the server is running in at least one mode. 270 if !(config.Server.Enabled || config.Client.Enabled) { 271 c.Ui.Error("Must specify either server, client or dev mode for the agent.") 272 return nil 273 } 274 275 // Verify the paths are absolute. 276 dirs := map[string]string{ 277 "data-dir": config.DataDir, 278 "alloc-dir": config.Client.AllocDir, 279 "state-dir": config.Client.StateDir, 280 } 281 for k, dir := range dirs { 282 if dir == "" { 283 continue 284 } 285 286 if !filepath.IsAbs(dir) { 287 c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir)) 288 return nil 289 } 290 } 291 292 // Ensure that we have the directories we neet to run. 293 if config.Server.Enabled && config.DataDir == "" { 294 c.Ui.Error("Must specify data directory") 295 return nil 296 } 297 298 // The config is valid if the top-level data-dir is set or if both 299 // alloc-dir and state-dir are set. 300 if config.Client.Enabled && config.DataDir == "" { 301 if config.Client.AllocDir == "" || config.Client.StateDir == "" { 302 c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.") 303 return nil 304 } 305 } 306 307 // Check the bootstrap flags 308 if config.Server.BootstrapExpect > 0 && !config.Server.Enabled { 309 c.Ui.Error("Bootstrap requires server mode to be enabled") 310 return nil 311 } 312 if config.Server.BootstrapExpect == 1 { 313 c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.") 314 } 315 316 return config 317 } 318 319 // setupLoggers is used to setup the logGate, logWriter, and our logOutput 320 func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) { 321 // Setup logging. First create the gated log writer, which will 322 // store logs until we're ready to show them. Then create the level 323 // filter, filtering logs of the specified level. 324 logGate := &gatedwriter.Writer{ 325 Writer: &cli.UiWriter{Ui: c.Ui}, 326 } 327 328 c.logFilter = LevelFilter() 329 c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel)) 330 c.logFilter.Writer = logGate 331 if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) { 332 c.Ui.Error(fmt.Sprintf( 333 "Invalid log level: %s. Valid log levels are: %v", 334 c.logFilter.MinLevel, c.logFilter.Levels)) 335 return nil, nil, nil 336 } 337 338 // Check if syslog is enabled 339 var syslog io.Writer 340 if config.EnableSyslog { 341 l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad") 342 if err != nil { 343 c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err)) 344 return nil, nil, nil 345 } 346 syslog = &SyslogWrapper{l, c.logFilter} 347 } 348 349 // Create a log writer, and wrap a logOutput around it 350 logWriter := NewLogWriter(512) 351 var logOutput io.Writer 352 if syslog != nil { 353 logOutput = io.MultiWriter(c.logFilter, logWriter, syslog) 354 } else { 355 logOutput = io.MultiWriter(c.logFilter, logWriter) 356 } 357 c.logOutput = logOutput 358 log.SetOutput(logOutput) 359 return logGate, logWriter, logOutput 360 } 361 362 // setupAgent is used to start the agent and various interfaces 363 func (c *Command) setupAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) error { 364 c.Ui.Output("Starting Nomad agent...") 365 agent, err := NewAgent(config, logOutput, inmem) 366 if err != nil { 367 c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err)) 368 return err 369 } 370 c.agent = agent 371 372 // Setup the HTTP server 373 http, err := NewHTTPServer(agent, config) 374 if err != nil { 375 agent.Shutdown() 376 c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err)) 377 return err 378 } 379 c.httpServer = http 380 381 // Setup update checking 382 if !config.DisableUpdateCheck { 383 version := config.Version.Version 384 if config.Version.VersionPrerelease != "" { 385 version += fmt.Sprintf("-%s", config.Version.VersionPrerelease) 386 } 387 updateParams := &checkpoint.CheckParams{ 388 Product: "nomad", 389 Version: version, 390 } 391 if !config.DisableAnonymousSignature { 392 updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature") 393 } 394 395 // Schedule a periodic check with expected interval of 24 hours 396 checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults) 397 398 // Do an immediate check within the next 30 seconds 399 go func() { 400 time.Sleep(lib.RandomStagger(30 * time.Second)) 401 c.checkpointResults(checkpoint.Check(updateParams)) 402 }() 403 } 404 return nil 405 } 406 407 // checkpointResults is used to handler periodic results from our update checker 408 func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) { 409 if err != nil { 410 c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err)) 411 return 412 } 413 if results.Outdated { 414 c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, c.Version.VersionNumber())) 415 } 416 for _, alert := range results.Alerts { 417 switch alert.Level { 418 case "info": 419 c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL)) 420 default: 421 c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL)) 422 } 423 } 424 } 425 426 func (c *Command) AutocompleteFlags() complete.Flags { 427 configFilePredictor := complete.PredictOr( 428 complete.PredictFiles("*.json"), 429 complete.PredictFiles("*.hcl")) 430 431 return map[string]complete.Predictor{ 432 "-config": configFilePredictor, 433 } 434 } 435 436 func (c *Command) AutocompleteArgs() complete.Predictor { 437 return nil 438 } 439 440 func (c *Command) Run(args []string) int { 441 c.Ui = &cli.PrefixedUi{ 442 OutputPrefix: "==> ", 443 InfoPrefix: " ", 444 ErrorPrefix: "==> ", 445 Ui: c.Ui, 446 } 447 448 // Parse our configs 449 c.args = args 450 config := c.readConfig() 451 if config == nil { 452 return 1 453 } 454 455 // Setup the log outputs 456 logGate, _, logOutput := c.setupLoggers(config) 457 if logGate == nil { 458 return 1 459 } 460 461 // Log config files 462 if len(config.Files) > 0 { 463 c.Ui.Info(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", "))) 464 } else { 465 c.Ui.Info("No configuration files loaded") 466 } 467 468 // Initialize the telemetry 469 inmem, err := c.setupTelemetry(config) 470 if err != nil { 471 c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err)) 472 return 1 473 } 474 475 // Create the agent 476 if err := c.setupAgent(config, logOutput, inmem); err != nil { 477 logGate.Flush() 478 return 1 479 } 480 defer c.agent.Shutdown() 481 482 // Shudown the HTTP server at the end 483 defer func() { 484 if c.httpServer != nil { 485 c.httpServer.Shutdown() 486 } 487 }() 488 489 // Join startup nodes if specified 490 if err := c.startupJoin(config); err != nil { 491 c.Ui.Error(err.Error()) 492 return 1 493 } 494 495 // Compile agent information for output later 496 info := make(map[string]string) 497 info["version"] = config.Version.VersionNumber() 498 info["client"] = strconv.FormatBool(config.Client.Enabled) 499 info["log level"] = config.LogLevel 500 info["server"] = strconv.FormatBool(config.Server.Enabled) 501 info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter) 502 503 // Sort the keys for output 504 infoKeys := make([]string, 0, len(info)) 505 for key := range info { 506 infoKeys = append(infoKeys, key) 507 } 508 sort.Strings(infoKeys) 509 510 // Agent configuration output 511 padding := 18 512 c.Ui.Output("Nomad agent configuration:\n") 513 for _, k := range infoKeys { 514 c.Ui.Info(fmt.Sprintf( 515 "%s%s: %s", 516 strings.Repeat(" ", padding-len(k)), 517 strings.Title(k), 518 info[k])) 519 } 520 c.Ui.Output("") 521 522 // Output the header that the server has started 523 c.Ui.Output("Nomad agent started! Log data will stream in below:\n") 524 525 // Enable log streaming 526 logGate.Flush() 527 528 // Start retry join process 529 c.retryJoinErrCh = make(chan struct{}) 530 go c.retryJoin(config) 531 532 // Wait for exit 533 return c.handleSignals() 534 } 535 536 // handleSignals blocks until we get an exit-causing signal 537 func (c *Command) handleSignals() int { 538 signalCh := make(chan os.Signal, 4) 539 signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE) 540 541 // Wait for a signal 542 WAIT: 543 var sig os.Signal 544 select { 545 case s := <-signalCh: 546 sig = s 547 case <-c.ShutdownCh: 548 sig = os.Interrupt 549 case <-c.retryJoinErrCh: 550 return 1 551 } 552 553 // Skip any SIGPIPE signal and don't try to log it (See issues #1798, #3554) 554 if sig == syscall.SIGPIPE { 555 goto WAIT 556 } 557 558 c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig)) 559 560 // Check if this is a SIGHUP 561 if sig == syscall.SIGHUP { 562 c.handleReload() 563 goto WAIT 564 } 565 566 // Check if we should do a graceful leave 567 graceful := false 568 if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt { 569 graceful = true 570 } else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm { 571 graceful = true 572 } 573 574 // Bail fast if not doing a graceful leave 575 if !graceful { 576 return 1 577 } 578 579 // Attempt a graceful leave 580 gracefulCh := make(chan struct{}) 581 c.Ui.Output("Gracefully shutting down agent...") 582 go func() { 583 if err := c.agent.Leave(); err != nil { 584 c.Ui.Error(fmt.Sprintf("Error: %s", err)) 585 return 586 } 587 close(gracefulCh) 588 }() 589 590 // Wait for leave or another signal 591 select { 592 case <-signalCh: 593 return 1 594 case <-time.After(gracefulTimeout): 595 return 1 596 case <-gracefulCh: 597 return 0 598 } 599 } 600 601 // reloadHTTPServer shuts down the existing HTTP server and restarts it. This 602 // is helpful when reloading the agent configuration. 603 func (c *Command) reloadHTTPServer() error { 604 c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration") 605 606 c.httpServer.Shutdown() 607 608 http, err := NewHTTPServer(c.agent, c.agent.config) 609 if err != nil { 610 return err 611 } 612 c.httpServer = http 613 614 return nil 615 } 616 617 // handleReload is invoked when we should reload our configs, e.g. SIGHUP 618 func (c *Command) handleReload() { 619 c.Ui.Output("Reloading configuration...") 620 newConf := c.readConfig() 621 if newConf == nil { 622 c.Ui.Error(fmt.Sprintf("Failed to reload configs")) 623 return 624 } 625 626 // Change the log level 627 minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel)) 628 if ValidateLevelFilter(minLevel, c.logFilter) { 629 c.logFilter.SetMinLevel(minLevel) 630 } else { 631 c.Ui.Error(fmt.Sprintf( 632 "Invalid log level: %s. Valid log levels are: %v", 633 minLevel, c.logFilter.Levels)) 634 635 // Keep the current log level 636 newConf.LogLevel = c.agent.GetConfig().LogLevel 637 } 638 639 shouldReloadAgent, shouldReloadHTTPServer := c.agent.ShouldReload(newConf) 640 if shouldReloadAgent { 641 c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config") 642 err := c.agent.Reload(newConf) 643 if err != nil { 644 c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err) 645 return 646 } 647 648 if s := c.agent.Server(); s != nil { 649 sconf, err := convertServerConfig(newConf, c.logOutput) 650 c.agent.logger.Printf("[DEBUG] agent: starting reload of server config") 651 if err != nil { 652 c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err) 653 return 654 } else { 655 if err := s.Reload(sconf); err != nil { 656 c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err) 657 return 658 } 659 } 660 } 661 662 if s := c.agent.Client(); s != nil { 663 clientConfig, err := c.agent.clientConfig() 664 c.agent.logger.Printf("[DEBUG] agent: starting reload of client config") 665 if err != nil { 666 c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) 667 return 668 } 669 if err := c.agent.Client().Reload(clientConfig); err != nil { 670 c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err) 671 return 672 } 673 } 674 } 675 676 // reload HTTP server after we have reloaded both client and server, in case 677 // we error in either of the above cases. For example, reloading the http 678 // server to a TLS connection could succeed, while reloading the server's rpc 679 // connections could fail. 680 if shouldReloadHTTPServer { 681 err := c.reloadHTTPServer() 682 if err != nil { 683 c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err) 684 return 685 } 686 } 687 } 688 689 // setupTelemetry is used ot setup the telemetry sub-systems 690 func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) { 691 /* Setup telemetry 692 Aggregate on 10 second intervals for 1 minute. Expose the 693 metrics over stderr when there is a SIGUSR1 received. 694 */ 695 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 696 metrics.DefaultInmemSignal(inm) 697 698 var telConfig *Telemetry 699 if config.Telemetry == nil { 700 telConfig = &Telemetry{} 701 } else { 702 telConfig = config.Telemetry 703 } 704 705 metricsConf := metrics.DefaultConfig("nomad") 706 metricsConf.EnableHostname = !telConfig.DisableHostname 707 708 // Prefer the hostname as a label. 709 metricsConf.EnableHostnameLabel = !telConfig.DisableHostname && 710 !telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics 711 712 if telConfig.UseNodeName { 713 metricsConf.HostName = config.NodeName 714 metricsConf.EnableHostname = true 715 } 716 717 // Configure the statsite sink 718 var fanout metrics.FanoutSink 719 if telConfig.StatsiteAddr != "" { 720 sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr) 721 if err != nil { 722 return inm, err 723 } 724 fanout = append(fanout, sink) 725 } 726 727 // Configure the statsd sink 728 if telConfig.StatsdAddr != "" { 729 sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr) 730 if err != nil { 731 return inm, err 732 } 733 fanout = append(fanout, sink) 734 } 735 736 // Configure the prometheus sink 737 if telConfig.PrometheusMetrics { 738 promSink, err := prometheus.NewPrometheusSink() 739 if err != nil { 740 return inm, err 741 } 742 fanout = append(fanout, promSink) 743 } 744 745 // Configure the datadog sink 746 if telConfig.DataDogAddr != "" { 747 sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName) 748 if err != nil { 749 return inm, err 750 } 751 sink.SetTags(telConfig.DataDogTags) 752 fanout = append(fanout, sink) 753 } 754 755 // Configure the Circonus sink 756 if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" { 757 cfg := &circonus.Config{} 758 cfg.Interval = telConfig.CirconusSubmissionInterval 759 cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken 760 cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp 761 cfg.CheckManager.API.URL = telConfig.CirconusAPIURL 762 cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL 763 cfg.CheckManager.Check.ID = telConfig.CirconusCheckID 764 cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation 765 cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID 766 cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag 767 cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags 768 cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName 769 cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID 770 cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag 771 772 if cfg.CheckManager.Check.DisplayName == "" { 773 cfg.CheckManager.Check.DisplayName = "Nomad" 774 } 775 776 if cfg.CheckManager.API.TokenApp == "" { 777 cfg.CheckManager.API.TokenApp = "nomad" 778 } 779 780 if cfg.CheckManager.Check.SearchTag == "" { 781 cfg.CheckManager.Check.SearchTag = "service:nomad" 782 } 783 784 sink, err := circonus.NewCirconusSink(cfg) 785 if err != nil { 786 return inm, err 787 } 788 sink.Start() 789 fanout = append(fanout, sink) 790 } 791 792 // Initialize the global sink 793 if len(fanout) > 0 { 794 fanout = append(fanout, inm) 795 metrics.NewGlobal(metricsConf, fanout) 796 } else { 797 metricsConf.EnableHostname = false 798 metrics.NewGlobal(metricsConf, inm) 799 } 800 return inm, nil 801 } 802 803 func (c *Command) startupJoin(config *Config) error { 804 if len(config.Server.StartJoin) == 0 || !config.Server.Enabled { 805 return nil 806 } 807 808 c.Ui.Output("Joining cluster...") 809 n, err := c.agent.server.Join(config.Server.StartJoin) 810 if err != nil { 811 return err 812 } 813 814 c.Ui.Info(fmt.Sprintf("Join completed. Synced with %d initial agents", n)) 815 return nil 816 } 817 818 // retryJoin is used to handle retrying a join until it succeeds or all retries 819 // are exhausted. 820 func (c *Command) retryJoin(config *Config) { 821 if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled { 822 return 823 } 824 825 logger := c.agent.logger 826 logger.Printf("[INFO] agent: Joining cluster...") 827 828 attempt := 0 829 for { 830 n, err := c.agent.server.Join(config.Server.RetryJoin) 831 if err == nil { 832 logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n) 833 return 834 } 835 836 attempt++ 837 if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts { 838 logger.Printf("[ERR] agent: max join retry exhausted, exiting") 839 close(c.retryJoinErrCh) 840 return 841 } 842 843 logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err, 844 config.Server.RetryInterval) 845 time.Sleep(config.Server.retryInterval) 846 } 847 } 848 849 func (c *Command) Synopsis() string { 850 return "Runs a Nomad agent" 851 } 852 853 func (c *Command) Help() string { 854 helpText := ` 855 Usage: nomad agent [options] 856 857 Starts the Nomad agent and runs until an interrupt is received. 858 The agent may be a client and/or server. 859 860 The Nomad agent's configuration primarily comes from the config 861 files used, but a subset of the options may also be passed directly 862 as CLI arguments, listed below. 863 864 General Options (clients and servers): 865 866 -bind=<addr> 867 The address the agent will bind to for all of its various network 868 services. The individual services that run bind to individual 869 ports on this address. Defaults to the loopback 127.0.0.1. 870 871 -config=<path> 872 The path to either a single config file or a directory of config 873 files to use for configuring the Nomad agent. This option may be 874 specified multiple times. If multiple config files are used, the 875 values from each will be merged together. During merging, values 876 from files found later in the list are merged over values from 877 previously parsed files. 878 879 -data-dir=<path> 880 The data directory used to store state and other persistent data. 881 On client machines this is used to house allocation data such as 882 downloaded artifacts used by drivers. On server nodes, the data 883 dir is also used to store the replicated log. 884 885 -dc=<datacenter> 886 The name of the datacenter this Nomad agent is a member of. By 887 default this is set to "dc1". 888 889 -log-level=<level> 890 Specify the verbosity level of Nomad's logs. Valid values include 891 DEBUG, INFO, and WARN, in decreasing order of verbosity. The 892 default is INFO. 893 894 -node=<name> 895 The name of the local agent. This name is used to identify the node 896 in the cluster. The name must be unique per region. The default is 897 the current hostname of the machine. 898 899 -region=<region> 900 Name of the region the Nomad agent will be a member of. By default 901 this value is set to "global". 902 903 -dev 904 Start the agent in development mode. This enables a pre-configured 905 dual-role agent (client + server) which is useful for developing 906 or testing Nomad. No other configuration is required to start the 907 agent in this mode. 908 909 Server Options: 910 911 -server 912 Enable server mode for the agent. Agents in server mode are 913 clustered together and handle the additional responsibility of 914 leader election, data replication, and scheduling work onto 915 eligible client nodes. 916 917 -bootstrap-expect=<num> 918 Configures the expected number of servers nodes to wait for before 919 bootstrapping the cluster. Once <num> servers have joined eachother, 920 Nomad initiates the bootstrap process. 921 922 -encrypt=<key> 923 Provides the gossip encryption key 924 925 -join=<address> 926 Address of an agent to join at start time. Can be specified 927 multiple times. 928 929 -raft-protocol=<num> 930 The Raft protocol version to use. Used for enabling certain Autopilot 931 features. Defaults to 2. 932 933 -retry-join=<address> 934 Address of an agent to join at start time with retries enabled. 935 Can be specified multiple times. 936 937 -retry-max=<num> 938 Maximum number of join attempts. Defaults to 0, which will retry 939 indefinitely. 940 941 -retry-interval=<dur> 942 Time to wait between join attempts. 943 944 -rejoin 945 Ignore a previous leave and attempts to rejoin the cluster. 946 947 Client Options: 948 949 -client 950 Enable client mode for the agent. Client mode enables a given node to be 951 evaluated for allocations. If client mode is not enabled, no work will be 952 scheduled to the agent. 953 954 -state-dir 955 The directory used to store state and other persistent data. If not 956 specified a subdirectory under the "-data-dir" will be used. 957 958 -alloc-dir 959 The directory used to store allocation data such as downloaded artificats as 960 well as data produced by tasks. If not specified, a subdirectory under the 961 "-data-dir" will be used. 962 963 -servers 964 A list of known server addresses to connect to given as "host:port" and 965 delimited by commas. 966 967 -node-class 968 Mark this node as a member of a node-class. This can be used to label 969 similar node types. 970 971 -meta 972 User specified metadata to associated with the node. Each instance of -meta 973 parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair 974 to be added. 975 976 -network-interface 977 Forces the network fingerprinter to use the specified network interface. 978 979 -network-speed 980 The default speed for network interfaces in MBits if the link speed can not 981 be determined dynamically. 982 983 ACL Options: 984 985 -acl-enabled 986 Specifies whether the agent should enable ACLs. 987 988 -acl-replication-token 989 The replication token for servers to use when replicating from the 990 authoratative region. The token must be a valid management token from the 991 authoratative region. 992 993 Consul Options: 994 995 -consul-address=<addr> 996 Specifies the address to the local Consul agent, given in the format host:port. 997 Supports Unix sockets with the format: unix:///tmp/consul/consul.sock 998 999 -consul-auth=<auth> 1000 Specifies the HTTP Basic Authentication information to use for access to the 1001 Consul Agent, given in the format username:password. 1002 1003 -consul-auto-advertise 1004 Specifies if Nomad should advertise its services in Consul. The services 1005 are named according to server_service_name and client_service_name. Nomad 1006 servers and clients advertise their respective services, each tagged 1007 appropriately with either http or rpc tag. Nomad servers also advertise a 1008 serf tagged service. 1009 1010 -consul-ca-file=<path> 1011 Specifies an optional path to the CA certificate used for Consul communication. 1012 This defaults to the system bundle if unspecified. 1013 1014 -consul-cert-file=<path> 1015 Specifies the path to the certificate used for Consul communication. If this 1016 is set then you need to also set key_file. 1017 1018 -consul-checks-use-advertise 1019 Specifies if Consul heath checks should bind to the advertise address. By 1020 default, this is the bind address. 1021 1022 -consul-client-auto-join 1023 Specifies if the Nomad clients should automatically discover servers in the 1024 same region by searching for the Consul service name defined in the 1025 server_service_name option. 1026 1027 -consul-client-service-name=<name> 1028 Specifies the name of the service in Consul for the Nomad clients. 1029 1030 -consul-key-file=<path> 1031 Specifies the path to the private key used for Consul communication. If this 1032 is set then you need to also set cert_file. 1033 1034 -consul-server-service-name=<name> 1035 Specifies the name of the service in Consul for the Nomad servers. 1036 1037 -consul-server-auto-join 1038 Specifies if the Nomad servers should automatically discover and join other 1039 Nomad servers by searching for the Consul service name defined in the 1040 server_service_name option. This search only happens if the server does not 1041 have a leader. 1042 1043 -consul-ssl 1044 Specifies if the transport scheme should use HTTPS to communicate with the 1045 Consul agent. 1046 1047 -consul-token=<token> 1048 Specifies the token used to provide a per-request ACL token. 1049 1050 -consul-verify-ssl 1051 Specifies if SSL peer verification should be used when communicating to the 1052 Consul API client over HTTPS. 1053 1054 Vault Options: 1055 1056 -vault-enabled 1057 Whether to enable or disable Vault integration. 1058 1059 -vault-address=<addr> 1060 The address to communicate with Vault. This should be provided with the http:// 1061 or https:// prefix. 1062 1063 -vault-token=<token> 1064 The Vault token used to derive tokens from Vault on behalf of clients. 1065 This only needs to be set on Servers. Overrides the Vault token read from 1066 the VAULT_TOKEN environment variable. 1067 1068 -vault-create-from-role=<role> 1069 The role name to create tokens for tasks from. 1070 1071 -vault-allow-unauthenticated 1072 Whether to allow jobs to be sumbitted that request Vault Tokens but do not 1073 authentication. The flag only applies to Servers. 1074 1075 -vault-ca-file=<path> 1076 The path to a PEM-encoded CA cert file to use to verify the Vault server SSL 1077 certificate. 1078 1079 -vault-ca-path=<path> 1080 The path to a directory of PEM-encoded CA cert files to verify the Vault server 1081 certificate. 1082 1083 -vault-cert-file=<token> 1084 The path to the certificate for Vault communication. 1085 1086 -vault-key-file=<addr> 1087 The path to the private key for Vault communication. 1088 1089 -vault-tls-skip-verify=<token> 1090 Enables or disables SSL certificate verification. 1091 1092 -vault-tls-server-name=<token> 1093 Used to set the SNI host when connecting over TLS. 1094 ` 1095 return strings.TrimSpace(helpText) 1096 }