package agent

import (
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"os/signal"
	"path/filepath"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"syscall"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/armon/go-metrics/circonus"
	"github.com/armon/go-metrics/datadog"
	"github.com/armon/go-metrics/prometheus"
	"github.com/hashicorp/consul/lib"
	checkpoint "github.com/hashicorp/go-checkpoint"
	discover "github.com/hashicorp/go-discover"
	gsyslog "github.com/hashicorp/go-syslog"
	"github.com/hashicorp/logutils"
	flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
	gatedwriter "github.com/hashicorp/nomad/helper/gated-writer"
	"github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/version"
	"github.com/mitchellh/cli"
	"github.com/posener/complete"
)

// gracefulTimeout controls how long we wait before forcefully terminating
const gracefulTimeout = 5 * time.Second

// Command is a Command implementation that runs a Nomad agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
	Version    *version.VersionInfo
	Ui         cli.Ui
	ShutdownCh <-chan struct{}

	args           []string
	agent          *Agent
	httpServer     *HTTPServer
	logFilter      *logutils.LevelFilter
	logOutput      io.Writer
	retryJoinErrCh chan struct{}
}

// readConfig parses CLI flags and configuration files, merges them (CLI
// options take precedence over file options, which take precedence over the
// dev/default base config), validates the result, and returns it. On any
// parse or validation error the error is printed to the UI and nil is
// returned.
func (c *Command) readConfig() *Config {
	var dev bool
	var configPath []string
	var servers string
	var meta []string

	// Make a new, empty config.
	cmdConfig := &Config{
		Client: &ClientConfig{},
		Consul: &config.ConsulConfig{},
		Ports:  &Ports{},
		Server: &ServerConfig{
			ServerJoin: &ServerJoin{},
		},
		Vault: &config.VaultConfig{},
		ACL:   &ACLConfig{},
	}

	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
	flags.Usage = func() { c.Ui.Error(c.Help()) }

	// Role options
	flags.BoolVar(&dev, "dev", false, "")
	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")

	// Server-only options
	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")
	flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "")
	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.StartJoin), "join", "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.RetryJoin), "retry-join", "")
	flags.IntVar(&cmdConfig.Server.ServerJoin.RetryMaxAttempts, "retry-max", 0, "")
	flags.Var((flaghelper.FuncDurationVar)(func(d time.Duration) error {
		cmdConfig.Server.ServerJoin.RetryInterval = d
		return nil
	}), "retry-interval", "")

	// Client-only options
	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
	flags.StringVar(&servers, "servers", "", "")
	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")

	// General options
	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
	flags.StringVar(&cmdConfig.Region, "region", "", "")
	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
	flags.StringVar(&cmdConfig.NodeName, "node", "", "")

	// Consul options
	flags.StringVar(&cmdConfig.Consul.Auth, "consul-auth", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.AutoAdvertise = &b
		return nil
	}), "consul-auto-advertise", "")
	flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "")
	flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ChecksUseAdvertise = &b
		return nil
	}), "consul-checks-use-advertise", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ClientAutoJoin = &b
		return nil
	}), "consul-client-auto-join", "")
	flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ClientHTTPCheckName, "consul-client-http-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerHTTPCheckName, "consul-server-http-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerSerfCheckName, "consul-server-serf-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerRPCCheckName, "consul-server-rpc-check-name", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ServerAutoJoin = &b
		return nil
	}), "consul-server-auto-join", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.EnableSSL = &b
		return nil
	}), "consul-ssl", "")
	flags.StringVar(&cmdConfig.Consul.Token, "consul-token", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.VerifySSL = &b
		return nil
	}), "consul-verify-ssl", "")
	flags.StringVar(&cmdConfig.Consul.Addr, "consul-address", "", "")

	// Vault options
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.Enabled = &b
		return nil
	}), "vault-enabled", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.AllowUnauthenticated = &b
		return nil
	}), "vault-allow-unauthenticated", "")
	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
	flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.TLSSkipVerify = &b
		return nil
	}), "vault-tls-skip-verify", "")
	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")

	// ACL options
	flags.BoolVar(&cmdConfig.ACL.Enabled, "acl-enabled", false, "")
	flags.StringVar(&cmdConfig.ACL.ReplicationToken, "acl-replication-token", "", "")

	if err := flags.Parse(c.args); err != nil {
		return nil
	}

	// Split the servers.
	if servers != "" {
		cmdConfig.Client.Servers = strings.Split(servers, ",")
	}

	// Parse the meta flags. Each -meta is a single KEY=VALUE pair.
	metaLength := len(meta)
	if metaLength != 0 {
		cmdConfig.Client.Meta = make(map[string]string, metaLength)
		for _, kv := range meta {
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) != 2 {
				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
				return nil
			}

			cmdConfig.Client.Meta[parts[0]] = parts[1]
		}
	}

	// Load the configuration
	var config *Config
	if dev {
		config = DevConfig()
	} else {
		config = DefaultConfig()
	}

	// Merge in the enterprise overlay
	config.Merge(DefaultEntConfig())

	for _, path := range configPath {
		current, err := LoadConfig(path)
		if err != nil {
			c.Ui.Error(fmt.Sprintf(
				"Error loading configuration from %s: %s", path, err))
			return nil
		}

		// The user asked us to load some config here but we didn't find any,
		// so we'll complain but continue.
		if current == nil || reflect.DeepEqual(current, &Config{}) {
			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
		}

		if config == nil {
			config = current
		} else {
			config = config.Merge(current)
		}
	}

	// Ensure the sub-structs at least exist
	if config.Client == nil {
		config.Client = &ClientConfig{}
	}
	if config.Server == nil {
		config.Server = &ServerConfig{}
	}

	// Merge any CLI options over config file options
	config = config.Merge(cmdConfig)

	// Set the version info
	config.Version = c.Version

	// Normalize binds, ports, addresses, and advertise
	if err := config.normalizeAddrs(); err != nil {
		c.Ui.Error(err.Error())
		return nil
	}

	// Check to see if we should read the Vault token from the environment
	if config.Vault.Token == "" {
		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
			config.Vault.Token = token
		}
	}

	if dev {
		// Skip validation for dev mode
		return config
	}

	if config.Server.EncryptKey != "" {
		if _, err := config.Server.EncryptBytes(); err != nil {
			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
			return nil
		}
		// An existing keyring on disk wins over the -encrypt flag.
		keyfile := filepath.Join(config.DataDir, serfKeyring)
		if _, err := os.Stat(keyfile); err == nil {
			c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring")
		}
	}

	// Check that the server is running in at least one mode.
	if !(config.Server.Enabled || config.Client.Enabled) {
		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
		return nil
	}

	// Verify the paths are absolute.
	dirs := map[string]string{
		"data-dir":  config.DataDir,
		"alloc-dir": config.Client.AllocDir,
		"state-dir": config.Client.StateDir,
	}
	for k, dir := range dirs {
		if dir == "" {
			continue
		}

		if !filepath.IsAbs(dir) {
			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
			return nil
		}
	}

	// Ensure that we have the directories we need to run.
	if config.Server.Enabled && config.DataDir == "" {
		c.Ui.Error("Must specify data directory")
		return nil
	}

	// The config is valid if the top-level data-dir is set or if both
	// alloc-dir and state-dir are set.
	if config.Client.Enabled && config.DataDir == "" {
		if config.Client.AllocDir == "" || config.Client.StateDir == "" {
			c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.")
			return nil
		}
	}

	// Check the bootstrap flags
	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
		c.Ui.Error("Bootstrap requires server mode to be enabled")
		return nil
	}
	if config.Server.BootstrapExpect == 1 {
		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
	}

	// Set up the TLS configuration properly if we have one.
	// XXX chelseakomlo: set up a TLSConfig New method which would wrap
	// constructor-type actions like this.
	if config.TLSConfig != nil && !config.TLSConfig.IsEmpty() {
		if err := config.TLSConfig.SetChecksum(); err != nil {
			c.Ui.Error(fmt.Sprintf("WARNING: Error when parsing TLS configuration: %v", err))
		}
	}

	return config
}

// setupLoggers is used to setup the logGate, logWriter, and our logOutput.
// Returns (nil, nil, nil) when the configured log level is invalid or syslog
// setup fails.
func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
	// Setup logging. First create the gated log writer, which will
	// store logs until we're ready to show them. Then create the level
	// filter, filtering logs of the specified level.
	logGate := &gatedwriter.Writer{
		Writer: &cli.UiWriter{Ui: c.Ui},
	}

	c.logFilter = LevelFilter()
	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
	c.logFilter.Writer = logGate
	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
		c.Ui.Error(fmt.Sprintf(
			"Invalid log level: %s. Valid log levels are: %v",
			c.logFilter.MinLevel, c.logFilter.Levels))
		return nil, nil, nil
	}

	// Check if syslog is enabled
	var syslog io.Writer
	if config.EnableSyslog {
		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
			return nil, nil, nil
		}
		syslog = &SyslogWrapper{l, c.logFilter}
	}

	// Create a log writer, and wrap a logOutput around it
	logWriter := NewLogWriter(512)
	var logOutput io.Writer
	if syslog != nil {
		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
	} else {
		logOutput = io.MultiWriter(c.logFilter, logWriter)
	}
	c.logOutput = logOutput
	log.SetOutput(logOutput)
	return logGate, logWriter, logOutput
}

// setupAgent is used to start the agent and various interfaces
func (c *Command) setupAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) error {
	c.Ui.Output("Starting Nomad agent...")
	agent, err := NewAgent(config, logOutput, inmem)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
		return err
	}
	c.agent = agent

	// Setup the HTTP server
	http, err := NewHTTPServer(agent, config)
	if err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
		return err
	}
	c.httpServer = http

	// If DisableUpdateCheck is not enabled, set up update checking
	// (DisableUpdateCheck is false by default)
	if config.DisableUpdateCheck != nil && !*config.DisableUpdateCheck {
		version := config.Version.Version
		if config.Version.VersionPrerelease != "" {
			version += fmt.Sprintf("-%s", config.Version.VersionPrerelease)
		}
		updateParams := &checkpoint.CheckParams{
			Product: "nomad",
			Version: version,
		}
		if !config.DisableAnonymousSignature {
			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
		}

		// Schedule a periodic check with expected interval of 24 hours
		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)

		// Do an immediate check within the next 30 seconds
		go func() {
			time.Sleep(lib.RandomStagger(30 * time.Second))
			c.checkpointResults(checkpoint.Check(updateParams))
		}()
	}

	return nil
}

// checkpointResults is used to handle periodic results from our update checker
func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
		return
	}
	if results.Outdated {
		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, c.Version.VersionNumber()))
	}
	for _, alert := range results.Alerts {
		switch alert.Level {
		case "info":
			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
		default:
			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
		}
	}
}

// AutocompleteFlags returns the shell-completion predictors for the agent
// command's flags; only -config (JSON/HCL files) is completed.
func (c *Command) AutocompleteFlags() complete.Flags {
	configFilePredictor := complete.PredictOr(
		complete.PredictFiles("*.json"),
		complete.PredictFiles("*.hcl"))

	return map[string]complete.Predictor{
		"-config": configFilePredictor,
	}
}

// AutocompleteArgs returns nil: the agent command takes no positional args.
func (c *Command) AutocompleteArgs() complete.Predictor {
	return nil
}

// Run parses configuration, starts the agent and its HTTP server, performs
// startup/retry joins, and then blocks until an exit-causing signal or
// shutdown request. It returns the process exit code.
func (c *Command) Run(args []string) int {
	c.Ui = &cli.PrefixedUi{
		OutputPrefix: "==> ",
		InfoPrefix:   "    ",
		ErrorPrefix:  "==> ",
		Ui:           c.Ui,
	}

	// Parse our configs
	c.args = args
	config := c.readConfig()
	if config == nil {
		return 1
	}

	// Setup the log outputs
	logGate, _, logOutput := c.setupLoggers(config)
	if logGate == nil {
		return 1
	}

	// Log config files
	if len(config.Files) > 0 {
		c.Ui.Output(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
	} else {
		c.Ui.Output("No configuration files loaded")
	}

	// Initialize the telemetry
	inmem, err := c.setupTelemetry(config)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
		return 1
	}

	// Create the agent
	if err := c.setupAgent(config, logOutput, inmem); err != nil {
		logGate.Flush()
		return 1
	}
	defer c.agent.Shutdown()

	// Shutdown the HTTP server at the end
	defer func() {
		if c.httpServer != nil {
			c.httpServer.Shutdown()
		}
	}()

	// Join startup nodes if specified
	if err := c.startupJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Compile agent information for output later
	info := make(map[string]string)
	info["version"] = config.Version.VersionNumber()
	info["client"] = strconv.FormatBool(config.Client.Enabled)
	info["log level"] = config.LogLevel
	info["server"] = strconv.FormatBool(config.Server.Enabled)
	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)
	info["bind addrs"] = c.getBindAddrSynopsis()
	info["advertise addrs"] = c.getAdvertiseAddrSynopsis()

	// Sort the keys for output
	infoKeys := make([]string, 0, len(info))
	for key := range info {
		infoKeys = append(infoKeys, key)
	}
	sort.Strings(infoKeys)

	// Agent configuration output
	padding := 18
	c.Ui.Output("Nomad agent configuration:\n")
	for _, k := range infoKeys {
		c.Ui.Info(fmt.Sprintf(
			"%s%s: %s",
			strings.Repeat(" ", padding-len(k)),
			strings.Title(k),
			info[k]))
	}
	c.Ui.Output("")

	// Output the header that the server has started
	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")

	// Enable log streaming
	logGate.Flush()

	// Start retry join process
	if err := c.handleRetryJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Wait for exit
	return c.handleSignals()
}

// handleRetryJoin is used to start retry joining if it is configured.
// It supports the deprecated top-level retry_join fields (migrating them
// into server_join with a warning) as well as the server_join stanza on
// both servers and clients.
func (c *Command) handleRetryJoin(config *Config) error {
	c.retryJoinErrCh = make(chan struct{})

	if config.Server.Enabled && len(config.Server.RetryJoin) != 0 {
		joiner := retryJoiner{
			discover:      &discover.Discover{},
			errCh:         c.retryJoinErrCh,
			logger:        c.agent.logger,
			serverJoin:    c.agent.server.Join,
			serverEnabled: true,
		}

		if err := joiner.Validate(config); err != nil {
			return err
		}

		// Remove the duplicate fields
		if len(config.Server.RetryJoin) != 0 {
			config.Server.ServerJoin.RetryJoin = config.Server.RetryJoin
			config.Server.RetryJoin = nil
		}
		if config.Server.RetryMaxAttempts != 0 {
			config.Server.ServerJoin.RetryMaxAttempts = config.Server.RetryMaxAttempts
			config.Server.RetryMaxAttempts = 0
		}
		if config.Server.RetryInterval != 0 {
			config.Server.ServerJoin.RetryInterval = config.Server.RetryInterval
			config.Server.RetryInterval = 0
		}

		c.agent.logger.Printf("[WARN] agent: Using deprecated retry_join fields. Upgrade configuration to use server_join")
	}

	if config.Server.Enabled &&
		config.Server.ServerJoin != nil &&
		len(config.Server.ServerJoin.RetryJoin) != 0 {

		joiner := retryJoiner{
			discover:      &discover.Discover{},
			errCh:         c.retryJoinErrCh,
			logger:        c.agent.logger,
			serverJoin:    c.agent.server.Join,
			serverEnabled: true,
		}

		if err := joiner.Validate(config); err != nil {
			return err
		}

		go joiner.RetryJoin(config.Server.ServerJoin)
	}

	if config.Client.Enabled &&
		config.Client.ServerJoin != nil &&
		len(config.Client.ServerJoin.RetryJoin) != 0 {
		joiner := retryJoiner{
			discover:      &discover.Discover{},
			errCh:         c.retryJoinErrCh,
			logger:        c.agent.logger,
			clientJoin:    c.agent.client.SetServers,
			clientEnabled: true,
		}

		if err := joiner.Validate(config); err != nil {
			return err
		}

		go joiner.RetryJoin(config.Client.ServerJoin)
	}

	return nil
}

// handleSignals blocks until we get an exit-causing signal
func (c *Command) handleSignals() int {
	signalCh := make(chan os.Signal, 4)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

	// Wait for a signal
WAIT:
	var sig os.Signal
	select {
	case s := <-signalCh:
		sig = s
	case <-c.ShutdownCh:
		sig = os.Interrupt
	case <-c.retryJoinErrCh:
		return 1
	}

	// Skip any SIGPIPE signal and don't try to log it (See issues #1798, #3554)
	if sig == syscall.SIGPIPE {
		goto WAIT
	}

	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))

	// Check if this is a SIGHUP
	if sig == syscall.SIGHUP {
		c.handleReload()
		goto WAIT
	}

	// Check if we should do a graceful leave
	graceful := false
	if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt {
		graceful = true
	} else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm {
		graceful = true
	}

	// Bail fast if not doing a graceful leave
	if !graceful {
		return 1
	}

	// Attempt a graceful leave
	gracefulCh := make(chan struct{})
	c.Ui.Output("Gracefully shutting down agent...")
	go func() {
		if err := c.agent.Leave(); err != nil {
			c.Ui.Error(fmt.Sprintf("Error: %s", err))
			return
		}
		close(gracefulCh)
	}()

	// Wait for leave or another signal
	select {
	case <-signalCh:
		return 1
	case <-time.After(gracefulTimeout):
		return 1
	case <-gracefulCh:
		return 0
	}
}

// reloadHTTPServer shuts down the existing HTTP server and restarts it. This
// is helpful when reloading the agent configuration.
func (c *Command) reloadHTTPServer() error {
	c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration")

	c.httpServer.Shutdown()

	http, err := NewHTTPServer(c.agent, c.agent.config)
	if err != nil {
		return err
	}
	c.httpServer = http

	return nil
}

// handleReload is invoked when we should reload our configs, e.g. SIGHUP
func (c *Command) handleReload() {
	c.Ui.Output("Reloading configuration...")
	newConf := c.readConfig()
	if newConf == nil {
		c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
		return
	}

	// Change the log level
	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
	if ValidateLevelFilter(minLevel, c.logFilter) {
		c.logFilter.SetMinLevel(minLevel)
	} else {
		c.Ui.Error(fmt.Sprintf(
			"Invalid log level: %s. Valid log levels are: %v",
			minLevel, c.logFilter.Levels))

		// Keep the current log level
		newConf.LogLevel = c.agent.GetConfig().LogLevel
	}

	shouldReloadAgent, shouldReloadHTTP, shouldReloadRPC := c.agent.ShouldReload(newConf)
	if shouldReloadAgent {
		c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config")
		err := c.agent.Reload(newConf)
		if err != nil {
			c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
			return
		}
	}

	// NOTE(review): the server config is reloaded whenever a server is
	// running, regardless of the ShouldReload flags above — confirm this is
	// intentional.
	if s := c.agent.Server(); s != nil {
		c.agent.logger.Printf("[DEBUG] agent: starting reload of server config")
		sconf, err := convertServerConfig(newConf, c.logOutput)
		if err != nil {
			c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err)
			return
		} else {
			if err := s.Reload(sconf); err != nil {
				c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err)
				return
			}
		}
	}

	if shouldReloadRPC {

		if s := c.agent.Client(); s != nil {
			clientConfig, err := c.agent.clientConfig()
			c.agent.logger.Printf("[DEBUG] agent: starting reload of client config")
			if err != nil {
				c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err)
				return
			}
			if err := c.agent.Client().Reload(clientConfig); err != nil {
				c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err)
				return
			}
		}
	}

	// reload HTTP server after we have reloaded both client and server, in case
	// we error in either of the above cases. For example, reloading the http
	// server to a TLS connection could succeed, while reloading the server's rpc
	// connections could fail.
	if shouldReloadHTTP {
		err := c.reloadHTTPServer()
		if err != nil {
			c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err)
			return
		}
	}
}

// setupTelemetry is used to set up the telemetry sub-systems
func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
	/* Setup telemetry
	Aggregate on 10 second intervals for 1 minute. Expose the
	metrics over stderr when there is a SIGUSR1 received.
	*/
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	var telConfig *Telemetry
	if config.Telemetry == nil {
		telConfig = &Telemetry{}
	} else {
		telConfig = config.Telemetry
	}

	metricsConf := metrics.DefaultConfig("nomad")
	metricsConf.EnableHostname = !telConfig.DisableHostname

	// Prefer the hostname as a label.
	metricsConf.EnableHostnameLabel = !telConfig.DisableHostname &&
		!telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics

	if telConfig.UseNodeName {
		metricsConf.HostName = config.NodeName
		metricsConf.EnableHostname = true
	}

	// Configure the statsite sink
	var fanout metrics.FanoutSink
	if telConfig.StatsiteAddr != "" {
		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, sink)
	}

	// Configure the statsd sink
	if telConfig.StatsdAddr != "" {
		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, sink)
	}

	// Configure the prometheus sink
	if telConfig.PrometheusMetrics {
		promSink, err := prometheus.NewPrometheusSink()
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, promSink)
	}

	// Configure the datadog sink
	if telConfig.DataDogAddr != "" {
		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
		if err != nil {
			return inm, err
		}
		sink.SetTags(telConfig.DataDogTags)
		fanout = append(fanout, sink)
	}

	// Configure the Circonus sink
	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
		cfg := &circonus.Config{}
		cfg.Interval = telConfig.CirconusSubmissionInterval
		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag

		if cfg.CheckManager.Check.DisplayName == "" {
			cfg.CheckManager.Check.DisplayName = "Nomad"
		}

		if cfg.CheckManager.API.TokenApp == "" {
			cfg.CheckManager.API.TokenApp = "nomad"
		}

		if cfg.CheckManager.Check.SearchTag == "" {
			cfg.CheckManager.Check.SearchTag = "service:nomad"
		}

		sink, err := circonus.NewCirconusSink(cfg)
		if err != nil {
			return inm, err
		}
		sink.Start()
		fanout = append(fanout, sink)
	}

	// Initialize the global sink
	if len(fanout) > 0 {
		fanout = append(fanout, inm)
		metrics.NewGlobal(metricsConf, fanout)
	} else {
		metricsConf.EnableHostname = false
		metrics.NewGlobal(metricsConf, inm)
	}
	return inm, nil
}

// startupJoin joins the configured start_join/server_join addresses at agent
// startup. It is a no-op for non-servers or when no join addresses are set,
// and errors if both the deprecated start_join and server_join are defined.
func (c *Command) startupJoin(config *Config) error {
	// Nothing to do
	if !config.Server.Enabled {
		return nil
	}

	// Validate both old and new aren't being set
	old := len(config.Server.StartJoin)
	var new int
	if config.Server.ServerJoin != nil {
		new = len(config.Server.ServerJoin.StartJoin)
	}
	if old != 0 && new != 0 {
		return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
	}

	// Nothing to do
	if old+new == 0 {
		return nil
	}

	// Combine the lists and join
	joining := config.Server.StartJoin
	if new != 0 {
		joining = append(joining, config.Server.ServerJoin.StartJoin...)
	}

	c.Ui.Output("Joining cluster...")
	n, err := c.agent.server.Join(joining)
	if err != nil {
		return err
	}

	c.Ui.Output(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
	return nil
}

// getBindAddrSynopsis returns a string that describes the addresses the agent
// is bound to.
func (c *Command) getBindAddrSynopsis() string {
	if c == nil || c.agent == nil || c.agent.config == nil || c.agent.config.normalizedAddrs == nil {
		return ""
	}

	b := new(strings.Builder)
	fmt.Fprintf(b, "HTTP: %s", c.agent.config.normalizedAddrs.HTTP)

	if c.agent.server != nil {
		if c.agent.config.normalizedAddrs.RPC != "" {
			fmt.Fprintf(b, "; RPC: %s", c.agent.config.normalizedAddrs.RPC)
		}
		if c.agent.config.normalizedAddrs.Serf != "" {
			fmt.Fprintf(b, "; Serf: %s", c.agent.config.normalizedAddrs.Serf)
		}
	}

	return b.String()
}

// getAdvertiseAddrSynopsis returns a string that describes the addresses the agent
// is advertising.
func (c *Command) getAdvertiseAddrSynopsis() string {
	if c == nil || c.agent == nil || c.agent.config == nil || c.agent.config.AdvertiseAddrs == nil {
		return ""
	}

	b := new(strings.Builder)
	fmt.Fprintf(b, "HTTP: %s", c.agent.config.AdvertiseAddrs.HTTP)

	// RPC and Serf advertise addresses only apply when a server is running.
	if c.agent.server != nil {
		if c.agent.config.AdvertiseAddrs.RPC != "" {
			fmt.Fprintf(b, "; RPC: %s", c.agent.config.AdvertiseAddrs.RPC)
		}
		if c.agent.config.AdvertiseAddrs.Serf != "" {
			fmt.Fprintf(b, "; Serf: %s", c.agent.config.AdvertiseAddrs.Serf)
		}
	}

	return b.String()
}

// Synopsis returns a one-line description of the command for CLI listings.
func (c *Command) Synopsis() string {
	return "Runs a Nomad agent"
}

// Help returns the long-form usage text for the agent command.
// NOTE(review): the help text continues past the end of this chunk; only the
// visible portion is reproduced here.
func (c *Command) Help() string {
	helpText := `
Usage: nomad agent [options]

  Starts the Nomad agent and runs until an interrupt is received.
  The agent may be a client and/or server.

  The Nomad agent's configuration primarily comes from the config
  files used, but a subset of the options may also be passed directly
  as CLI arguments, listed below.

General Options (clients and servers):

  -bind=<addr>
    The address the agent will bind to for all of its various network
    services. The individual services that run bind to individual
    ports on this address. Defaults to the loopback 127.0.0.1.

  -config=<path>
    The path to either a single config file or a directory of config
    files to use for configuring the Nomad agent. This option may be
    specified multiple times. If multiple config files are used, the
    values from each will be merged together. During merging, values
    from files found later in the list are merged over values from
    previously parsed files.

  -data-dir=<path>
    The data directory used to store state and other persistent data.
    On client machines this is used to house allocation data such as
    downloaded artifacts used by drivers. On server nodes, the data
    dir is also used to store the replicated log.

  -dc=<datacenter>
    The name of the datacenter this Nomad agent is a member of. By
    default this is set to "dc1".

  -log-level=<level>
    Specify the verbosity level of Nomad's logs. Valid values include
    DEBUG, INFO, and WARN, in decreasing order of verbosity. The
    default is INFO.

  -node=<name>
    The name of the local agent. This name is used to identify the node
    in the cluster. The name must be unique per region. The default is
    the current hostname of the machine.

  -region=<region>
    Name of the region the Nomad agent will be a member of. By default
    this value is set to "global".

  -dev
    Start the agent in development mode. This enables a pre-configured
    dual-role agent (client + server) which is useful for developing
    or testing Nomad. No other configuration is required to start the
    agent in this mode.

Server Options:

  -server
    Enable server mode for the agent. Agents in server mode are
    clustered together and handle the additional responsibility of
    leader election, data replication, and scheduling work onto
    eligible client nodes.

  -bootstrap-expect=<num>
    Configures the expected number of servers nodes to wait for before
    bootstrapping the cluster. Once <num> servers have joined each other,
    Nomad initiates the bootstrap process.

  -encrypt=<key>
    Provides the gossip encryption key

  -join=<address>
    Address of an agent to join at start time. Can be specified
    multiple times.

  -raft-protocol=<num>
    The Raft protocol version to use. Used for enabling certain Autopilot
    features. Defaults to 2.

  -retry-join=<address>
    Address of an agent to join at start time with retries enabled.
    Can be specified multiple times.

  -retry-max=<num>
    Maximum number of join attempts. Defaults to 0, which will retry
    indefinitely.

  -retry-interval=<dur>
    Time to wait between join attempts.

  -rejoin
    Ignore a previous leave and attempts to rejoin the cluster.

Client Options:

  -client
    Enable client mode for the agent. Client mode enables a given node to be
    evaluated for allocations. If client mode is not enabled, no work will be
    scheduled to the agent.

  -state-dir
    The directory used to store state and other persistent data. If not
    specified a subdirectory under the "-data-dir" will be used.

  -alloc-dir
    The directory used to store allocation data such as downloaded artifacts as
    well as data produced by tasks. If not specified, a subdirectory under the
    "-data-dir" will be used.

  -servers
    A list of known server addresses to connect to given as "host:port" and
    delimited by commas.

  -node-class
    Mark this node as a member of a node-class. This can be used to label
    similar node types.

  -meta
    User specified metadata to associated with the node. Each instance of -meta
    parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
    to be added.

  -network-interface
    Forces the network fingerprinter to use the specified network interface.

  -network-speed
    The default speed for network interfaces in MBits if the link speed can not
    be determined dynamically.

ACL Options:

  -acl-enabled
    Specifies whether the agent should enable ACLs.

  -acl-replication-token
    The replication token for servers to use when replicating from the
    authoritative region. The token must be a valid management token from the
    authoritative region.
1125 1126 Consul Options: 1127 1128 -consul-address=<addr> 1129 Specifies the address to the local Consul agent, given in the format host:port. 1130 Supports Unix sockets with the format: unix:///tmp/consul/consul.sock 1131 1132 -consul-auth=<auth> 1133 Specifies the HTTP Basic Authentication information to use for access to the 1134 Consul Agent, given in the format username:password. 1135 1136 -consul-auto-advertise 1137 Specifies if Nomad should advertise its services in Consul. The services 1138 are named according to server_service_name and client_service_name. Nomad 1139 servers and clients advertise their respective services, each tagged 1140 appropriately with either http or rpc tag. Nomad servers also advertise a 1141 serf tagged service. 1142 1143 -consul-ca-file=<path> 1144 Specifies an optional path to the CA certificate used for Consul communication. 1145 This defaults to the system bundle if unspecified. 1146 1147 -consul-cert-file=<path> 1148 Specifies the path to the certificate used for Consul communication. If this 1149 is set then you need to also set key_file. 1150 1151 -consul-checks-use-advertise 1152 Specifies if Consul heath checks should bind to the advertise address. By 1153 default, this is the bind address. 1154 1155 -consul-client-auto-join 1156 Specifies if the Nomad clients should automatically discover servers in the 1157 same region by searching for the Consul service name defined in the 1158 server_service_name option. 1159 1160 -consul-client-service-name=<name> 1161 Specifies the name of the service in Consul for the Nomad clients. 1162 1163 -consul-client-http-check-name=<name> 1164 Specifies the HTTP health check name in Consul for the Nomad clients. 1165 1166 -consul-key-file=<path> 1167 Specifies the path to the private key used for Consul communication. If this 1168 is set then you need to also set cert_file. 1169 1170 -consul-server-service-name=<name> 1171 Specifies the name of the service in Consul for the Nomad servers. 
1172 1173 -consul-server-http-check-name=<name> 1174 Specifies the HTTP health check name in Consul for the Nomad servers. 1175 1176 -consul-server-serf-check-name=<name> 1177 Specifies the Serf health check name in Consul for the Nomad servers. 1178 1179 -consul-server-rpc-check-name=<name> 1180 Specifies the RPC health check name in Consul for the Nomad servers. 1181 1182 -consul-server-auto-join 1183 Specifies if the Nomad servers should automatically discover and join other 1184 Nomad servers by searching for the Consul service name defined in the 1185 server_service_name option. This search only happens if the server does not 1186 have a leader. 1187 1188 -consul-ssl 1189 Specifies if the transport scheme should use HTTPS to communicate with the 1190 Consul agent. 1191 1192 -consul-token=<token> 1193 Specifies the token used to provide a per-request ACL token. 1194 1195 -consul-verify-ssl 1196 Specifies if SSL peer verification should be used when communicating to the 1197 Consul API client over HTTPS. 1198 1199 Vault Options: 1200 1201 -vault-enabled 1202 Whether to enable or disable Vault integration. 1203 1204 -vault-address=<addr> 1205 The address to communicate with Vault. This should be provided with the http:// 1206 or https:// prefix. 1207 1208 -vault-token=<token> 1209 The Vault token used to derive tokens from Vault on behalf of clients. 1210 This only needs to be set on Servers. Overrides the Vault token read from 1211 the VAULT_TOKEN environment variable. 1212 1213 -vault-create-from-role=<role> 1214 The role name to create tokens for tasks from. 1215 1216 -vault-allow-unauthenticated 1217 Whether to allow jobs to be submitted that request Vault Tokens but do not 1218 authentication. The flag only applies to Servers. 1219 1220 -vault-ca-file=<path> 1221 The path to a PEM-encoded CA cert file to use to verify the Vault server SSL 1222 certificate. 
1223 1224 -vault-ca-path=<path> 1225 The path to a directory of PEM-encoded CA cert files to verify the Vault server 1226 certificate. 1227 1228 -vault-cert-file=<token> 1229 The path to the certificate for Vault communication. 1230 1231 -vault-key-file=<addr> 1232 The path to the private key for Vault communication. 1233 1234 -vault-tls-skip-verify=<token> 1235 Enables or disables SSL certificate verification. 1236 1237 -vault-tls-server-name=<token> 1238 Used to set the SNI host when connecting over TLS. 1239 ` 1240 return strings.TrimSpace(helpText) 1241 }