github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/command/agent/command.go (about) 1 package agent 2 3 import ( 4 "flag" 5 "fmt" 6 "io" 7 "os" 8 "os/signal" 9 "path/filepath" 10 "sort" 11 "strconv" 12 "strings" 13 "syscall" 14 "time" 15 16 "github.com/armon/go-metrics" 17 "github.com/hashicorp/go-checkpoint" 18 "github.com/hashicorp/go-syslog" 19 "github.com/hashicorp/logutils" 20 "github.com/hashicorp/nomad/helper/flag-slice" 21 "github.com/hashicorp/nomad/helper/gated-writer" 22 scada "github.com/hashicorp/scada-client" 23 "github.com/mitchellh/cli" 24 ) 25 26 // gracefulTimeout controls how long we wait before forcefully terminating 27 const gracefulTimeout = 5 * time.Second 28 29 // Command is a Command implementation that runs a Nomad agent. 30 // The command will not end unless a shutdown message is sent on the 31 // ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly 32 // exit. 33 type Command struct { 34 Revision string 35 Version string 36 VersionPrerelease string 37 Ui cli.Ui 38 ShutdownCh <-chan struct{} 39 40 args []string 41 agent *Agent 42 httpServer *HTTPServer 43 logFilter *logutils.LevelFilter 44 logOutput io.Writer 45 46 scadaProvider *scada.Provider 47 scadaHttp *HTTPServer 48 } 49 50 func (c *Command) readConfig() *Config { 51 var dev bool 52 var configPath []string 53 var servers string 54 var meta []string 55 56 // Make a new, empty config. 57 cmdConfig := &Config{ 58 Atlas: &AtlasConfig{}, 59 Client: &ClientConfig{}, 60 Ports: &Ports{}, 61 Server: &ServerConfig{}, 62 } 63 64 flags := flag.NewFlagSet("agent", flag.ContinueOnError) 65 flags.Usage = func() { c.Ui.Error(c.Help()) } 66 67 // Role options 68 flags.BoolVar(&dev, "dev", false, "") 69 flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "") 70 flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "") 71 72 // Server-only options 73 flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "") 74 75 // Client-only options 76 flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "") 77 flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "") 78 flags.StringVar(&cmdConfig.Client.NodeID, "node-id", "", "") 79 flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "") 80 flags.StringVar(&servers, "servers", "", "") 81 flags.Var((*sliceflag.StringFlag)(&meta), "meta", "") 82 flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "") 83 flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "") 84 85 // General options 86 flags.Var((*sliceflag.StringFlag)(&configPath), "config", "config") 87 flags.StringVar(&cmdConfig.BindAddr, "bind", "", "") 88 flags.StringVar(&cmdConfig.Region, "region", "", "") 89 flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "") 90 flags.StringVar(&cmdConfig.Datacenter, "dc", "", "") 91 flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "") 92 flags.StringVar(&cmdConfig.NodeName, "node", "", "") 93 94 // Atlas options 95 flags.StringVar(&cmdConfig.Atlas.Infrastructure, "atlas", "", "") 96 flags.BoolVar(&cmdConfig.Atlas.Join, "atlas-join", false, "") 97 flags.StringVar(&cmdConfig.Atlas.Token, "atlas-token", "", "") 98 99 if err := flags.Parse(c.args); err != nil { 100 return nil 101 } 102 103 // Split the servers. 104 if servers != "" { 105 cmdConfig.Client.Servers = strings.Split(servers, ",") 106 } 107 108 // Parse the meta flags. 109 metaLength := len(meta) 110 if metaLength != 0 { 111 cmdConfig.Client.Meta = make(map[string]string, metaLength) 112 for _, kv := range meta { 113 parts := strings.SplitN(kv, "=", 2) 114 if len(parts) != 2 { 115 c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv)) 116 return nil 117 } 118 119 cmdConfig.Client.Meta[parts[0]] = parts[1] 120 } 121 } 122 123 // Load the configuration 124 var config *Config 125 if dev { 126 config = DevConfig() 127 } else { 128 config = DefaultConfig() 129 } 130 for _, path := range configPath { 131 current, err := LoadConfig(path) 132 if err != nil { 133 c.Ui.Error(fmt.Sprintf( 134 "Error loading configuration from %s: %s", path, err)) 135 return nil 136 } 137 138 if config == nil { 139 config = current 140 } else { 141 config = config.Merge(current) 142 } 143 } 144 145 // Ensure the sub-structs at least exist 146 if config.Atlas == nil { 147 config.Atlas = &AtlasConfig{} 148 } 149 if config.Client == nil { 150 config.Client = &ClientConfig{} 151 } 152 if config.Server == nil { 153 config.Server = &ServerConfig{} 154 } 155 156 // Merge any CLI options over config file options 157 config = config.Merge(cmdConfig) 158 159 // Set the version info 160 config.Revision = c.Revision 161 config.Version = c.Version 162 config.VersionPrerelease = c.VersionPrerelease 163 164 if dev { 165 // Skip validation for dev mode 166 return config 167 } 168 169 // Check that the server is running in at least one mode. 170 if !(config.Server.Enabled || config.Client.Enabled) { 171 c.Ui.Error("Must specify either server, client or dev mode for the agent.") 172 return nil 173 } 174 175 // Ensure that we have the directories we neet to run. 176 if config.Server.Enabled && config.DataDir == "" { 177 c.Ui.Error("Must specify data directory") 178 return nil 179 } 180 181 // The config is valid if the top-level data-dir is set or if both 182 // alloc-dir and state-dir are set. 183 if config.Client.Enabled && config.DataDir == "" { 184 if config.Client.AllocDir == "" || config.Client.StateDir == "" { 185 c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.") 186 return nil 187 } 188 } 189 190 // Check the bootstrap flags 191 if config.Server.BootstrapExpect > 0 && !config.Server.Enabled { 192 c.Ui.Error("Bootstrap requires server mode to be enabled") 193 return nil 194 } 195 if config.Server.BootstrapExpect == 1 { 196 c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.") 197 } 198 199 return config 200 } 201 202 // setupLoggers is used to setup the logGate, logWriter, and our logOutput 203 func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) { 204 // Setup logging. First create the gated log writer, which will 205 // store logs until we're ready to show them. Then create the level 206 // filter, filtering logs of the specified level. 207 logGate := &gatedwriter.Writer{ 208 Writer: &cli.UiWriter{Ui: c.Ui}, 209 } 210 211 c.logFilter = LevelFilter() 212 c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel)) 213 c.logFilter.Writer = logGate 214 if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) { 215 c.Ui.Error(fmt.Sprintf( 216 "Invalid log level: %s. Valid log levels are: %v", 217 c.logFilter.MinLevel, c.logFilter.Levels)) 218 return nil, nil, nil 219 } 220 221 // Check if syslog is enabled 222 var syslog io.Writer 223 if config.EnableSyslog { 224 l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad") 225 if err != nil { 226 c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err)) 227 return nil, nil, nil 228 } 229 syslog = &SyslogWrapper{l, c.logFilter} 230 } 231 232 // Create a log writer, and wrap a logOutput around it 233 logWriter := NewLogWriter(512) 234 var logOutput io.Writer 235 if syslog != nil { 236 logOutput = io.MultiWriter(c.logFilter, logWriter, syslog) 237 } else { 238 logOutput = io.MultiWriter(c.logFilter, logWriter) 239 } 240 c.logOutput = logOutput 241 return logGate, logWriter, logOutput 242 } 243 244 // setupAgent is used to start the agent and various interfaces 245 func (c *Command) setupAgent(config *Config, logOutput io.Writer) error { 246 c.Ui.Output("Starting Nomad agent...") 247 agent, err := NewAgent(config, logOutput) 248 if err != nil { 249 c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err)) 250 return err 251 } 252 c.agent = agent 253 254 // Enable the SCADA integration 255 if err := c.setupSCADA(config); err != nil { 256 agent.Shutdown() 257 c.Ui.Error(fmt.Sprintf("Error starting SCADA: %s", err)) 258 return err 259 } 260 261 // Setup the HTTP server 262 http, err := NewHTTPServer(agent, config, logOutput) 263 if err != nil { 264 agent.Shutdown() 265 c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err)) 266 return err 267 } 268 c.httpServer = http 269 270 // Setup update checking 271 if !config.DisableUpdateCheck { 272 version := config.Version 273 if config.VersionPrerelease != "" { 274 version += fmt.Sprintf("-%s", config.VersionPrerelease) 275 } 276 updateParams := &checkpoint.CheckParams{ 277 Product: "nomad", 278 Version: version, 279 } 280 if !config.DisableAnonymousSignature { 281 updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature") 282 } 283 284 // Schedule a periodic check with expected interval of 24 hours 285 checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults) 286 287 // Do an immediate check within the next 30 seconds 288 go func() { 289 time.Sleep(randomStagger(30 * time.Second)) 290 c.checkpointResults(checkpoint.Check(updateParams)) 291 }() 292 } 293 return nil 294 } 295 296 // checkpointResults is used to handler periodic results from our update checker 297 func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) { 298 if err != nil { 299 c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err)) 300 return 301 } 302 if results.Outdated { 303 c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s", results.CurrentVersion)) 304 } 305 for _, alert := range results.Alerts { 306 switch alert.Level { 307 case "info": 308 c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL)) 309 default: 310 c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL)) 311 } 312 } 313 } 314 315 func (c *Command) Run(args []string) int { 316 c.Ui = &cli.PrefixedUi{ 317 OutputPrefix: "==> ", 318 InfoPrefix: " ", 319 ErrorPrefix: "==> ", 320 Ui: c.Ui, 321 } 322 323 // Parse our configs 324 c.args = args 325 config := c.readConfig() 326 if config == nil { 327 return 1 328 } 329 330 // Setup the log outputs 331 logGate, _, logOutput := c.setupLoggers(config) 332 if logGate == nil { 333 return 1 334 } 335 336 // Initialize the telemetry 337 if err := c.setupTelementry(config); err != nil { 338 c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err)) 339 return 1 340 } 341 342 // Create the agent 343 if err := c.setupAgent(config, logOutput); err != nil { 344 return 1 345 } 346 defer c.agent.Shutdown() 347 348 // Check and shut down the SCADA listeners at the end 349 defer func() { 350 if c.httpServer != nil { 351 c.httpServer.Shutdown() 352 } 353 if c.scadaHttp != nil { 354 c.scadaHttp.Shutdown() 355 } 356 if c.scadaProvider != nil { 357 c.scadaProvider.Shutdown() 358 } 359 }() 360 361 // Compile agent information for output later 362 info := make(map[string]string) 363 info["client"] = strconv.FormatBool(config.Client.Enabled) 364 info["log level"] = config.LogLevel 365 info["server"] = strconv.FormatBool(config.Server.Enabled) 366 info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter) 367 if config.Atlas != nil && config.Atlas.Infrastructure != "" { 368 info["atlas"] = fmt.Sprintf("(Infrastructure: '%s' Join: %v)", 369 config.Atlas.Infrastructure, config.Atlas.Join) 370 } else { 371 info["atlas"] = "<disabled>" 372 } 373 374 // Sort the keys for output 375 infoKeys := make([]string, 0, len(info)) 376 for key := range info { 377 infoKeys = append(infoKeys, key) 378 } 379 sort.Strings(infoKeys) 380 381 // Agent configuration output 382 padding := 18 383 c.Ui.Output("Nomad agent configuration:\n") 384 for _, k := range infoKeys { 385 c.Ui.Info(fmt.Sprintf( 386 "%s%s: %s", 387 strings.Repeat(" ", padding-len(k)), 388 strings.Title(k), 389 info[k])) 390 } 391 c.Ui.Output("") 392 393 // Output the header that the server has started 394 c.Ui.Output("Nomad agent started! Log data will stream in below:\n") 395 396 // Enable log streaming 397 logGate.Flush() 398 399 // Wait for exit 400 return c.handleSignals(config) 401 } 402 403 // handleSignals blocks until we get an exit-causing signal 404 func (c *Command) handleSignals(config *Config) int { 405 signalCh := make(chan os.Signal, 4) 406 signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP) 407 408 // Wait for a signal 409 WAIT: 410 var sig os.Signal 411 select { 412 case s := <-signalCh: 413 sig = s 414 case <-c.ShutdownCh: 415 sig = os.Interrupt 416 } 417 c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig)) 418 419 // Check if this is a SIGHUP 420 if sig == syscall.SIGHUP { 421 if conf := c.handleReload(config); conf != nil { 422 config = conf 423 } 424 goto WAIT 425 } 426 427 // Check if we should do a graceful leave 428 graceful := false 429 if sig == os.Interrupt && config.LeaveOnInt { 430 graceful = true 431 } else if sig == syscall.SIGTERM && config.LeaveOnTerm { 432 graceful = true 433 } 434 435 // Bail fast if not doing a graceful leave 436 if !graceful { 437 return 1 438 } 439 440 // Attempt a graceful leave 441 gracefulCh := make(chan struct{}) 442 c.Ui.Output("Gracefully shutting down agent...") 443 go func() { 444 if err := c.agent.Leave(); err != nil { 445 c.Ui.Error(fmt.Sprintf("Error: %s", err)) 446 return 447 } 448 close(gracefulCh) 449 }() 450 451 // Wait for leave or another signal 452 select { 453 case <-signalCh: 454 return 1 455 case <-time.After(gracefulTimeout): 456 return 1 457 case <-gracefulCh: 458 return 0 459 } 460 } 461 462 // handleReload is invoked when we should reload our configs, e.g. SIGHUP 463 func (c *Command) handleReload(config *Config) *Config { 464 c.Ui.Output("Reloading configuration...") 465 newConf := c.readConfig() 466 if newConf == nil { 467 c.Ui.Error(fmt.Sprintf("Failed to reload configs")) 468 return config 469 } 470 471 // Change the log level 472 minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel)) 473 if ValidateLevelFilter(minLevel, c.logFilter) { 474 c.logFilter.SetMinLevel(minLevel) 475 } else { 476 c.Ui.Error(fmt.Sprintf( 477 "Invalid log level: %s. Valid log levels are: %v", 478 minLevel, c.logFilter.Levels)) 479 480 // Keep the current log level 481 newConf.LogLevel = config.LogLevel 482 } 483 return newConf 484 } 485 486 // setupTelementry is used ot setup the telemetry sub-systems 487 func (c *Command) setupTelementry(config *Config) error { 488 /* Setup telemetry 489 Aggregate on 10 second intervals for 1 minute. Expose the 490 metrics over stderr when there is a SIGUSR1 received. 491 */ 492 inm := metrics.NewInmemSink(10*time.Second, time.Minute) 493 metrics.DefaultInmemSignal(inm) 494 495 var telConfig *Telemetry 496 if config.Telemetry == nil { 497 telConfig = &Telemetry{} 498 } else { 499 telConfig = config.Telemetry 500 } 501 502 metricsConf := metrics.DefaultConfig("nomad") 503 metricsConf.EnableHostname = !telConfig.DisableHostname 504 505 // Configure the statsite sink 506 var fanout metrics.FanoutSink 507 if telConfig.StatsiteAddr != "" { 508 sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr) 509 if err != nil { 510 return err 511 } 512 fanout = append(fanout, sink) 513 } 514 515 // Configure the statsd sink 516 if telConfig.StatsdAddr != "" { 517 sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr) 518 if err != nil { 519 return err 520 } 521 fanout = append(fanout, sink) 522 } 523 524 // Initialize the global sink 525 if len(fanout) > 0 { 526 fanout = append(fanout, inm) 527 metrics.NewGlobal(metricsConf, fanout) 528 } else { 529 metricsConf.EnableHostname = false 530 metrics.NewGlobal(metricsConf, inm) 531 } 532 return nil 533 } 534 535 // setupSCADA is used to start a new SCADA provider and listener, 536 // replacing any existing listeners. 537 func (c *Command) setupSCADA(config *Config) error { 538 // Shut down existing SCADA listeners 539 if c.scadaProvider != nil { 540 c.scadaProvider.Shutdown() 541 } 542 if c.scadaHttp != nil { 543 c.scadaHttp.Shutdown() 544 } 545 546 // No-op if we don't have an infrastructure 547 if config.Atlas == nil || config.Atlas.Infrastructure == "" { 548 return nil 549 } 550 551 // Create the new provider and listener 552 c.Ui.Output("Connecting to Atlas: " + config.Atlas.Infrastructure) 553 provider, list, err := NewProvider(config, c.logOutput) 554 if err != nil { 555 return err 556 } 557 c.scadaProvider = provider 558 c.scadaHttp = newScadaHttp(c.agent, list) 559 return nil 560 } 561 562 func (c *Command) Synopsis() string { 563 return "Runs a Nomad agent" 564 } 565 566 func (c *Command) Help() string { 567 helpText := ` 568 Usage: nomad agent [options] 569 570 Starts the Nomad agent and runs until an interrupt is received. 571 The agent may be a client and/or server. 572 573 The Nomad agent's configuration primarily comes from the config 574 files used, but a subset of the options may also be passed directly 575 as CLI arguments, listed below. 576 577 General Options (clients and servers): 578 579 -bind=<addr> 580 The address the agent will bind to for all of its various network 581 services. The individual services that run bind to individual 582 ports on this address. Defaults to the loopback 127.0.0.1. 583 584 -config=<path> 585 The path to either a single config file or a directory of config 586 files to use for configuring the Nomad agent. This option may be 587 specified multiple times. If multiple config files are used, the 588 values from each will be merged together. During merging, values 589 from files found later in the list are merged over values from 590 previously parsed files. 591 592 -data-dir=<path> 593 The data directory used to store state and other persistent data. 594 On client machines this is used to house allocation data such as 595 downloaded artifacts used by drivers. On server nodes, the data 596 dir is also used to store the replicated log. 597 598 -dc=<datacenter> 599 The name of the datacenter this Nomad agent is a member of. By 600 default this is set to "dc1". 601 602 -log-level=<level> 603 Specify the verbosity level of Nomad's logs. Valid values include 604 DEBUG, INFO, and WARN, in decreasing order of verbosity. The 605 default is INFO. 606 607 -node=<name> 608 The name of the local agent. This name is used to identify the node 609 in the cluster. The name must be unique per region. The default is 610 the current hostname of the machine. 611 612 -region=<region> 613 Name of the region the Nomad agent will be a member of. By default 614 this value is set to "global". 615 616 -dev 617 Start the agent in development mode. This enables a pre-configured 618 dual-role agent (client + server) which is useful for developing 619 or testing Nomad. No other configuration is required to start the 620 agent in this mode. 621 622 Server Options: 623 624 -server 625 Enable server mode for the agent. Agents in server mode are 626 clustered together and handle the additional responsibility of 627 leader election, data replication, and scheduling work onto 628 eligible client nodes. 629 630 -bootstrap-expect=<num> 631 Configures the expected number of servers nodes to wait for before 632 bootstrapping the cluster. Once <num> servers have joined eachother, 633 Nomad initiates the bootstrap process. 634 635 Client Options: 636 637 -client 638 Enable client mode for the agent. Client mode enables a given node to be 639 evaluated for allocations. If client mode is not enabled, no work will be 640 scheduled to the agent. 641 642 -state-dir 643 The directory used to store state and other persistent data. If not 644 specified a subdirectory under the "-data-dir" will be used. 645 646 -alloc-dir 647 The directory used to store allocation data such as downloaded artificats as 648 well as data produced by tasks. If not specified, a subdirectory under the 649 "-data-dir" will be used. 650 651 -servers 652 A list of known server addresses to connect to given as "host:port" and 653 delimited by commas. 654 655 -node-id 656 A unique identifier for the node to use. If not provided, a UUID is 657 generated. 658 659 -node-class 660 Mark this node as a member of a node-class. This can be used to label 661 similiar node types. 662 663 -meta 664 User specified metadata to associated with the node. Each instance of -meta 665 parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair 666 to be added. 667 668 -network-interface 669 Forces the network fingerprinter to use the specified network interface. 670 671 -network-speed 672 The default speed for network interfaces in MBits if the link speed can not 673 be determined dynamically. 674 675 Atlas Options: 676 677 -atlas=<infrastructure> 678 The Atlas infrastructure name to configure. This enables the SCADA 679 client and attempts to connect Nomad to the HashiCorp Atlas service 680 using the provided infrastructure name and token. 681 682 -atlas-token=<token> 683 The Atlas token to use when connecting to the HashiCorp Atlas 684 service. This must be provided to successfully connect your Nomad 685 agent to Atlas. 686 687 -atlas-join 688 Enable the Atlas join feature. This mode allows agents to discover 689 eachother automatically using the SCADA integration features. 690 ` 691 return strings.TrimSpace(helpText) 692 }