github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/client.go

package client

import (
	"fmt"
	"io/ioutil"
	"net"
	"net/rpc"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	metrics "github.com/armon/go-metrics"
	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/lib"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	arstate "github.com/hashicorp/nomad/client/allocrunner/state"
	"github.com/hashicorp/nomad/client/allocwatcher"
	"github.com/hashicorp/nomad/client/config"
	consulApi "github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/devicemanager"
	"github.com/hashicorp/nomad/client/dynamicplugins"
	"github.com/hashicorp/nomad/client/fingerprint"
	"github.com/hashicorp/nomad/client/pluginmanager"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
	"github.com/hashicorp/nomad/client/servers"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/pool"
	hstats "github.com/hashicorp/nomad/helper/stats"
	"github.com/hashicorp/nomad/helper/tlsutil"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
	nconfig "github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/plugins/csi"
	"github.com/hashicorp/nomad/plugins/device"
	"github.com/hashicorp/nomad/plugins/drivers"
	vaultapi "github.com/hashicorp/vault/api"
	"github.com/pkg/errors"
	"github.com/shirou/gopsutil/host"
)

const (
	// clientRPCCache controls how long we keep an idle connection
	// open to a server
	clientRPCCache = 5 * time.Minute

	// clientMaxStreams controls how many idle streams we keep
	// open to a server
	clientMaxStreams = 2

	// datacenterQueryLimit searches through up to this many adjacent
	// datacenters looking for the Nomad server service.
	datacenterQueryLimit = 9

	// registerRetryIntv is the minimum interval on which we retry
	// registration. We pick a value between this and 2x this.
	registerRetryIntv = 15 * time.Second

	// getAllocRetryIntv is the minimum interval on which we retry
	// fetching allocations. We pick a value between this and 2x this.
	getAllocRetryIntv = 30 * time.Second

	// devModeRetryIntv is the retry interval used for development
	devModeRetryIntv = time.Second

	// stateSnapshotIntv is how often the client snapshots state
	stateSnapshotIntv = 60 * time.Second

	// initialHeartbeatStagger is used to stagger the interval between
	// starting and the initial heartbeat. After the initial heartbeat,
	// we switch to using the TTL specified by the servers.
	initialHeartbeatStagger = 10 * time.Second

	// nodeUpdateRetryIntv is how often the client checks for updates to the
	// node attributes or meta map.
	nodeUpdateRetryIntv = 5 * time.Second

	// allocSyncIntv is the batching period of allocation updates before they
	// are synced with the server.
	allocSyncIntv = 200 * time.Millisecond

	// allocSyncRetryIntv is the interval on which we retry updating
	// the status of the allocation
	allocSyncRetryIntv = 5 * time.Second

	// defaultConnectSidecarImage is the image set in the node meta by default
	// to be used by Consul Connect sidecar tasks
	// Update sidecar_task.html when updating this.
	defaultConnectSidecarImage = "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09"

	// defaultConnectLogLevel is the log level set in the node meta by default
	// to be used by Consul Connect sidecar tasks
	defaultConnectLogLevel = "info"
)

var (
	// grace period to allow for batch fingerprint processing
	batchFirstFingerprintsProcessingGrace = batchFirstFingerprintsTimeout + 5*time.Second
)

// ClientStatsReporter exposes all the APIs related to resource usage of a Nomad
// Client
type ClientStatsReporter interface {
	// GetAllocStats returns the AllocStatsReporter for the passed allocation.
	// If it does not exist an error is reported.
	GetAllocStats(allocID string) (interfaces.AllocStatsReporter, error)

	// LatestHostStats returns the latest resource usage stats for the host
	LatestHostStats() *stats.HostStats
}

// AllocRunner is the interface implemented by the core alloc runner.
// TODO Create via factory to allow testing Client with mock AllocRunners.
type AllocRunner interface {
	Alloc() *structs.Allocation
	AllocState() *arstate.State
	Destroy()
	Shutdown()
	GetAllocDir() *allocdir.AllocDir
	IsDestroyed() bool
	IsMigrating() bool
	IsWaiting() bool
	Listener() *cstructs.AllocListener
	Restore() error
	Run()
	StatsReporter() interfaces.AllocStatsReporter
	Update(*structs.Allocation)
	WaitCh() <-chan struct{}
	DestroyCh() <-chan struct{}
	ShutdownCh() <-chan struct{}
	Signal(taskName, signal string) error
	GetTaskEventHandler(taskName string) drivermanager.EventHandler
	PersistState() error

	RestartTask(taskName string, taskEvent *structs.TaskEvent) error
	RestartAll(taskEvent *structs.TaskEvent) error

	GetTaskExecHandler(taskName string) drivermanager.TaskExecHandler
	GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error)
}

// Client is used to implement the client interaction with Nomad. Clients
// are expected to register as a schedulable node to the servers, and to
// run allocations as determined by the servers.
type Client struct {
	config *config.Config
	start  time.Time

	// stateDB is used to efficiently store client state.
	stateDB state.StateDB

	// configCopy is a copy that should be passed to alloc-runners.
	configCopy *config.Config
	configLock sync.RWMutex

	logger    hclog.InterceptLogger
	rpcLogger hclog.Logger

	connPool *pool.ConnPool

	// tlsWrap is used to wrap outbound connections using TLS. It should be
	// accessed using the lock.
	tlsWrap     tlsutil.RegionWrapper
	tlsWrapLock sync.RWMutex

	// servers is the list of nomad servers
	servers *servers.Manager

	// heartbeat related times for tracking how often to heartbeat
	heartbeatTTL    time.Duration
	haveHeartbeated bool
	heartbeatLock   sync.Mutex
	heartbeatStop   *heartbeatStop

	// triggerDiscoveryCh triggers Consul discovery; see triggerDiscovery
	triggerDiscoveryCh chan struct{}

	// triggerNodeUpdate triggers the client to mark the Node as changed and
	// update it.
	triggerNodeUpdate chan struct{}

	// triggerEmitNodeEvent sends an event and triggers the client to update the
	// server for the node event
	triggerEmitNodeEvent chan *structs.NodeEvent

	// rpcRetryCh is closed when an event occurs, such as server discovery or
	// a successful RPC, after which a retry should happen. Access should only
	// occur via the getter method
	rpcRetryCh   chan struct{}
	rpcRetryLock sync.Mutex

	// allocs maps alloc IDs to their AllocRunner. This map includes all
	// AllocRunners - running and GC'd - until the server GCs them.
	allocs    map[string]AllocRunner
	allocLock sync.RWMutex

	// invalidAllocs is a map that tracks allocations that failed because
	// the client couldn't initialize alloc or task runners for them. This can
	// happen due to driver errors.
	invalidAllocs     map[string]struct{}
	invalidAllocsLock sync.Mutex

	// allocUpdates stores allocations that need to be synced to the server.
	allocUpdates chan *structs.Allocation

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService consulApi.ConsulServiceAPI

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// hostStatsCollector collects host resource usage stats
	hostStatsCollector *stats.HostStatsCollector

	// shutdown is true when the Client has been shutdown. Must hold
	// shutdownLock to access.
	shutdown bool

	// shutdownCh is closed to signal the Client is shutting down.
	shutdownCh chan struct{}

	shutdownLock sync.Mutex

	// shutdownGroup are goroutines that exit when shutdownCh is closed.
	// Shutdown() blocks on Wait() after closing shutdownCh.
	shutdownGroup group

	// tokensClient is Nomad Client's custom Consul client for requesting Consul
	// Service Identity tokens through Nomad Server.
	tokensClient consulApi.ServiceIdentityAPI

	// vaultClient is used to interact with Vault for token and secret renewals
	vaultClient vaultclient.VaultClient

	// garbageCollector is used to garbage collect terminal allocations present
	// in the node automatically
	garbageCollector *AllocGarbageCollector

	// clientACLResolver holds the ACL resolution state
	clientACLResolver

	// rpcServer is used to serve RPCs by the local agent.
	rpcServer     *rpc.Server
	endpoints     rpcEndpoints
	streamingRpcs *structs.StreamingRpcRegistry

	// pluginManagers is the set of PluginManagers registered by the client
	pluginManagers *pluginmanager.PluginGroup

	// csimanager is responsible for managing csi plugins.
	csimanager csimanager.Manager

	// devicemanager is responsible for managing device plugins.
	devicemanager devicemanager.Manager

	// drivermanager is responsible for managing driver plugins
	drivermanager drivermanager.Manager

	// baseLabels are used when emitting tagged metrics. All client metrics will
	// have these tags, and optionally more.
	baseLabels []metrics.Label

	// batchNodeUpdates is used to batch initial updates to the node
	batchNodeUpdates *batchNodeUpdates

	// fpInitialized chan is closed when the first batch of fingerprints are
	// applied to the node and the server is updated
	fpInitialized chan struct{}

	// serversContactedCh is closed when GetClientAllocs and runAllocs have
	// successfully run once.
	serversContactedCh   chan struct{}
	serversContactedOnce sync.Once

	// dynamicRegistry provides access to plugins that are dynamically registered
	// with a nomad client. Currently only used for CSI.
	dynamicRegistry dynamicplugins.Registry
}

var (
	// noServersErr is returned by the RPC method when the client has no
	// configured servers. This is used to trigger Consul discovery if
	// enabled.
	noServersErr = errors.New("no servers")
)

// NewClient is used to create a new client from the given configuration
func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulService consulApi.ConsulServiceAPI) (*Client, error) {
	// Create the tls wrapper
	var tlsWrap tlsutil.RegionWrapper
	if cfg.TLSConfig.EnableRPC {
		tw, err := tlsutil.NewTLSConfiguration(cfg.TLSConfig, true, true)
		if err != nil {
			return nil, err
		}
		tlsWrap, err = tw.OutgoingTLSWrapper()
		if err != nil {
			return nil, err
		}
	}

	if cfg.StateDBFactory == nil {
		cfg.StateDBFactory = state.GetStateDBFactory(cfg.DevMode)
	}

	// Create the logger
	logger := cfg.Logger.ResetNamedIntercept("client")

	// Create the client
	c := &Client{
		config:               cfg,
		consulCatalog:        consulCatalog,
		consulService:        consulService,
		start:                time.Now(),
		connPool:             pool.NewPool(logger, clientRPCCache, clientMaxStreams, tlsWrap),
		tlsWrap:              tlsWrap,
		streamingRpcs:        structs.NewStreamingRpcRegistry(),
		logger:               logger,
		rpcLogger:            logger.Named("rpc"),
		allocs:               make(map[string]AllocRunner),
		allocUpdates:         make(chan *structs.Allocation, 64),
		shutdownCh:           make(chan struct{}),
		triggerDiscoveryCh:   make(chan struct{}),
		triggerNodeUpdate:    make(chan struct{}, 8),
		triggerEmitNodeEvent: make(chan *structs.NodeEvent, 8),
		fpInitialized:        make(chan struct{}),
		invalidAllocs:        make(map[string]struct{}),
		serversContactedCh:   make(chan struct{}),
		serversContactedOnce: sync.Once{},
	}

	c.batchNodeUpdates = newBatchNodeUpdates(
		c.updateNodeFromDriver,
		c.updateNodeFromDevices,
		c.updateNodeFromCSI,
	)

	// Initialize the server manager
	c.servers = servers.New(c.logger, c.shutdownCh, c)

	// Start server manager rebalancing go routine
	go c.servers.Start()

	// initialize the client
	if err := c.init(); err != nil {
		return nil, fmt.Errorf("failed to initialize client: %v", err)
	}

	// initialize the dynamic registry (needs to happen after init)
	c.dynamicRegistry =
		dynamicplugins.NewRegistry(c.stateDB, map[string]dynamicplugins.PluginDispenser{
			dynamicplugins.PluginTypeCSIController: func(info *dynamicplugins.PluginInfo) (interface{}, error) {
				return csi.NewClient(info.ConnectionInfo.SocketPath,
					logger.Named("csi_client").With("plugin.name", info.Name, "plugin.type", "controller"))
			},
			dynamicplugins.PluginTypeCSINode: func(info *dynamicplugins.PluginInfo) (interface{}, error) {
				return csi.NewClient(info.ConnectionInfo.SocketPath, logger.Named("csi_client").With("plugin.name", info.Name, "plugin.type", "client"))
			}, // TODO(tgross): refactor these dispenser constructors into csimanager to tidy it up
		})

	// Setup the client's RPC server
	c.setupClientRpc()

	// Initialize the ACL state
	if err := c.clientACLResolver.init(); err != nil {
		return nil, fmt.Errorf("failed to initialize ACL state: %v", err)
	}

	// Setup the node
	if err := c.setupNode(); err != nil {
		return nil, fmt.Errorf("node setup failed: %v", err)
	}

	// Store the config copy before restoring state but after it has been
	// initialized.
	c.configLock.Lock()
	c.configCopy = c.config.Copy()
	c.configLock.Unlock()

	fingerprintManager := NewFingerprintManager(
		c.configCopy.PluginSingletonLoader, c.GetConfig, c.configCopy.Node,
		c.shutdownCh, c.updateNodeFromFingerprint, c.logger)

	c.pluginManagers = pluginmanager.New(c.logger)

	// Fingerprint the node and scan for drivers
	if err := fingerprintManager.Run(); err != nil {
		return nil, fmt.Errorf("fingerprinting failed: %v", err)
	}

	// Build the allowlist/blocklist of drivers.
	allowlistDrivers := cfg.ReadStringListToMap("driver.whitelist")
	blocklistDrivers := cfg.ReadStringListToMap("driver.blacklist")

	// Setup the csi manager
	csiConfig := &csimanager.Config{
		Logger:                c.logger,
		DynamicRegistry:       c.dynamicRegistry,
		UpdateNodeCSIInfoFunc: c.batchNodeUpdates.updateNodeFromCSI,
		TriggerNodeEvent:      c.triggerNodeEvent,
	}
	csiManager := csimanager.New(csiConfig)
	c.csimanager = csiManager
	c.pluginManagers.RegisterAndRun(csiManager.PluginManager())

	// Setup the driver manager
	driverConfig := &drivermanager.Config{
		Logger:              c.logger,
		Loader:              c.configCopy.PluginSingletonLoader,
		PluginConfig:        c.configCopy.NomadPluginConfig(),
		Updater:             c.batchNodeUpdates.updateNodeFromDriver,
		EventHandlerFactory: c.GetTaskEventHandler,
		State:               c.stateDB,
		AllowedDrivers:      allowlistDrivers,
		BlockedDrivers:      blocklistDrivers,
	}
	drvManager := drivermanager.New(driverConfig)
	c.drivermanager = drvManager
	c.pluginManagers.RegisterAndRun(drvManager)

	// Setup the device manager
	devConfig := &devicemanager.Config{
		Logger:        c.logger,
		Loader:        c.configCopy.PluginSingletonLoader,
		PluginConfig:  c.configCopy.NomadPluginConfig(),
		Updater:       c.batchNodeUpdates.updateNodeFromDevices,
		StatsInterval: c.configCopy.StatsCollectionInterval,
		State:         c.stateDB,
	}
	devManager := devicemanager.New(devConfig)
	c.devicemanager = devManager
	c.pluginManagers.RegisterAndRun(devManager)

	// Batching of initial fingerprints is done to reduce the number of node
	// updates sent to the server on startup. This is the first RPC to the servers
	go c.batchFirstFingerprints()

	// create heartbeatStop.
	// We go after the first attempt to connect to the server, so that our
	// grace period for connection covers the full time
	c.heartbeatStop = newHeartbeatStop(c.getAllocRunner, batchFirstFingerprintsTimeout, logger, c.shutdownCh)

	// Watch for disconnection, and stop allocs that are configured with a
	// maximum lifetime when out of touch with the server
	go c.heartbeatStop.watch()

	// Add the stats collector
	statsCollector := stats.NewHostStatsCollector(c.logger, c.config.AllocDir, c.devicemanager.AllStats)
	c.hostStatsCollector = statsCollector

	// Add the garbage collector
	gcConfig := &GCConfig{
		MaxAllocs:           cfg.GCMaxAllocs,
		DiskUsageThreshold:  cfg.GCDiskUsageThreshold,
		InodeUsageThreshold: cfg.GCInodeUsageThreshold,
		Interval:            cfg.GCInterval,
		ParallelDestroys:    cfg.GCParallelDestroys,
		ReservedDiskMB:      cfg.Node.Reserved.DiskMB,
	}
	c.garbageCollector = NewAllocGarbageCollector(c.logger, statsCollector, c, gcConfig)
	go c.garbageCollector.Run()

	// Set the preconfigured list of static servers
	c.configLock.RLock()
	if len(c.configCopy.Servers) > 0 {
		if _, err := c.setServersImpl(c.configCopy.Servers, true); err != nil {
			logger.Warn("none of the configured servers are valid", "error", err)
		}
	}
	c.configLock.RUnlock()

	// Setup Consul discovery if enabled
	if c.configCopy.ConsulConfig.ClientAutoJoin != nil && *c.configCopy.ConsulConfig.ClientAutoJoin {
		c.shutdownGroup.Go(c.consulDiscovery)
		if c.servers.NumServers() == 0 {
			// No configured servers; trigger discovery manually
			c.triggerDiscoveryCh <- struct{}{}
		}
	}

	if err := c.setupConsulTokenClient(); err != nil {
		return nil, errors.Wrap(err, "failed to setup consul tokens client")
	}

	// Setup the vault client for token and secret renewals
	if err := c.setupVaultClient(); err != nil {
		return nil, fmt.Errorf("failed to setup vault client: %v", err)
	}

	// wait until drivers are healthy before restoring or registering with servers
	select {
	case <-c.Ready():
	case <-time.After(batchFirstFingerprintsProcessingGrace):
		logger.Warn("batch fingerprint operation timed out; proceeding to register with fingerprinted plugins so far")
	}

	// Register and then start heartbeating to the servers.
	c.shutdownGroup.Go(c.registerAndHeartbeat)

	// Restore the state
	if err := c.restoreState(); err != nil {
		logger.Error("failed to restore state", "error", err)
		logger.Error("Nomad is unable to start due to corrupt state. "+
			"The safest way to proceed is to manually stop running task processes "+
			"and remove Nomad's state and alloc directories before "+
			"restarting. Lost allocations will be rescheduled.",
			"state_dir", c.config.StateDir, "alloc_dir", c.config.AllocDir)
		logger.Error("Corrupt state is often caused by a bug. Please " +
			"report as much information as possible to " +
			"https://github.com/hashicorp/nomad/issues")
		return nil, fmt.Errorf("failed to restore state")
	}

	// Begin periodic snapshotting of state.
	c.shutdownGroup.Go(c.periodicSnapshot)

	// Begin syncing allocations to the server
	c.shutdownGroup.Go(c.allocSync)

	// Start the client! Don't use the shutdownGroup as run handles
	// shutdowns manually to prevent updates from being applied during
	// shutdown.
	go c.run()

	// Start collecting stats
	c.shutdownGroup.Go(c.emitStats)

	c.logger.Info("started client", "node_id", c.NodeID())
	return c, nil
}

// Ready returns a chan that is closed when the client is fully initialized
func (c *Client) Ready() <-chan struct{} {
	return c.fpInitialized
}

// init is used to initialize the client and perform any setup
// needed before we begin starting its various components.
func (c *Client) init() error {
	// Ensure the state dir exists if we have one
	if c.config.StateDir != "" {
		if err := os.MkdirAll(c.config.StateDir, 0700); err != nil {
			return fmt.Errorf("failed creating state dir: %s", err)
		}

	} else {
		// Otherwise make a temp directory to use.
		p, err := ioutil.TempDir("", "NomadClient")
		if err != nil {
			return fmt.Errorf("failed creating temporary directory for the StateDir: %v", err)
		}

		p, err = filepath.EvalSymlinks(p)
		if err != nil {
			return fmt.Errorf("failed to find temporary directory for the StateDir: %v", err)
		}

		c.config.StateDir = p
	}
	c.logger.Info("using state directory", "state_dir", c.config.StateDir)

	// Open the state database
	db, err := c.config.StateDBFactory(c.logger, c.config.StateDir)
	if err != nil {
		return fmt.Errorf("failed to open state database: %v", err)
	}

	// Upgrade the state database
	if err := db.Upgrade(); err != nil {
		// Upgrade only returns an error on critical persistence
		// failures in which an operator should intervene before the
		// node is accessible. Upgrade drops and logs corrupt state it
		// encounters, so failing to start the agent should be extremely
		// rare.
		return fmt.Errorf("failed to upgrade state database: %v", err)
	}

	c.stateDB = db

	// Ensure the alloc dir exists if we have one
	if c.config.AllocDir != "" {
		if err := os.MkdirAll(c.config.AllocDir, 0711); err != nil {
			return fmt.Errorf("failed creating alloc dir: %s", err)
		}
	} else {
		// Otherwise make a temp directory to use.
		p, err := ioutil.TempDir("", "NomadClient")
		if err != nil {
			return fmt.Errorf("failed creating temporary directory for the AllocDir: %v", err)
		}

		p, err = filepath.EvalSymlinks(p)
		if err != nil {
			return fmt.Errorf("failed to find temporary directory for the AllocDir: %v", err)
		}

		// Change the permissions to have the execute bit
		if err := os.Chmod(p, 0711); err != nil {
			return fmt.Errorf("failed to change directory permissions for the AllocDir: %v", err)
		}

		c.config.AllocDir = p
	}

	c.logger.Info("using alloc directory", "alloc_dir", c.config.AllocDir)
	return nil
}
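// A minimal sketch (hypothetical caller, not part of this file) of
// constructing a client and waiting for the first fingerprint batch,
// mirroring the grace period NewClient itself applies:
//
//	c, err := client.NewClient(cfg, consulCatalog, consulService)
//	if err != nil {
//		return err
//	}
//	select {
//	case <-c.Ready():
//		// fingerprints applied and node updated
//	case <-time.After(time.Minute):
//		// proceed with whatever has been fingerprinted so far
//	}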
// reloadTLSConnections allows a client to reload its TLS configuration on the
// fly
func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error {
	var tlsWrap tlsutil.RegionWrapper
	if newConfig != nil && newConfig.EnableRPC {
		tw, err := tlsutil.NewTLSConfiguration(newConfig, true, true)
		if err != nil {
			return err
		}

		twWrap, err := tw.OutgoingTLSWrapper()
		if err != nil {
			return err
		}
		tlsWrap = twWrap
	}

	// Store the new tls wrapper.
	c.tlsWrapLock.Lock()
	c.tlsWrap = tlsWrap
	c.tlsWrapLock.Unlock()

	// Keep the client configuration up to date as we use configuration values to
	// decide on what type of connections to accept
	c.configLock.Lock()
	c.config.TLSConfig = newConfig
	c.configLock.Unlock()

	c.connPool.ReloadTLS(tlsWrap)

	return nil
}

// Reload allows a client to reload its configuration on the fly
func (c *Client) Reload(newConfig *config.Config) error {
	shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(c.config.TLSConfig, newConfig.TLSConfig)
	if err != nil {
		c.logger.Error("error parsing TLS configuration", "error", err)
		return err
	}

	if shouldReloadTLS {
		return c.reloadTLSConnections(newConfig.TLSConfig)
	}

	return nil
}

// Leave is used to prepare the client to leave the cluster
func (c *Client) Leave() error {
	// TODO
	return nil
}

// GetConfig returns the config of the client
func (c *Client) GetConfig() *config.Config {
	c.configLock.Lock()
	defer c.configLock.Unlock()
	return c.configCopy
}

// Datacenter returns the datacenter for the given client
func (c *Client) Datacenter() string {
	return c.config.Node.Datacenter
}

// Region returns the region for the given client
func (c *Client) Region() string {
	return c.config.Region
}

// NodeID returns the node ID for the given client
func (c *Client) NodeID() string {
	return c.config.Node.ID
}

// secretNodeID returns the secret node ID for the given client
func (c *Client) secretNodeID() string {
	return c.config.Node.SecretID
}

// AuthToken returns the ACL token for client RPC authentication
func (c *Client) AuthToken() string {
	return c.config.Node.Token
}

// RPCMajorVersion returns the structs.ApiMajorVersion supported by the
// client.
func (c *Client) RPCMajorVersion() int {
	return structs.ApiMajorVersion
}

// RPCMinorVersion returns the structs.ApiMinorVersion supported by the
// client.
func (c *Client) RPCMinorVersion() int {
	return structs.ApiMinorVersion
}
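// A sketch (hypothetical agent code) of driving the reload path above on a
// configuration change; newCfg is assumed to be the freshly parsed agent
// config, and only its TLS settings are considered by Reload:
//
//	if err := c.Reload(newCfg); err != nil {
//		return fmt.Errorf("client reload failed: %v", err)
//	}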
// Shutdown is used to tear down the client
func (c *Client) Shutdown() error {
	c.shutdownLock.Lock()
	defer c.shutdownLock.Unlock()

	if c.shutdown {
		c.logger.Info("already shutdown")
		return nil
	}
	c.logger.Info("shutting down")

	// Stop renewing tokens and secrets
	if c.vaultClient != nil {
		c.vaultClient.Stop()
	}

	// Stop the garbage collector
	c.garbageCollector.Stop()

	arGroup := group{}
	if c.config.DevMode {
		// In DevMode destroy all the running allocations.
		for _, ar := range c.getAllocRunners() {
			ar.Destroy()
			arGroup.AddCh(ar.DestroyCh())
		}
	} else {
		// In normal mode call shutdown
		for _, ar := range c.getAllocRunners() {
			ar.Shutdown()
			arGroup.AddCh(ar.ShutdownCh())
		}
	}
	arGroup.Wait()

	// Shutdown the plugin managers
	c.pluginManagers.Shutdown()

	c.shutdown = true
	close(c.shutdownCh)

	// Must close connection pool to unblock alloc watcher
	c.connPool.Shutdown()

	// Wait for goroutines to stop
	c.shutdownGroup.Wait()

	// One final state save
	c.saveState()
	return c.stateDB.Close()
}

// Stats is used to return statistics for debugging and insight
// into various sub-systems
func (c *Client) Stats() map[string]map[string]string {
	c.heartbeatLock.Lock()
	defer c.heartbeatLock.Unlock()
	stats := map[string]map[string]string{
		"client": {
			"node_id":         c.NodeID(),
			"known_servers":   strings.Join(c.GetServers(), ","),
			"num_allocations": strconv.Itoa(c.NumAllocs()),
			"last_heartbeat":  fmt.Sprintf("%v", time.Since(c.lastHeartbeat())),
			"heartbeat_ttl":   fmt.Sprintf("%v", c.heartbeatTTL),
		},
		"runtime": hstats.RuntimeStats(),
	}
	return stats
}

// GetAlloc returns an allocation or an error.
func (c *Client) GetAlloc(allocID string) (*structs.Allocation, error) {
	ar, err := c.getAllocRunner(allocID)
	if err != nil {
		return nil, err
	}

	return ar.Alloc(), nil
}

// SignalAllocation sends a signal to the tasks within an allocation.
// If the provided task is empty, then every task in the allocation will be
// signalled. If a task is provided, then only an exactly matching task will
// be signalled.
func (c *Client) SignalAllocation(allocID, task, signal string) error {
	ar, err := c.getAllocRunner(allocID)
	if err != nil {
		return err
	}

	return ar.Signal(task, signal)
}

// CollectAllocation garbage collects a single allocation on a node. Returns
// true if the alloc was found and garbage collected; otherwise false.
func (c *Client) CollectAllocation(allocID string) bool {
	return c.garbageCollector.Collect(allocID)
}

// CollectAllAllocs garbage collects all allocations on a node in the terminal
// state
func (c *Client) CollectAllAllocs() {
	c.garbageCollector.CollectAll()
}

// RestartAllocation restarts either a single task in an allocation or, if no
// task name is given, all tasks in the allocation.
func (c *Client) RestartAllocation(allocID, taskName string) error {
	ar, err := c.getAllocRunner(allocID)
	if err != nil {
		return err
	}

	event := structs.NewTaskEvent(structs.TaskRestartSignal).
		SetRestartReason("User requested restart")

	if taskName != "" {
		return ar.RestartTask(taskName, event)
	}

	return ar.RestartAll(event)
}

// Node returns the locally registered node
func (c *Client) Node() *structs.Node {
	c.configLock.RLock()
	defer c.configLock.RUnlock()
	return c.configCopy.Node
}
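// A sketch (hypothetical caller) of the alloc-level control APIs above;
// "web" is an assumed task name:
//
//	// Signal a single task in the allocation.
//	if err := c.SignalAllocation(allocID, "web", "SIGHUP"); err != nil {
//		return err
//	}
//	// Restart every task in the allocation.
//	if err := c.RestartAllocation(allocID, ""); err != nil {
//		return err
//	}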
// getAllocRunner returns an AllocRunner or an UnknownAllocation error if the
// client has no runner for the given alloc ID.
func (c *Client) getAllocRunner(allocID string) (AllocRunner, error) {
	c.allocLock.RLock()
	defer c.allocLock.RUnlock()

	ar, ok := c.allocs[allocID]
	if !ok {
		return nil, structs.NewErrUnknownAllocation(allocID)
	}

	return ar, nil
}

// StatsReporter exposes the various APIs related to resource usage of a Nomad
// client
func (c *Client) StatsReporter() ClientStatsReporter {
	return c
}

// GetAllocStats returns the AllocStatsReporter for the given allocation.
func (c *Client) GetAllocStats(allocID string) (interfaces.AllocStatsReporter, error) {
	ar, err := c.getAllocRunner(allocID)
	if err != nil {
		return nil, err
	}
	return ar.StatsReporter(), nil
}

// LatestHostStats returns all the stats related to a Nomad client
func (c *Client) LatestHostStats() *stats.HostStats {
	return c.hostStatsCollector.Stats()
}

// LatestDeviceResourceStats returns the host device stats for the given set
// of allocated devices.
func (c *Client) LatestDeviceResourceStats(devices []*structs.AllocatedDeviceResource) []*device.DeviceGroupStats {
	return c.computeAllocatedDeviceGroupStats(devices, c.LatestHostStats().DeviceStats)
}

func (c *Client) computeAllocatedDeviceGroupStats(devices []*structs.AllocatedDeviceResource, hostDeviceGroupStats []*device.DeviceGroupStats) []*device.DeviceGroupStats {
	// basic optimization for the usual case
	if len(devices) == 0 || len(hostDeviceGroupStats) == 0 {
		return nil
	}

	// Build an index of allocated devices
	adIdx := map[structs.DeviceIdTuple][]string{}

	total := 0
	for _, ds := range devices {
		adIdx[*ds.ID()] = ds.DeviceIDs
		total += len(ds.DeviceIDs)
	}

	// Collect allocated device stats from host stats
	result := make([]*device.DeviceGroupStats, 0, len(adIdx))

	for _, dg := range hostDeviceGroupStats {
		k := structs.DeviceIdTuple{
			Vendor: dg.Vendor,
			Type:   dg.Type,
			Name:   dg.Name,
		}

		allocatedDeviceIDs, ok := adIdx[k]
		if !ok {
			continue
		}

		rdgStats := &device.DeviceGroupStats{
			Vendor:        dg.Vendor,
			Type:          dg.Type,
			Name:          dg.Name,
			InstanceStats: map[string]*device.DeviceStats{},
		}

		for _, adID := range allocatedDeviceIDs {
			deviceStats, ok := dg.InstanceStats[adID]
			if !ok || deviceStats == nil {
				c.logger.Warn("device not found in stats", "device_id", adID, "device_group_id", k)
				continue
			}

			rdgStats.InstanceStats[adID] = deviceStats
		}
		result = append(result, rdgStats)
	}

	return result
}

// ValidateMigrateToken verifies that a token is for a specific client and
// allocation, and has been created by a trusted party that has privileged
// knowledge of the client's secret identifier
func (c *Client) ValidateMigrateToken(allocID, migrateToken string) bool {
	if !c.config.ACLEnabled {
		return true
	}

	return structs.CompareMigrateToken(allocID, c.secretNodeID(), migrateToken)
}

// GetAllocFS returns the AllocFS interface for the alloc dir of an allocation
func (c *Client) GetAllocFS(allocID string) (allocdir.AllocDirFS, error) {
	ar, err := c.getAllocRunner(allocID)
	if err != nil {
		return nil, err
	}
	return ar.GetAllocDir(), nil
}
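// A sketch (hypothetical caller) of reading per-instance stats for an
// allocation's reserved devices; devices is assumed to be the
// []*structs.AllocatedDeviceResource slice taken from the allocation's
// allocated resources:
//
//	for _, group := range c.LatestDeviceResourceStats(devices) {
//		for id, s := range group.InstanceStats {
//			fmt.Println(group.Vendor, group.Type, group.Name, id, s.Summary)
//		}
//	}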
// GetAllocState returns a copy of an allocation's state on this client. It
// returns either an AllocState or an unknown allocation error.
func (c *Client) GetAllocState(allocID string) (*arstate.State, error) {
	ar, err := c.getAllocRunner(allocID)
	if err != nil {
		return nil, err
	}

	return ar.AllocState(), nil
}

// GetServers returns the list of nomad servers this client is aware of.
func (c *Client) GetServers() []string {
	endpoints := c.servers.GetServers()
	res := make([]string, len(endpoints))
	for i := range endpoints {
		res[i] = endpoints[i].String()
	}
	sort.Strings(res)
	return res
}

// SetServers sets a new list of nomad servers to connect to. As long as one
// server is resolvable no error is returned.
func (c *Client) SetServers(in []string) (int, error) {
	return c.setServersImpl(in, false)
}

// setServersImpl sets a new list of nomad servers to connect to. If force is
// set, we add the server to the internal serverlist even if the server could not
// be pinged. An error is returned if no endpoints were valid when non-forcing.
//
// Force should be used when setting the servers from the initial configuration
// since the server may be starting up in parallel and initial pings may fail.
func (c *Client) setServersImpl(in []string, force bool) (int, error) {
	var mu sync.Mutex
	var wg sync.WaitGroup
	var merr multierror.Error

	endpoints := make([]*servers.Server, 0, len(in))
	wg.Add(len(in))

	for _, s := range in {
		go func(srv string) {
			defer wg.Done()
			addr, err := resolveServer(srv)
			if err != nil {
				mu.Lock()
				c.logger.Debug("ignoring server due to resolution error", "error", err, "server", srv)
				merr.Errors = append(merr.Errors, err)
				mu.Unlock()
				return
			}

			// Try to ping to check if it is a real server
			if err := c.Ping(addr); err != nil {
				mu.Lock()
				merr.Errors = append(merr.Errors, fmt.Errorf("Server at address %s failed ping: %v", addr, err))
				mu.Unlock()

				// If we are forcing the setting of the servers, inject it to
				// the serverlist even if we can't ping immediately.
				if !force {
					return
				}
			}

			mu.Lock()
			endpoints = append(endpoints, &servers.Server{Addr: addr})
			mu.Unlock()
		}(s)
	}

	wg.Wait()

	// Only return errors if no servers are valid
	if len(endpoints) == 0 {
		if len(merr.Errors) > 0 {
			return 0, merr.ErrorOrNil()
		}
		return 0, noServersErr
	}

	c.servers.SetServers(endpoints)
	return len(endpoints), nil
}
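// A sketch (hypothetical caller, placeholder addresses) of repointing a
// running client at a new server set:
//
//	n, err := c.SetServers([]string{"10.0.0.1:4647", "nomad-server.example.com:4647"})
//	if err != nil {
//		// none of the endpoints resolved and pinged successfully
//		return err
//	}
//	c.logger.Info("updated servers", "num_valid", n)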
// restoreState is used to restore our state from the data dir.
// If there are errors restoring a specific allocation it is marked
// as failed whenever possible.
func (c *Client) restoreState() error {
	if c.config.DevMode {
		return nil
	}

	//XXX REMOVED! make a note in backward compat / upgrading doc
	// COMPAT: Remove in 0.7.0
	// 0.6.0 transitioned from individual state files to a single bolt-db.
	// The upgrade path is to:
	// Check if old state exists
	// If so, restore from that and delete old state
	// Restore using state database

	// Restore allocations
	allocs, allocErrs, err := c.stateDB.GetAllAllocations()
	if err != nil {
		return err
	}

	for allocID, err := range allocErrs {
		c.logger.Error("error restoring alloc", "error", err, "alloc_id", allocID)
		//TODO Cleanup
		// Try to clean up alloc dir
		// Remove boltdb entries?
		// Send to server with clientstatus=failed
	}

	// Load each alloc back
	for _, alloc := range allocs {

		// COMPAT(0.12): remove once upgrading from 0.9.5 is no longer supported
		// See hasLocalState for details. Skipping suspicious allocs
		// now. If allocs should be run, they will be started when the client
		// gets allocs from servers.
		if !c.hasLocalState(alloc) {
			c.logger.Warn("found an alloc without any local state, skipping restore", "alloc_id", alloc.ID)
			continue
		}

		//XXX On Restore we give up on watching previous allocs because
		// we need the local AllocRunners initialized first. We could
		// add a second loop to initialize just the alloc watcher.
		prevAllocWatcher := allocwatcher.NoopPrevAlloc{}
		prevAllocMigrator := allocwatcher.NoopPrevAlloc{}

		c.configLock.RLock()
		arConf := &allocrunner.Config{
			Alloc:               alloc,
			Logger:              c.logger,
			ClientConfig:        c.configCopy,
			StateDB:             c.stateDB,
			StateUpdater:        c,
			DeviceStatsReporter: c,
			Consul:              c.consulService,
			ConsulSI:            c.tokensClient,
			Vault:               c.vaultClient,
			PrevAllocWatcher:    prevAllocWatcher,
			PrevAllocMigrator:   prevAllocMigrator,
			DynamicRegistry:     c.dynamicRegistry,
			CSIManager:          c.csimanager,
			DeviceManager:       c.devicemanager,
			DriverManager:       c.drivermanager,
			ServersContactedCh:  c.serversContactedCh,
			RPCClient:           c,
		}
		c.configLock.RUnlock()

		ar, err := allocrunner.NewAllocRunner(arConf)
		if err != nil {
			c.logger.Error("error running alloc", "error", err, "alloc_id", alloc.ID)
			c.handleInvalidAllocs(alloc, err)
			continue
		}

		// Restore state
		if err := ar.Restore(); err != nil {
			c.logger.Error("error restoring alloc", "error", err, "alloc_id", alloc.ID)
			// Override the status of the alloc to failed
			ar.SetClientStatus(structs.AllocClientStatusFailed)
			// Destroy the alloc runner since this is a failed restore
			ar.Destroy()
			continue
		}

		// Maybe mark the alloc for halt on missing server heartbeats
		if c.heartbeatStop.shouldStop(alloc) {
			err = c.heartbeatStop.stopAlloc(alloc.ID)
			if err != nil {
				c.logger.Error("error stopping alloc", "error", err, "alloc_id", alloc.ID)
			}
			continue
		}

		//XXX is this locking necessary?
		c.allocLock.Lock()
		c.allocs[alloc.ID] = ar
		c.allocLock.Unlock()

		c.heartbeatStop.allocHook(alloc)
	}

	// All allocs restored successfully, run them!
	c.allocLock.Lock()
	for _, ar := range c.allocs {
		go ar.Run()
	}
	c.allocLock.Unlock()
	return nil
}
// hasLocalState returns true if we have any other associated state
// with alloc beyond the task itself
//
// Useful for detecting if a potentially completed alloc got resurrected
// after AR was destroyed. In such cases, re-running the alloc leads to
// unexpected reruns and may lead to process and task exhaustion on the node.
//
// The heuristic used here is that an alloc is suspect if no other
// task/status information is found for it.
//
// Also, an alloc without any client state will not be restored correctly;
// there will be no task processes to reattach to, etc. In such cases, the
// client should wait until it gets allocs from the server to launch them.
//
// See:
//  * https://github.com/hashicorp/nomad/pull/6207
//  * https://github.com/hashicorp/nomad/issues/5984
//
// COMPAT(0.12): remove once upgrading from 0.9.5 is no longer supported
func (c *Client) hasLocalState(alloc *structs.Allocation) bool {
	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
	if tg == nil {
		// corrupt alloc?!
		return false
	}

	for _, task := range tg.Tasks {
		ls, tr, _ := c.stateDB.GetTaskRunnerState(alloc.ID, task.Name)
		if ls != nil || tr != nil {
			return true
		}
	}

	return false
}

func (c *Client) handleInvalidAllocs(alloc *structs.Allocation, err error) {
	c.invalidAllocsLock.Lock()
	c.invalidAllocs[alloc.ID] = struct{}{}
	c.invalidAllocsLock.Unlock()

	// Mark alloc as failed so server can handle this
	failed := makeFailedAlloc(alloc, err)
	select {
	case c.allocUpdates <- failed:
	case <-c.shutdownCh:
	}
}

// saveState is used to snapshot our state into the data dir.
func (c *Client) saveState() error {
	var wg sync.WaitGroup
	var l sync.Mutex
	var mErr multierror.Error
	runners := c.getAllocRunners()
	wg.Add(len(runners))

	for id, ar := range runners {
		go func(id string, ar AllocRunner) {
			err := ar.PersistState()
			if err != nil {
				c.logger.Error("error saving alloc state", "error", err, "alloc_id", id)
				l.Lock()
				multierror.Append(&mErr, err)
				l.Unlock()
			}
			wg.Done()
		}(id, ar)
	}

	wg.Wait()
	return mErr.ErrorOrNil()
}

// getAllocRunners returns a snapshot of the current set of alloc runners.
func (c *Client) getAllocRunners() map[string]AllocRunner {
	c.allocLock.RLock()
	defer c.allocLock.RUnlock()
	runners := make(map[string]AllocRunner, len(c.allocs))
	for id, ar := range c.allocs {
		runners[id] = ar
	}
	return runners
}

// NumAllocs returns the number of un-GC'd allocs this client has. Used to
// fulfill the AllocCounter interface for the GC.
func (c *Client) NumAllocs() int {
	n := 0
	c.allocLock.RLock()
	for _, a := range c.allocs {
		if !a.IsDestroyed() {
			n++
		}
	}
	c.allocLock.RUnlock()
	return n
}
// nodeID restores, or generates if necessary, a unique node ID and SecretID.
// The node ID is, if available, a persistent unique ID. The secret ID is a
// high-entropy random UUID.
func (c *Client) nodeID() (id, secret string, err error) {
	var hostID string
	hostInfo, err := host.Info()
	if !c.config.NoHostUUID && err == nil {
		if hashed, ok := helper.HashUUID(hostInfo.HostID); ok {
			hostID = hashed
		}
	}

	if hostID == "" {
		// Generate a random hostID if no constant ID is available on
		// this platform.
		hostID = uuid.Generate()
	}

	// Do not persist in dev mode
	if c.config.DevMode {
		return hostID, uuid.Generate(), nil
	}

	// Attempt to read existing ID
	idPath := filepath.Join(c.config.StateDir, "client-id")
	idBuf, err := ioutil.ReadFile(idPath)
	if err != nil && !os.IsNotExist(err) {
		return "", "", err
	}

	// Attempt to read existing secret ID
	secretPath := filepath.Join(c.config.StateDir, "secret-id") // TODO: override this from command args
	secretBuf, err := ioutil.ReadFile(secretPath)
	if err != nil && !os.IsNotExist(err) {
		return "", "", err
	}

	// Use existing ID if any
	if len(idBuf) != 0 {
		id = strings.ToLower(string(idBuf))
	} else {
		id = hostID

		// Persist the ID
		if err := ioutil.WriteFile(idPath, []byte(id), 0700); err != nil {
			return "", "", err
		}
	}

	if len(secretBuf) != 0 {
		secret = string(secretBuf)
	} else {
		// Generate a new secret ID
		secret = uuid.Generate()

		// Persist the secret ID
		if err := ioutil.WriteFile(secretPath, []byte(secret), 0700); err != nil {
			return "", "", err
		}
	}

	return id, secret, nil
}

// setupNode is used to setup the initial node
func (c *Client) setupNode() error {
	node := c.config.Node
	if node == nil {
		node = &structs.Node{}
		c.config.Node = node
	}
	// Generate an ID and secret for the node
	id, secretID, err := c.nodeID()
	if err != nil {
		return fmt.Errorf("node ID setup failed: %v", err)
	}

	node.ID = id
	node.SecretID = secretID
	if node.Attributes == nil {
		node.Attributes = make(map[string]string)
	}
	if node.Links == nil {
		node.Links = make(map[string]string)
	}
	if node.Drivers == nil {
		node.Drivers = make(map[string]*structs.DriverInfo)
	}
	if node.CSIControllerPlugins == nil {
		node.CSIControllerPlugins = make(map[string]*structs.CSIInfo)
	}
	if node.CSINodePlugins == nil {
		node.CSINodePlugins = make(map[string]*structs.CSIInfo)
	}
	if node.Meta == nil {
		node.Meta = make(map[string]string)
	}
	if node.NodeResources == nil {
		node.NodeResources = &structs.NodeResources{}
	}
	if node.ReservedResources == nil {
		node.ReservedResources = &structs.NodeReservedResources{}
	}
	if node.Resources == nil {
		node.Resources = &structs.Resources{}
	}
	if node.Reserved == nil {
		node.Reserved = &structs.Resources{}
	}
	if node.Datacenter == "" {
		node.Datacenter = "dc1"
	}
	if node.Name == "" {
		node.Name, _ = os.Hostname()
	}
	if node.HostVolumes == nil {
		if l := len(c.config.HostVolumes); l != 0 {
			node.HostVolumes = make(map[string]*structs.ClientHostVolumeConfig, l)
			for k, v := range c.config.HostVolumes {
				if _, err := os.Stat(v.Path); err != nil {
					return fmt.Errorf("failed to validate volume %s, err: %v", v.Name, err)
				}
				node.HostVolumes[k] = v.Copy()
			}
		}
	}

	if node.Name == "" {
		node.Name = node.ID
	}
	node.Status = structs.NodeStatusInit

	// Setup default meta
	if _, ok := node.Meta["connect.sidecar_image"]; !ok {
		node.Meta["connect.sidecar_image"] = defaultConnectSidecarImage
	}
	if _, ok := node.Meta["connect.log_level"]; !ok {
		node.Meta["connect.log_level"] = defaultConnectLogLevel
	}

	return nil
}
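// A sketch (hypothetical values) of how the fingerprint diff below is
// applied: non-empty attribute values are set on the node, while empty
// values delete the key.
//
//	resp := &fingerprint.FingerprintResponse{
//		Attributes: map[string]string{
//			"driver.docker": "1", // added or updated on the node
//			"driver.rkt":    "",  // removed from the node
//		},
//	}
//	node := c.updateNodeFromFingerprint(resp)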
// updateNodeFromFingerprint updates the node with the result of
// fingerprinting the node from the diff that was created
func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResponse) *structs.Node {
	c.configLock.Lock()
	defer c.configLock.Unlock()

	nodeHasChanged := false

	for name, newVal := range response.Attributes {
		oldVal := c.config.Node.Attributes[name]
		if oldVal == newVal {
			continue
		}

		nodeHasChanged = true
		if newVal == "" {
			delete(c.config.Node.Attributes, name)
		} else {
			c.config.Node.Attributes[name] = newVal
		}
	}

	// update node links and resources from the diff created from
	// fingerprinting
	for name, newVal := range response.Links {
		oldVal := c.config.Node.Links[name]
		if oldVal == newVal {
			continue
		}

		nodeHasChanged = true
		if newVal == "" {
			delete(c.config.Node.Links, name)
		} else {
			c.config.Node.Links[name] = newVal
		}
	}

	// COMPAT(0.10): Remove in 0.10
	// update the response networks with the config
	// if we still have node changes, merge them
	if response.Resources != nil {
		response.Resources.Networks = updateNetworks(
			c.config.Node.Resources.Networks,
			response.Resources.Networks,
			c.config)
		if !c.config.Node.Resources.Equals(response.Resources) {
			c.config.Node.Resources.Merge(response.Resources)
			nodeHasChanged = true
		}
	}

	// update the response networks with the config
	// if we still have node changes, merge them
	if response.NodeResources != nil {
		response.NodeResources.Networks = updateNetworks(
			c.config.Node.NodeResources.Networks,
			response.NodeResources.Networks,
			c.config)
		if !c.config.Node.NodeResources.Equals(response.NodeResources) {
			c.config.Node.NodeResources.Merge(response.NodeResources)
			nodeHasChanged = true
		}
	}

	if nodeHasChanged {
		c.updateNodeLocked()
	}

	return c.configCopy.Node
}

// updateNetworks preserves manually configured network options, but
// applies fingerprint updates
func updateNetworks(ns structs.Networks, up structs.Networks, c *config.Config) structs.Networks {
	if c.NetworkInterface == "" {
		ns = up
	} else {
		// If a network device is configured, filter up to contain details for only
		// that device
		upd := []*structs.NetworkResource{}
		for _, n := range up {
			if c.NetworkInterface == n.Device {
				upd = append(upd, n)
			}
		}
		// If there are updates, use them. Otherwise, ns contains the configured
		// interfaces
		if len(upd) > 0 {
			ns = upd
		}
	}

	// ns is set, apply the config NetworkSpeed to all
	if c.NetworkSpeed != 0 {
		for _, n := range ns {
			n.MBits = c.NetworkSpeed
		}
	}
	return ns
}

// retryIntv calculates a retry interval value given the base
func (c *Client) retryIntv(base time.Duration) time.Duration {
	if c.config.DevMode {
		return devModeRetryIntv
	}
	return base + lib.RandomStagger(base)
}
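// For instance (a worked example, not additional behavior): with
// base = registerRetryIntv (15s), retryIntv returns a duration drawn
// uniformly from [15s, 30s), since lib.RandomStagger(base) jitters in
// [0, base).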
// registerAndHeartbeat is a long lived goroutine used to register the client
// and then start heartbeating to the server.
func (c *Client) registerAndHeartbeat() {
	// Register the node
	c.retryRegisterNode()

	// Start watching for node changes
	go c.watchNodeUpdates()

	// Start watching for node events to emit
	go c.watchNodeEvents()

	// Setup the heartbeat timer. For the initial registration
	// we want to do this quickly. We want to do it extra quickly
	// in development mode.
	var heartbeat <-chan time.Time
	if c.config.DevMode {
		heartbeat = time.After(0)
	} else {
		heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
	}

	for {
		select {
		case <-c.rpcRetryWatcher():
		case <-heartbeat:
		case <-c.shutdownCh:
			return
		}
		if err := c.updateNodeStatus(); err != nil {
			// The servers have changed such that this node has not been
			// registered before
			if strings.Contains(err.Error(), "node not found") {
				// Re-register the node
				c.logger.Info("re-registering node")
				c.retryRegisterNode()
				heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
			} else {
				intv := c.getHeartbeatRetryIntv(err)
				c.logger.Error("error heartbeating. retrying", "error", err, "period", intv)
				heartbeat = time.After(intv)

				// If heartbeating fails, trigger Consul discovery
				c.triggerDiscovery()
			}
		} else {
			c.heartbeatLock.Lock()
			heartbeat = time.After(c.heartbeatTTL)
			c.heartbeatLock.Unlock()
		}
	}
}

// lastHeartbeat returns the time of the last successful heartbeat.
func (c *Client) lastHeartbeat() time.Time {
	return c.heartbeatStop.getLastOk()
}
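// A worked example of the retry logic in getHeartbeatRetryIntv below (a
// sketch, not normative): with ttl = 10s and the last successful heartbeat
// 12s ago, left = -2s, so the retry fires in 1-2s. Once the miss exceeds
// 30s, left is flipped to its absolute value and the stagger is drawn from
// that growing window, then clamped so retries land roughly between one
// second and thirty seconds apart.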
// getHeartbeatRetryIntv is used to retrieve the time to wait before attempting
// another heartbeat.
func (c *Client) getHeartbeatRetryIntv(err error) time.Duration {
	if c.config.DevMode {
		return devModeRetryIntv
	}

	// Collect the useful heartbeat info
	c.heartbeatLock.Lock()
	haveHeartbeated := c.haveHeartbeated
	last := c.lastHeartbeat()
	ttl := c.heartbeatTTL
	c.heartbeatLock.Unlock()

	// If we haven't even successfully heartbeated once or there is no leader
	// treat it as a registration. In the case that there is a leadership loss,
	// we will have our heartbeat timer reset to a much larger threshold, so
	// do not put unnecessary pressure on the new leader.
	if !haveHeartbeated || err == structs.ErrNoLeader {
		return c.retryIntv(registerRetryIntv)
	}

	// Determine how much time we have left to heartbeat
	left := last.Add(ttl).Sub(time.Now())

	// Logic for retrying is:
	// * Do not retry faster than once a second
	// * Do not retry less than once every 30 seconds
	// * If we have missed the heartbeat by more than 30 seconds, start to use
	//   the absolute time since we do not want to retry indefinitely
	switch {
	case left < -30*time.Second:
		// Make left the absolute value so we delay and jitter properly.
		left *= -1
	case left < 0:
		return time.Second + lib.RandomStagger(time.Second)
	default:
	}

	stagger := lib.RandomStagger(left)
	switch {
	case stagger < time.Second:
		return time.Second + lib.RandomStagger(time.Second)
	case stagger > 30*time.Second:
		return 25*time.Second + lib.RandomStagger(5*time.Second)
	default:
		return stagger
	}
}

// periodicSnapshot is a long lived goroutine used to periodically snapshot the
// state of the client
func (c *Client) periodicSnapshot() {
	// Create a snapshot timer
	snapshot := time.After(stateSnapshotIntv)

	for {
		select {
		case <-snapshot:
			snapshot = time.After(stateSnapshotIntv)
			if err := c.saveState(); err != nil {
				c.logger.Error("error saving state", "error", err)
			}

		case <-c.shutdownCh:
			return
		}
	}
}

// run is a long lived goroutine used to run the client. Shutdown() stops it first
func (c *Client) run() {
	// Watch for changes in allocations
	allocUpdates := make(chan *allocUpdates, 8)
	go c.watchAllocations(allocUpdates)

	for {
		select {
		case update := <-allocUpdates:
			// Don't apply updates while shutting down.
			c.shutdownLock.Lock()
			if c.shutdown {
				c.shutdownLock.Unlock()
				return
			}

			// Apply updates inside the lock to prevent a concurrent
			// shutdown.
			c.runAllocs(update)
			c.shutdownLock.Unlock()

		case <-c.shutdownCh:
			return
		}
	}
}

// submitNodeEvents is used to submit a client-side node event. Examples of
// these kinds of events include when a driver moves from healthy to unhealthy
// (and vice versa)
func (c *Client) submitNodeEvents(events []*structs.NodeEvent) error {
	nodeID := c.NodeID()
	nodeEvents := map[string][]*structs.NodeEvent{
		nodeID: events,
	}
	req := structs.EmitNodeEventsRequest{
		NodeEvents: nodeEvents,
		WriteRequest: structs.WriteRequest{
			Region:    c.Region(),
			AuthToken: c.AuthToken(),
		},
	}
	var resp structs.EmitNodeEventsResponse
	if err := c.RPC("Node.EmitEvents", &req, &resp); err != nil {
		return fmt.Errorf("Emitting node events failed: %v", err)
	}
	return nil
}
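// A sketch (hypothetical event; the builder methods are assumed to be the
// ones provided by nomad/structs) of feeding an event into the batching
// loop below:
//
//	e := structs.NewNodeEvent().
//		SetSubsystem(structs.NodeEventSubsystemDriver).
//		SetMessage("docker driver became unhealthy")
//	c.triggerNodeEvent(e)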
// watchNodeEvents is a handler which receives node events on an interval
// and submits them in batch format to the server
func (c *Client) watchNodeEvents() {
	// batchEvents stores events that have yet to be published
	var batchEvents []*structs.NodeEvent

	timer := stoppedTimer()
	defer timer.Stop()

	for {
		select {
		case event := <-c.triggerEmitNodeEvent:
			if l := len(batchEvents); l <= structs.MaxRetainedNodeEvents {
				batchEvents = append(batchEvents, event)
			} else {
				// Drop the oldest event
				c.logger.Warn("dropping node event", "node_event", batchEvents[0])
				batchEvents = append(batchEvents[1:], event)
			}
			timer.Reset(c.retryIntv(nodeUpdateRetryIntv))
		case <-timer.C:
			if err := c.submitNodeEvents(batchEvents); err != nil {
				c.logger.Error("error submitting node events", "error", err)
				timer.Reset(c.retryIntv(nodeUpdateRetryIntv))
			} else {
				// Reset the events since we successfully sent them.
				batchEvents = []*structs.NodeEvent{}
			}
		case <-c.shutdownCh:
			return
		}
	}
}

// triggerNodeEvent triggers an emit node event
func (c *Client) triggerNodeEvent(nodeEvent *structs.NodeEvent) {
	select {
	case c.triggerEmitNodeEvent <- nodeEvent:
		// emit node event goroutine was released to execute
	default:
		// emit node event goroutine was already running
	}
}

// retryRegisterNode is used to register the node or update the registration and
// retry in case of failure.
func (c *Client) retryRegisterNode() {
	for {
		err := c.registerNode()
		if err == nil {
			// Registered!
			return
		}

		if err == noServersErr {
			c.logger.Debug("registration waiting on servers")
			c.triggerDiscovery()
		} else {
			c.logger.Error("error registering", "error", err)
		}
		select {
		case <-c.rpcRetryWatcher():
		case <-time.After(c.retryIntv(registerRetryIntv)):
		case <-c.shutdownCh:
			return
		}
	}
}

// registerNode is used to register the node or update the registration
func (c *Client) registerNode() error {
	node := c.Node()
	req := structs.NodeRegisterRequest{
		Node: node,
		WriteRequest: structs.WriteRequest{
			Region:    c.Region(),
			AuthToken: c.AuthToken(),
		},
	}
	var resp structs.NodeUpdateResponse
	if err := c.RPC("Node.Register", &req, &resp); err != nil {
		return err
	}

	// Update the node status to ready after we register.
	c.configLock.Lock()
	node.Status = structs.NodeStatusReady
	c.config.Node.Status = structs.NodeStatusReady
	c.configLock.Unlock()

	c.logger.Info("node registration complete")
	if len(resp.EvalIDs) != 0 {
		c.logger.Debug("evaluations triggered by node registration", "num_evals", len(resp.EvalIDs))
	}

	c.heartbeatLock.Lock()
	defer c.heartbeatLock.Unlock()
	c.heartbeatStop.setLastOk(time.Now())
	c.heartbeatTTL = resp.HeartbeatTTL
	return nil
}
"since_last_heartbeat", time.Since(last)) 1824 } 1825 } 1826 1827 // Update the number of nodes in the cluster so we can adjust our server 1828 // rebalance rate. 1829 c.servers.SetNumNodes(resp.NumNodes) 1830 1831 // Convert []*NodeServerInfo to []*servers.Server 1832 nomadServers := make([]*servers.Server, 0, len(resp.Servers)) 1833 for _, s := range resp.Servers { 1834 addr, err := resolveServer(s.RPCAdvertiseAddr) 1835 if err != nil { 1836 c.logger.Warn("ignoring invalid server", "error", err, "server", s.RPCAdvertiseAddr) 1837 continue 1838 } 1839 e := &servers.Server{Addr: addr} 1840 nomadServers = append(nomadServers, e) 1841 } 1842 if len(nomadServers) == 0 { 1843 return fmt.Errorf("heartbeat response returned no valid servers") 1844 } 1845 c.servers.SetServers(nomadServers) 1846 1847 // Begin polling Consul if there is no Nomad leader. We could be 1848 // heartbeating to a Nomad server that is in the minority of a 1849 // partition of the Nomad server quorum, but this Nomad Agent still 1850 // has connectivity to the existing majority of Nomad Servers, but 1851 // only if it queries Consul. 1852 if resp.LeaderRPCAddr == "" { 1853 c.triggerDiscovery() 1854 } 1855 1856 return nil 1857 } 1858 1859 // AllocStateUpdated asynchronously updates the server with the current state 1860 // of an allocations and its tasks. 1861 func (c *Client) AllocStateUpdated(alloc *structs.Allocation) { 1862 if alloc.Terminated() { 1863 // Terminated, mark for GC if we're still tracking this alloc 1864 // runner. If it's not being tracked that means the server has 1865 // already GC'd it (see removeAlloc). 1866 ar, err := c.getAllocRunner(alloc.ID) 1867 1868 if err == nil { 1869 c.garbageCollector.MarkForCollection(alloc.ID, ar) 1870 1871 // Trigger a GC in case we're over thresholds and just 1872 // waiting for eligible allocs. 1873 c.garbageCollector.Trigger() 1874 } 1875 } 1876 1877 // Strip all the information that can be reconstructed at the server. Only 1878 // send the fields that are updatable by the client. 1879 stripped := new(structs.Allocation) 1880 stripped.ID = alloc.ID 1881 stripped.NodeID = c.NodeID() 1882 stripped.TaskStates = alloc.TaskStates 1883 stripped.ClientStatus = alloc.ClientStatus 1884 stripped.ClientDescription = alloc.ClientDescription 1885 stripped.DeploymentStatus = alloc.DeploymentStatus 1886 1887 select { 1888 case c.allocUpdates <- stripped: 1889 case <-c.shutdownCh: 1890 } 1891 } 1892 1893 // allocSync is a long lived function that batches allocation updates to the 1894 // server. 1895 func (c *Client) allocSync() { 1896 staggered := false 1897 syncTicker := time.NewTicker(allocSyncIntv) 1898 updates := make(map[string]*structs.Allocation) 1899 for { 1900 select { 1901 case <-c.shutdownCh: 1902 syncTicker.Stop() 1903 return 1904 case alloc := <-c.allocUpdates: 1905 // Batch the allocation updates until the timer triggers. 1906 updates[alloc.ID] = alloc 1907 case <-syncTicker.C: 1908 // Fast path if there are no updates 1909 if len(updates) == 0 { 1910 continue 1911 } 1912 1913 sync := make([]*structs.Allocation, 0, len(updates)) 1914 for _, alloc := range updates { 1915 sync = append(sync, alloc) 1916 } 1917 1918 // Send to server. 
1919 args := structs.AllocUpdateRequest{ 1920 Alloc: sync, 1921 WriteRequest: structs.WriteRequest{ 1922 Region: c.Region(), 1923 AuthToken: c.AuthToken(), 1924 }, 1925 } 1926 1927 var resp structs.GenericResponse 1928 if err := c.RPC("Node.UpdateAlloc", &args, &resp); err != nil { 1929 c.logger.Error("error updating allocations", "error", err) 1930 syncTicker.Stop() 1931 syncTicker = time.NewTicker(c.retryIntv(allocSyncRetryIntv)) 1932 staggered = true 1933 } else { 1934 updates = make(map[string]*structs.Allocation) 1935 if staggered { 1936 syncTicker.Stop() 1937 syncTicker = time.NewTicker(allocSyncIntv) 1938 staggered = false 1939 } 1940 } 1941 } 1942 } 1943 } 1944 1945 // allocUpdates holds the results of receiving updated allocations from the 1946 // servers. 1947 type allocUpdates struct { 1948 // index is the index of the server state store snapshot used for fetching alloc status 1949 index uint64 1950 1951 // pulled is the set of allocations that were downloaded from the servers. 1952 pulled map[string]*structs.Allocation 1953 1954 // filtered is the set of allocations that were not pulled because their 1955 // AllocModifyIndex didn't change. 1956 filtered map[string]struct{} 1957 1958 // migrateTokens is a map of tokens necessary for when clients pull data 1959 // from authorized volumes 1960 migrateTokens map[string]string 1961 } 1962 1963 // watchAllocations is used to scan for updates to allocations 1964 func (c *Client) watchAllocations(updates chan *allocUpdates) { 1965 // The request and response for getting the map of allocations that should 1966 // be running on the Node to their AllocModifyIndex which is incremented 1967 // when the allocation is updated by the servers. 1968 req := structs.NodeSpecificRequest{ 1969 NodeID: c.NodeID(), 1970 SecretID: c.secretNodeID(), 1971 QueryOptions: structs.QueryOptions{ 1972 Region: c.Region(), 1973 AllowStale: true, 1974 }, 1975 } 1976 var resp structs.NodeClientAllocsResponse 1977 1978 // The request and response for pulling down the set of allocations that are 1979 // new or updated server-side. 1980 allocsReq := structs.AllocsGetRequest{ 1981 QueryOptions: structs.QueryOptions{ 1982 Region: c.Region(), 1983 AllowStale: true, 1984 AuthToken: c.secretNodeID(), 1985 }, 1986 } 1987 var allocsResp structs.AllocsGetResponse 1988 1989 OUTER: 1990 for { 1991 // Get the allocation modify index map, blocking for updates. We will 1992 // use this to determine exactly what allocations need to be downloaded 1993 // in full. 1994 resp = structs.NodeClientAllocsResponse{} 1995 err := c.RPC("Node.GetClientAllocs", &req, &resp) 1996 if err != nil { 1997 // Shutdown often causes EOF errors, so check for shutdown first 1998 select { 1999 case <-c.shutdownCh: 2000 return 2001 default: 2002 } 2003 2004 // COMPAT: Remove in 0.6. This is to allow the case in which the 2005 // servers are not fully upgraded before the clients register.
This 2006 // can cause the SecretID to be lost 2007 if strings.Contains(err.Error(), "node secret ID does not match") { 2008 c.logger.Debug("secret mismatch; re-registering node", "error", err) 2009 c.retryRegisterNode() 2010 } else if err != noServersErr { 2011 c.logger.Error("error querying node allocations", "error", err) 2012 } 2013 retry := c.retryIntv(getAllocRetryIntv) 2014 select { 2015 case <-c.rpcRetryWatcher(): 2016 continue 2017 case <-time.After(retry): 2018 continue 2019 case <-c.shutdownCh: 2020 return 2021 } 2022 } 2023 2024 // Check for shutdown 2025 select { 2026 case <-c.shutdownCh: 2027 return 2028 default: 2029 } 2030 2031 // Filter all allocations whose AllocModifyIndex was not incremented. 2032 // These are the allocations who have either not been updated, or whose 2033 // updates are a result of the client sending an update for the alloc. 2034 // This lets us reduce the network traffic to the server as we don't 2035 // need to pull all the allocations. 2036 var pull []string 2037 filtered := make(map[string]struct{}) 2038 var pullIndex uint64 2039 for allocID, modifyIndex := range resp.Allocs { 2040 // Pull the allocation if we don't have an alloc runner for the 2041 // allocation or if the alloc runner requires an updated allocation. 2042 //XXX Part of Client alloc index tracking exp 2043 c.allocLock.RLock() 2044 currentAR, ok := c.allocs[allocID] 2045 c.allocLock.RUnlock() 2046 2047 // Ignore alloc updates for allocs that are invalid because of initialization errors 2048 c.invalidAllocsLock.Lock() 2049 _, isInvalid := c.invalidAllocs[allocID] 2050 c.invalidAllocsLock.Unlock() 2051 2052 if (!ok || modifyIndex > currentAR.Alloc().AllocModifyIndex) && !isInvalid { 2053 // Only pull allocs that are required. Filtered 2054 // allocs might be at a higher index, so ignore 2055 // it. 2056 if modifyIndex > pullIndex { 2057 pullIndex = modifyIndex 2058 } 2059 pull = append(pull, allocID) 2060 } else { 2061 filtered[allocID] = struct{}{} 2062 } 2063 } 2064 2065 // Pull the allocations that passed filtering. 2066 allocsResp.Allocs = nil 2067 var pulledAllocs map[string]*structs.Allocation 2068 if len(pull) != 0 { 2069 // Pull the allocations that need to be updated. 2070 allocsReq.AllocIDs = pull 2071 allocsReq.MinQueryIndex = pullIndex - 1 2072 allocsResp = structs.AllocsGetResponse{} 2073 if err := c.RPC("Alloc.GetAllocs", &allocsReq, &allocsResp); err != nil { 2074 c.logger.Error("error querying updated allocations", "error", err) 2075 retry := c.retryIntv(getAllocRetryIntv) 2076 select { 2077 case <-c.rpcRetryWatcher(): 2078 continue 2079 case <-time.After(retry): 2080 continue 2081 case <-c.shutdownCh: 2082 return 2083 } 2084 } 2085 2086 // Ensure that we received all the allocations we wanted 2087 pulledAllocs = make(map[string]*structs.Allocation, len(allocsResp.Allocs)) 2088 for _, alloc := range allocsResp.Allocs { 2089 2090 // handle an old Server 2091 alloc.Canonicalize() 2092 2093 pulledAllocs[alloc.ID] = alloc 2094 } 2095 2096 for _, desiredID := range pull { 2097 if _, ok := pulledAllocs[desiredID]; !ok { 2098 // We didn't get everything we wanted. Do not update the 2099 // MinQueryIndex, sleep and then retry. 
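// A rough sketch of the timing here, assuming retryIntv returns its base
// interval plus a random stagger in [0, base): the wait below lands
// somewhere in [2s, 4s), giving the (possibly stale) server time to catch
// up to pullIndex before the next attempt.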
2100 wait := c.retryIntv(2 * time.Second) 2101 select { 2102 case <-time.After(wait): 2103 // Wait for the server we contact to receive the 2104 // allocations 2105 continue OUTER 2106 case <-c.shutdownCh: 2107 return 2108 } 2109 } 2110 } 2111 2112 // Check for shutdown 2113 select { 2114 case <-c.shutdownCh: 2115 return 2116 default: 2117 } 2118 } 2119 2120 c.logger.Debug("updated allocations", "index", resp.Index, 2121 "total", len(resp.Allocs), "pulled", len(allocsResp.Allocs), "filtered", len(filtered)) 2122 2123 // Update the query index. 2124 if resp.Index > req.MinQueryIndex { 2125 req.MinQueryIndex = resp.Index 2126 } 2127 2128 // Push the updates. 2129 update := &allocUpdates{ 2130 filtered: filtered, 2131 pulled: pulledAllocs, 2132 migrateTokens: resp.MigrateTokens, 2133 index: resp.Index, 2134 } 2135 2136 select { 2137 case updates <- update: 2138 case <-c.shutdownCh: 2139 return 2140 } 2141 } 2142 } 2143 2144 // updateNodeLocked updates the Node copy and triggers the client to send the 2145 // updated Node to the server. The caller must hold the configLock while 2146 // calling this method. 2147 func (c *Client) updateNodeLocked() { 2148 // Update the config copy. 2149 node := c.config.Node.Copy() 2150 c.configCopy.Node = node 2151 2152 select { 2153 case c.triggerNodeUpdate <- struct{}{}: 2154 // Node update goroutine was released to execute 2155 default: 2156 // Node update goroutine was already running 2157 } 2158 } 2159 2160 // watchNodeUpdates blocks until it is edge triggered. Once triggered, 2161 // it will update the client node copy and re-register the node. 2162 func (c *Client) watchNodeUpdates() { 2163 var hasChanged bool 2164 2165 timer := stoppedTimer() 2166 defer timer.Stop() 2167 2168 for { 2169 select { 2170 case <-timer.C: 2171 c.logger.Debug("state changed, updating node and re-registering") 2172 c.retryRegisterNode() 2173 hasChanged = false 2174 case <-c.triggerNodeUpdate: 2175 if hasChanged { 2176 continue 2177 } 2178 hasChanged = true 2179 timer.Reset(c.retryIntv(nodeUpdateRetryIntv)) 2180 case <-c.shutdownCh: 2181 return 2182 } 2183 } 2184 } 2185 2186 // runAllocs is invoked when we get an updated set of allocations 2187 func (c *Client) runAllocs(update *allocUpdates) { 2188 // Get the existing allocs 2189 c.allocLock.RLock() 2190 existing := make(map[string]uint64, len(c.allocs)) 2191 for id, ar := range c.allocs { 2192 existing[id] = ar.Alloc().AllocModifyIndex 2193 } 2194 c.allocLock.RUnlock() 2195 2196 // Diff the existing and updated allocations 2197 diff := diffAllocs(existing, update) 2198 c.logger.Debug("allocation updates", "added", len(diff.added), "removed", len(diff.removed), 2199 "updated", len(diff.updated), "ignored", len(diff.ignore)) 2200 2201 errs := 0 2202 2203 // Remove the old allocations 2204 for _, remove := range diff.removed { 2205 c.removeAlloc(remove) 2206 } 2207 2208 // Update the existing allocations 2209 for _, update := range diff.updated { 2210 c.logger.Trace("updating alloc", "alloc_id", update.ID, "index", update.AllocModifyIndex) 2211 c.updateAlloc(update) 2212 } 2213 2214 // Make room for new allocations before running 2215 if err := c.garbageCollector.MakeRoomFor(diff.added); err != nil { 2216 c.logger.Error("error making room for new allocations", "error", err) 2217 errs++ 2218 } 2219 2220 // Start the new allocations 2221 for _, add := range diff.added { 2222 migrateToken := update.migrateTokens[add.ID] 2223 if err := c.addAlloc(add, migrateToken); err != nil { 2224 c.logger.Error("error adding alloc", "error", err,
"alloc_id", add.ID) 2225 errs++ 2226 // We mark the alloc as failed and send an update to the server 2227 // We track the fact that creating an allocrunner failed so that we don't send updates again 2228 if add.ClientStatus != structs.AllocClientStatusFailed { 2229 c.handleInvalidAllocs(add, err) 2230 } 2231 } 2232 } 2233 2234 // Mark servers as having been contacted so blocked tasks that failed 2235 // to restore can now restart. 2236 c.serversContactedOnce.Do(func() { 2237 close(c.serversContactedCh) 2238 }) 2239 2240 // Trigger the GC once more now that new allocs are started that could 2241 // have caused thresholds to be exceeded 2242 c.garbageCollector.Trigger() 2243 c.logger.Debug("allocation updates applied", "added", len(diff.added), "removed", len(diff.removed), 2244 "updated", len(diff.updated), "ignored", len(diff.ignore), "errors", errs) 2245 } 2246 2247 // makeFailedAlloc creates a stripped down version of the allocation passed in 2248 // with its status set to failed and other fields needed for the server to be 2249 // able to examine deployment and task states 2250 func makeFailedAlloc(add *structs.Allocation, err error) *structs.Allocation { 2251 stripped := new(structs.Allocation) 2252 stripped.ID = add.ID 2253 stripped.NodeID = add.NodeID 2254 stripped.ClientStatus = structs.AllocClientStatusFailed 2255 stripped.ClientDescription = fmt.Sprintf("Unable to add allocation due to error: %v", err) 2256 2257 // Copy task states if it exists in the original allocation 2258 if add.TaskStates != nil { 2259 stripped.TaskStates = add.TaskStates 2260 } else { 2261 stripped.TaskStates = make(map[string]*structs.TaskState) 2262 } 2263 2264 failTime := time.Now() 2265 if add.DeploymentStatus.HasHealth() { 2266 // Never change deployment health once it has been set 2267 stripped.DeploymentStatus = add.DeploymentStatus.Copy() 2268 } else { 2269 stripped.DeploymentStatus = &structs.AllocDeploymentStatus{ 2270 Healthy: helper.BoolToPtr(false), 2271 Timestamp: failTime, 2272 } 2273 } 2274 2275 taskGroup := add.Job.LookupTaskGroup(add.TaskGroup) 2276 if taskGroup == nil { 2277 return stripped 2278 } 2279 for _, task := range taskGroup.Tasks { 2280 ts, ok := stripped.TaskStates[task.Name] 2281 if !ok { 2282 ts = &structs.TaskState{} 2283 stripped.TaskStates[task.Name] = ts 2284 } 2285 if ts.FinishedAt.IsZero() { 2286 ts.FinishedAt = failTime 2287 } 2288 } 2289 return stripped 2290 } 2291 2292 // removeAlloc is invoked when we should remove an allocation because it has 2293 // been removed by the server. 2294 func (c *Client) removeAlloc(allocID string) { 2295 c.allocLock.Lock() 2296 defer c.allocLock.Unlock() 2297 2298 ar, ok := c.allocs[allocID] 2299 if !ok { 2300 c.invalidAllocsLock.Lock() 2301 if _, ok := c.invalidAllocs[allocID]; ok { 2302 // Removing from invalid allocs map if present 2303 delete(c.invalidAllocs, allocID) 2304 } else { 2305 // Alloc is unknown, log a warning. 2306 c.logger.Warn("cannot remove nonexistent alloc", "alloc_id", allocID, "error", "alloc not found") 2307 } 2308 c.invalidAllocsLock.Unlock() 2309 return 2310 } 2311 2312 // Stop tracking alloc runner as it's been GC'd by the server 2313 delete(c.allocs, allocID) 2314 2315 // Ensure the GC has a reference and then collect. 
Collecting through the GC 2316 // applies rate limiting 2317 c.garbageCollector.MarkForCollection(allocID, ar) 2318 2319 // GC immediately since the server has GC'd it 2320 go c.garbageCollector.Collect(allocID) 2321 } 2322 2323 // updateAlloc is invoked when we should update an allocation 2324 func (c *Client) updateAlloc(update *structs.Allocation) { 2325 ar, err := c.getAllocRunner(update.ID) 2326 if err != nil { 2327 c.logger.Warn("cannot update nonexistent alloc", "alloc_id", update.ID) 2328 return 2329 } 2330 2331 // Update local copy of alloc 2332 if err := c.stateDB.PutAllocation(update); err != nil { 2333 c.logger.Error("error persisting updated alloc locally", "error", err, "alloc_id", update.ID) 2334 } 2335 2336 // Update alloc runner 2337 ar.Update(update) 2338 } 2339 2340 // addAlloc is invoked when we should add an allocation 2341 func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error { 2342 c.allocLock.Lock() 2343 defer c.allocLock.Unlock() 2344 2345 // Check if we already have an alloc runner 2346 if _, ok := c.allocs[alloc.ID]; ok { 2347 c.logger.Debug("dropping duplicate add allocation request", "alloc_id", alloc.ID) 2348 return nil 2349 } 2350 2351 // Initialize local copy of alloc before creating the alloc runner so 2352 // we can't end up with an alloc runner that does not have an alloc. 2353 if err := c.stateDB.PutAllocation(alloc); err != nil { 2354 return err 2355 } 2356 2357 // Collect any preempted allocations to pass into the previous alloc watcher 2358 var preemptedAllocs map[string]allocwatcher.AllocRunnerMeta 2359 if len(alloc.PreemptedAllocations) > 0 { 2360 preemptedAllocs = make(map[string]allocwatcher.AllocRunnerMeta) 2361 for _, palloc := range alloc.PreemptedAllocations { 2362 preemptedAllocs[palloc] = c.allocs[palloc] 2363 } 2364 } 2365 2366 // Since only the Client has access to other AllocRunners and the RPC 2367 // client, create the previous allocation watcher here. 2368 watcherConfig := allocwatcher.Config{ 2369 Alloc: alloc, 2370 PreviousRunner: c.allocs[alloc.PreviousAllocation], 2371 PreemptedRunners: preemptedAllocs, 2372 RPC: c, 2373 Config: c.configCopy, 2374 MigrateToken: migrateToken, 2375 Logger: c.logger, 2376 } 2377 prevAllocWatcher, prevAllocMigrator := allocwatcher.NewAllocWatcher(watcherConfig) 2378 2379 // Copy the config since the node can be swapped out as it is being updated. 2380 // The long term fix is to pass in the config and node separately and then 2381 // we don't have to do a copy. 2382 c.configLock.RLock() 2383 arConf := &allocrunner.Config{ 2384 Alloc: alloc, 2385 Logger: c.logger, 2386 ClientConfig: c.configCopy, 2387 StateDB: c.stateDB, 2388 Consul: c.consulService, 2389 ConsulSI: c.tokensClient, 2390 Vault: c.vaultClient, 2391 StateUpdater: c, 2392 DeviceStatsReporter: c, 2393 PrevAllocWatcher: prevAllocWatcher, 2394 PrevAllocMigrator: prevAllocMigrator, 2395 DynamicRegistry: c.dynamicRegistry, 2396 CSIManager: c.csimanager, 2397 DeviceManager: c.devicemanager, 2398 DriverManager: c.drivermanager, 2399 RPCClient: c, 2400 } 2401 c.configLock.RUnlock() 2402 2403 ar, err := allocrunner.NewAllocRunner(arConf) 2404 if err != nil { 2405 return err 2406 } 2407 2408 // Store the alloc runner. 2409 c.allocs[alloc.ID] = ar 2410 2411 // Maybe mark the alloc for halt on missing server heartbeats 2412 c.heartbeatStop.allocHook(alloc) 2413 2414 go ar.Run() 2415 return nil 2416 } 2417 2418 // setupConsulTokenClient configures a tokenClient for managing consul service 2419 // identity tokens. 
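// The identities client needs only a logger and the deriveSIToken RPC
// wrapper defined below; presumably token storage and renewal are left to
// its callers, since nothing is started or persisted here.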
2420 func (c *Client) setupConsulTokenClient() error { 2421 tc := consulApi.NewIdentitiesClient(c.logger, c.deriveSIToken) 2422 c.tokensClient = tc 2423 return nil 2424 } 2425 2426 // setupVaultClient creates an object to periodically renew tokens and secrets 2427 // with vault. 2428 func (c *Client) setupVaultClient() error { 2429 var err error 2430 c.vaultClient, err = vaultclient.NewVaultClient(c.config.VaultConfig, c.logger, c.deriveToken) 2431 if err != nil { 2432 return err 2433 } 2434 2435 if c.vaultClient == nil { 2436 c.logger.Error("failed to create vault client") 2437 return fmt.Errorf("failed to create vault client") 2438 } 2439 2440 // Start renewing tokens and secrets 2441 c.vaultClient.Start() 2442 2443 return nil 2444 } 2445 2446 // deriveToken takes in an allocation and a set of tasks and derives vault 2447 // tokens for each of the tasks, unwraps all of them using the supplied vault 2448 // client and returns a map of unwrapped tokens, indexed by the task name. 2449 func (c *Client) deriveToken(alloc *structs.Allocation, taskNames []string, vclient *vaultapi.Client) (map[string]string, error) { 2450 vlogger := c.logger.Named("vault") 2451 2452 verifiedTasks, err := verifiedTasks(vlogger, alloc, taskNames) 2453 if err != nil { 2454 return nil, err 2455 } 2456 2457 // The Nomad server's DeriveVaultToken RPC takes in a set of tasks and 2458 // creates tokens for all of them. 2459 req := &structs.DeriveVaultTokenRequest{ 2460 NodeID: c.NodeID(), 2461 SecretID: c.secretNodeID(), 2462 AllocID: alloc.ID, 2463 Tasks: verifiedTasks, 2464 QueryOptions: structs.QueryOptions{ 2465 Region: c.Region(), 2466 AllowStale: false, 2467 }, 2468 } 2469 2470 // Derive the tokens 2471 var resp structs.DeriveVaultTokenResponse 2472 if err := c.RPC("Node.DeriveVaultToken", &req, &resp); err != nil { 2473 vlogger.Error("error making derive token RPC", "error", err) 2474 return nil, fmt.Errorf("DeriveVaultToken RPC failed: %v", err) 2475 } 2476 if resp.Error != nil { 2477 vlogger.Error("error deriving vault tokens", "error", resp.Error) 2478 return nil, structs.NewWrappedServerError(resp.Error) 2479 } 2480 if resp.Tasks == nil { 2481 vlogger.Error("error deriving vault token", "error", "invalid response") 2482 return nil, fmt.Errorf("failed to derive vault tokens: invalid response") 2483 } 2484 2485 unwrappedTokens := make(map[string]string) 2486 2487 // Retrieve the wrapped tokens from the response and unwrap them 2488 for _, taskName := range verifiedTasks { 2489 // Get the wrapped token 2490 wrappedToken, ok := resp.Tasks[taskName] 2491 if !ok { 2492 vlogger.Error("wrapped token missing for task", "task_name", taskName) 2493 return nil, fmt.Errorf("wrapped token missing for task %q", taskName) 2494 } 2495 2496 // Unwrap the vault token 2497 unwrapResp, err := vclient.Logical().Unwrap(wrappedToken) 2498 if err != nil { 2499 if structs.VaultUnrecoverableError.MatchString(err.Error()) { 2500 return nil, err 2501 } 2502 2503 // The error is recoverable 2504 return nil, structs.NewRecoverableError( 2505 fmt.Errorf("failed to unwrap the token for task %q: %v", taskName, err), true) 2506 } 2507 2508 // Validate the response 2509 var validationErr error 2510 if unwrapResp == nil { 2511 validationErr = fmt.Errorf("Vault returned nil secret when unwrapping") 2512 } else if unwrapResp.Auth == nil { 2513 validationErr = fmt.Errorf("Vault returned unwrap secret with nil Auth.
Secret warnings: %v", unwrapResp.Warnings) 2514 } else if unwrapResp.Auth.ClientToken == "" { 2515 validationErr = fmt.Errorf("Vault returned unwrap secret with empty Auth.ClientToken. Secret warnings: %v", unwrapResp.Warnings) 2516 } 2517 if validationErr != nil { 2518 vlogger.Warn("error unwrapping token", "error", err) 2519 return nil, structs.NewRecoverableError(validationErr, true) 2520 } 2521 2522 // Append the unwrapped token to the return value 2523 unwrappedTokens[taskName] = unwrapResp.Auth.ClientToken 2524 } 2525 2526 return unwrappedTokens, nil 2527 } 2528 2529 // deriveSIToken takes an allocation and a set of tasks and derives Consul 2530 // Service Identity tokens for each of the tasks by requesting them from the 2531 // Nomad Server. 2532 func (c *Client) deriveSIToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) { 2533 tasks, err := verifiedTasks(c.logger, alloc, taskNames) 2534 if err != nil { 2535 return nil, err 2536 } 2537 2538 req := &structs.DeriveSITokenRequest{ 2539 NodeID: c.NodeID(), 2540 SecretID: c.secretNodeID(), 2541 AllocID: alloc.ID, 2542 Tasks: tasks, 2543 QueryOptions: structs.QueryOptions{Region: c.Region()}, 2544 } 2545 2546 // Nicely ask Nomad Server for the tokens. 2547 var resp structs.DeriveSITokenResponse 2548 if err := c.RPC("Node.DeriveSIToken", &req, &resp); err != nil { 2549 c.logger.Error("error making derive token RPC", "error", err) 2550 return nil, fmt.Errorf("DeriveSIToken RPC failed: %v", err) 2551 } 2552 if err := resp.Error; err != nil { 2553 c.logger.Error("error deriving SI tokens", "error", err) 2554 return nil, structs.NewWrappedServerError(err) 2555 } 2556 if len(resp.Tokens) == 0 { 2557 c.logger.Error("error deriving SI tokens", "error", "invalid_response") 2558 return nil, fmt.Errorf("failed to derive SI tokens: invalid response") 2559 } 2560 2561 // NOTE: Unlike with the Vault integration, Nomad Server replies with the 2562 // actual Consul SI token (.SecretID), because otherwise each Nomad 2563 // Client would need to be blessed with 'acl:write' permissions to read the 2564 // secret value given the .AccessorID, which does not fit well in the Consul 2565 // security model. 2566 // 2567 // https://www.consul.io/api/acl/tokens.html#read-a-token 2568 // https://www.consul.io/docs/internals/security.html 2569 2570 m := helper.CopyMapStringString(resp.Tokens) 2571 return m, nil 2572 } 2573 2574 // verifiedTasks asserts each task in taskNames actually exists in the given alloc, 2575 // otherwise an error is returned. 
2576 func verifiedTasks(logger hclog.Logger, alloc *structs.Allocation, taskNames []string) ([]string, error) { 2577 if alloc == nil { 2578 return nil, fmt.Errorf("nil allocation") 2579 } 2580 2581 if len(taskNames) == 0 { 2582 return nil, fmt.Errorf("missing task names") 2583 } 2584 2585 group := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 2586 if group == nil { 2587 return nil, fmt.Errorf("group name in allocation is not present in job") 2588 } 2589 2590 verifiedTasks := make([]string, 0, len(taskNames)) 2591 2592 // confirm the requested task names actually exist in the allocation 2593 for _, taskName := range taskNames { 2594 if !taskIsPresent(taskName, group.Tasks) { 2595 logger.Error("task not found in the allocation", "task_name", taskName) 2596 return nil, fmt.Errorf("task %q not found in allocation", taskName) 2597 } 2598 verifiedTasks = append(verifiedTasks, taskName) 2599 } 2600 2601 return verifiedTasks, nil 2602 } 2603 2604 func taskIsPresent(taskName string, tasks []*structs.Task) bool { 2605 for _, task := range tasks { 2606 if task.Name == taskName { 2607 return true 2608 } 2609 } 2610 return false 2611 } 2612 2613 // triggerDiscovery causes a Consul discovery to begin (if one hasn't already) 2614 func (c *Client) triggerDiscovery() { 2615 select { 2616 case c.triggerDiscoveryCh <- struct{}{}: 2617 // Discovery goroutine was released to execute 2618 default: 2619 // Discovery goroutine was already running 2620 } 2621 } 2622 2623 // consulDiscovery waits for the signal to attempt server discovery via Consul. 2624 // It's intended to be started in a goroutine. See triggerDiscovery() for 2625 // causing consul discovery from other code locations. 2626 func (c *Client) consulDiscovery() { 2627 for { 2628 select { 2629 case <-c.triggerDiscoveryCh: 2630 if err := c.consulDiscoveryImpl(); err != nil { 2631 c.logger.Error("error discovering nomad servers", "error", err) 2632 } 2633 case <-c.shutdownCh: 2634 return 2635 } 2636 } 2637 } 2638 2639 func (c *Client) consulDiscoveryImpl() error { 2640 consulLogger := c.logger.Named("consul") 2641 2642 dcs, err := c.consulCatalog.Datacenters() 2643 if err != nil { 2644 return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err) 2645 } 2646 if len(dcs) > 2 { 2647 // Query the local DC first, then shuffle the 2648 // remaining DCs. Future heartbeats will cause Nomad 2649 // Clients to fixate on their local datacenter so 2650 // it's okay to talk with remote DCs. If no 2651 // Nomad servers are available within 2652 // datacenterQueryLimit datacenters, the next heartbeat will pick 2653 // a new set of servers so it's okay.
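// Concretely: keep the first (local) datacenter at the head of the list,
// randomize the rest, then cap the candidate list at datacenterQueryLimit
// entries.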
2654 shuffleStrings(dcs[1:]) 2655 dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)] 2656 } 2657 2658 // Query for servers in this client's region only 2659 region := c.Region() 2660 rpcargs := structs.GenericRequest{ 2661 QueryOptions: structs.QueryOptions{ 2662 Region: region, 2663 }, 2664 } 2665 2666 serviceName := c.configCopy.ConsulConfig.ServerServiceName 2667 var mErr multierror.Error 2668 var nomadServers servers.Servers 2669 consulLogger.Debug("bootstrap contacting Consul DCs", "consul_dcs", dcs) 2670 DISCOLOOP: 2671 for _, dc := range dcs { 2672 consulOpts := &consulapi.QueryOptions{ 2673 AllowStale: true, 2674 Datacenter: dc, 2675 Near: "_agent", 2676 WaitTime: consul.DefaultQueryWaitDuration, 2677 } 2678 consulServices, _, err := c.consulCatalog.Service(serviceName, consul.ServiceTagRPC, consulOpts) 2679 if err != nil { 2680 mErr.Errors = append(mErr.Errors, fmt.Errorf("unable to query service %+q from Consul datacenter %+q: %v", serviceName, dc, err)) 2681 continue 2682 } 2683 2684 for _, s := range consulServices { 2685 port := strconv.Itoa(s.ServicePort) 2686 addrstr := s.ServiceAddress 2687 if addrstr == "" { 2688 addrstr = s.Address 2689 } 2690 addr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(addrstr, port)) 2691 if err != nil { 2692 mErr.Errors = append(mErr.Errors, err) 2693 continue 2694 } 2695 var peers []string 2696 if err := c.connPool.RPC(region, addr, c.RPCMajorVersion(), "Status.Peers", rpcargs, &peers); err != nil { 2697 mErr.Errors = append(mErr.Errors, err) 2698 continue 2699 } 2700 2701 // Successfully received the Server peers list of the correct 2702 // region 2703 for _, p := range peers { 2704 addr, err := net.ResolveTCPAddr("tcp", p) 2705 if err != nil { 2706 mErr.Errors = append(mErr.Errors, err) 2707 } 2708 srv := &servers.Server{Addr: addr} 2709 nomadServers = append(nomadServers, srv) 2710 } 2711 if len(nomadServers) > 0 { 2712 break DISCOLOOP 2713 } 2714 } 2715 } 2716 if len(nomadServers) == 0 { 2717 if len(mErr.Errors) > 0 { 2718 return mErr.ErrorOrNil() 2719 } 2720 return fmt.Errorf("no Nomad Servers advertising service %q in Consul datacenters: %+q", serviceName, dcs) 2721 } 2722 2723 consulLogger.Info("discovered following servers", "servers", nomadServers) 2724 2725 // Fire the retry trigger if we have updated the set of servers. 2726 if c.servers.SetServers(nomadServers) { 2727 // Start rebalancing 2728 c.servers.RebalanceServers() 2729 2730 // Notify waiting rpc calls. If a goroutine just failed an RPC call and 2731 // isn't receiving on this chan yet they'll still retry eventually. 2732 // This is a shortcircuit for the longer retry intervals. 
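// SetServers reports whether the set actually changed, so the rebalance and
// the retry notification below are skipped when discovery found only the
// servers we already knew about.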
2733 c.fireRpcRetryWatcher() 2734 } 2735 2736 return nil 2737 } 2738 2739 // emitStats collects host resource usage stats periodically 2740 func (c *Client) emitStats() { 2741 // Determining NodeClass to be emitted 2742 var emittedNodeClass string 2743 if emittedNodeClass = c.Node().NodeClass; emittedNodeClass == "" { 2744 emittedNodeClass = "none" 2745 } 2746 2747 // Assign labels directly before emitting stats so the information expected 2748 // is ready 2749 c.baseLabels = []metrics.Label{ 2750 {Name: "node_id", Value: c.NodeID()}, 2751 {Name: "datacenter", Value: c.Datacenter()}, 2752 {Name: "node_class", Value: emittedNodeClass}, 2753 } 2754 2755 // Start collecting host stats right away and then keep collecting every 2756 // collection interval 2757 next := time.NewTimer(0) 2758 defer next.Stop() 2759 for { 2760 select { 2761 case <-next.C: 2762 err := c.hostStatsCollector.Collect() 2763 next.Reset(c.config.StatsCollectionInterval) 2764 if err != nil { 2765 c.logger.Warn("error fetching host resource usage stats", "error", err) 2766 } else { 2767 // Publish Node metrics if operator has opted in 2768 if c.config.PublishNodeMetrics { 2769 c.emitHostStats() 2770 } 2771 } 2772 2773 c.emitClientMetrics() 2774 case <-c.shutdownCh: 2775 return 2776 } 2777 } 2778 } 2779 2780 // setGaugeForMemoryStats proxies metrics for memory specific statistics 2781 func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { 2782 if !c.config.DisableTaggedMetrics { 2783 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels) 2784 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels) 2785 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels) 2786 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels) 2787 } 2788 2789 if c.config.BackwardsCompatibleMetrics { 2790 metrics.SetGauge([]string{"client", "host", "memory", nodeID, "total"}, float32(hStats.Memory.Total)) 2791 metrics.SetGauge([]string{"client", "host", "memory", nodeID, "available"}, float32(hStats.Memory.Available)) 2792 metrics.SetGauge([]string{"client", "host", "memory", nodeID, "used"}, float32(hStats.Memory.Used)) 2793 metrics.SetGauge([]string{"client", "host", "memory", nodeID, "free"}, float32(hStats.Memory.Free)) 2794 } 2795 } 2796 2797 // setGaugeForCPUStats proxies metrics for CPU specific statistics 2798 func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { 2799 for _, cpu := range hStats.CPU { 2800 if !c.config.DisableTaggedMetrics { 2801 labels := append(baseLabels, metrics.Label{ 2802 Name: "cpu", 2803 Value: cpu.CPU, 2804 }) 2805 2806 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels) 2807 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "user"}, float32(cpu.User), labels) 2808 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "idle"}, float32(cpu.Idle), labels) 2809 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels) 2810 } 2811 2812 if c.config.BackwardsCompatibleMetrics { 2813 metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "total"}, float32(cpu.Total)) 2814 metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "user"}, 
float32(cpu.User)) 2815 metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "idle"}, float32(cpu.Idle)) 2816 metrics.SetGauge([]string{"client", "host", "cpu", nodeID, cpu.CPU, "system"}, float32(cpu.System)) 2817 } 2818 } 2819 } 2820 2821 // setGaugeForDiskStats proxies metrics for disk specific statistics 2822 func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { 2823 for _, disk := range hStats.DiskStats { 2824 if !c.config.DisableTaggedMetrics { 2825 labels := append(baseLabels, metrics.Label{ 2826 Name: "disk", 2827 Value: disk.Device, 2828 }) 2829 2830 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, float32(disk.Size), labels) 2831 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used"}, float32(disk.Used), labels) 2832 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "available"}, float32(disk.Available), labels) 2833 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used_percent"}, float32(disk.UsedPercent), labels) 2834 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels) 2835 } 2836 2837 if c.config.BackwardsCompatibleMetrics { 2838 metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "size"}, float32(disk.Size)) 2839 metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used"}, float32(disk.Used)) 2840 metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "available"}, float32(disk.Available)) 2841 metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "used_percent"}, float32(disk.UsedPercent)) 2842 metrics.SetGauge([]string{"client", "host", "disk", nodeID, disk.Device, "inodes_percent"}, float32(disk.InodesUsedPercent)) 2843 } 2844 } 2845 } 2846 2847 // setGaugeForAllocationStats proxies metrics for allocation specific statistics 2848 func (c *Client) setGaugeForAllocationStats(nodeID string) { 2849 c.configLock.RLock() 2850 node := c.configCopy.Node 2851 c.configLock.RUnlock() 2852 total := node.NodeResources 2853 res := node.ReservedResources 2854 allocated := c.getAllocatedResources(node) 2855 2856 // Emit allocated 2857 if !c.config.DisableTaggedMetrics { 2858 metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), c.baseLabels) 2859 metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), c.baseLabels) 2860 metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), c.baseLabels) 2861 } 2862 2863 if c.config.BackwardsCompatibleMetrics { 2864 metrics.SetGauge([]string{"client", "allocated", "memory", nodeID}, float32(allocated.Flattened.Memory.MemoryMB)) 2865 metrics.SetGauge([]string{"client", "allocated", "disk", nodeID}, float32(allocated.Shared.DiskMB)) 2866 metrics.SetGauge([]string{"client", "allocated", "cpu", nodeID}, float32(allocated.Flattened.Cpu.CpuShares)) 2867 } 2868 2869 for _, n := range allocated.Flattened.Networks { 2870 if !c.config.DisableTaggedMetrics { 2871 labels := append(c.baseLabels, metrics.Label{ 2872 Name: "device", 2873 Value: n.Device, 2874 }) 2875 metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels) 2876 } 2877 2878 if c.config.BackwardsCompatibleMetrics { 2879 metrics.SetGauge([]string{"client", "allocated", "network", n.Device, nodeID}, float32(n.MBits)) 2880 } 2881 } 
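// A worked example of the unallocated arithmetic below, with invented round
// numbers: a node with 16384 MB of memory, 1024 MB reserved, and 4096 MB
// allocated would report 16384 - 1024 - 4096 = 11264 MB unallocated.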
2882 2883 // Emit unallocated 2884 unallocatedMem := total.Memory.MemoryMB - res.Memory.MemoryMB - allocated.Flattened.Memory.MemoryMB 2885 unallocatedDisk := total.Disk.DiskMB - res.Disk.DiskMB - allocated.Shared.DiskMB 2886 unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares 2887 2888 if !c.config.DisableTaggedMetrics { 2889 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), c.baseLabels) 2890 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), c.baseLabels) 2891 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), c.baseLabels) 2892 } 2893 2894 if c.config.BackwardsCompatibleMetrics { 2895 metrics.SetGauge([]string{"client", "unallocated", "memory", nodeID}, float32(unallocatedMem)) 2896 metrics.SetGauge([]string{"client", "unallocated", "disk", nodeID}, float32(unallocatedDisk)) 2897 metrics.SetGauge([]string{"client", "unallocated", "cpu", nodeID}, float32(unallocatedCpu)) 2898 } 2899 2900 totalComparable := total.Comparable() 2901 for _, n := range totalComparable.Flattened.Networks { 2902 // Determine the used resources 2903 var usedMbits int 2904 totalIdx := allocated.Flattened.Networks.NetIndex(n) 2905 if totalIdx != -1 { 2906 usedMbits = allocated.Flattened.Networks[totalIdx].MBits 2907 } 2908 2909 unallocatedMbits := n.MBits - usedMbits 2910 if !c.config.DisableTaggedMetrics { 2911 labels := append(c.baseLabels, metrics.Label{ 2912 Name: "device", 2913 Value: n.Device, 2914 }) 2915 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels) 2916 } 2917 2918 if c.config.BackwardsCompatibleMetrics { 2919 metrics.SetGauge([]string{"client", "unallocated", "network", n.Device, nodeID}, float32(unallocatedMbits)) 2920 } 2921 } 2922 } 2923 2924 // setGaugeForUptime proxies the host uptime metric; no extra labels are required, so the legacy path emits with only a key/value syntax 2925 func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics.Label) { 2926 if !c.config.DisableTaggedMetrics { 2927 metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels) 2928 } 2929 if c.config.BackwardsCompatibleMetrics { 2930 metrics.SetGauge([]string{"client", "uptime"}, float32(hStats.Uptime)) 2931 } 2932 } 2933 2934 // emitHostStats pushes host resource usage stats to remote metrics collection sinks 2935 func (c *Client) emitHostStats() { 2936 nodeID := c.NodeID() 2937 hStats := c.hostStatsCollector.Stats() 2938 2939 c.configLock.RLock() 2940 nodeStatus := c.configCopy.Node.Status 2941 nodeEligibility := c.configCopy.Node.SchedulingEligibility 2942 c.configLock.RUnlock() 2943 2944 labels := append(c.baseLabels, 2945 metrics.Label{Name: "node_status", Value: nodeStatus}, 2946 metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility}, 2947 ) 2948 2949 c.setGaugeForMemoryStats(nodeID, hStats, labels) 2950 c.setGaugeForUptime(hStats, labels) 2951 c.setGaugeForCPUStats(nodeID, hStats, labels) 2952 c.setGaugeForDiskStats(nodeID, hStats, labels) 2953 } 2954 2955 // emitClientMetrics emits lower volume client metrics 2956 func (c *Client) emitClientMetrics() { 2957 nodeID := c.NodeID() 2958 2959 c.setGaugeForAllocationStats(nodeID) 2960 2961 // Emit allocation metrics 2962 blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0 2963 for _, ar := range c.getAllocRunners() { 2964 switch ar.AllocState().ClientStatus { 2965 case structs.AllocClientStatusPending: 2966 switch { 2967
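// Pending allocs are subdivided further: blocked (waiting on a previous
// alloc), migrating (waiting on data from a previous alloc), or genuinely
// pending.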
case ar.IsWaiting(): 2968 blocked++ 2969 case ar.IsMigrating(): 2970 migrating++ 2971 default: 2972 pending++ 2973 } 2974 case structs.AllocClientStatusRunning: 2975 running++ 2976 case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed: 2977 terminal++ 2978 } 2979 } 2980 2981 if !c.config.DisableTaggedMetrics { 2982 metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), c.baseLabels) 2983 metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), c.baseLabels) 2984 metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), c.baseLabels) 2985 metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), c.baseLabels) 2986 metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), c.baseLabels) 2987 } 2988 2989 if c.config.BackwardsCompatibleMetrics { 2990 metrics.SetGauge([]string{"client", "allocations", "migrating", nodeID}, float32(migrating)) 2991 metrics.SetGauge([]string{"client", "allocations", "blocked", nodeID}, float32(blocked)) 2992 metrics.SetGauge([]string{"client", "allocations", "pending", nodeID}, float32(pending)) 2993 metrics.SetGauge([]string{"client", "allocations", "running", nodeID}, float32(running)) 2994 metrics.SetGauge([]string{"client", "allocations", "terminal", nodeID}, float32(terminal)) 2995 } 2996 } 2997 2998 func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.ComparableResources { 2999 // Unfortunately the allocs only have IP so we need to match them to the 3000 // device 3001 cidrToDevice := make(map[*net.IPNet]string, len(selfNode.Resources.Networks)) 3002 for _, n := range selfNode.NodeResources.Networks { 3003 _, ipnet, err := net.ParseCIDR(n.CIDR) 3004 if err != nil { 3005 continue 3006 } 3007 cidrToDevice[ipnet] = n.Device 3008 } 3009 3010 // Sum the allocated resources 3011 var allocated structs.ComparableResources 3012 allocatedDeviceMbits := make(map[string]int) 3013 for _, ar := range c.getAllocRunners() { 3014 alloc := ar.Alloc() 3015 if alloc.ServerTerminalStatus() || ar.AllocState().ClientTerminalStatus() { 3016 continue 3017 } 3018 3019 // Add the resources 3020 // COMPAT(0.11): Just use the allocated resources 3021 allocated.Add(alloc.ComparableResources()) 3022 3023 // Add the used network 3024 if alloc.AllocatedResources != nil { 3025 for _, tr := range alloc.AllocatedResources.Tasks { 3026 for _, allocatedNetwork := range tr.Networks { 3027 for cidr, dev := range cidrToDevice { 3028 ip := net.ParseIP(allocatedNetwork.IP) 3029 if cidr.Contains(ip) { 3030 allocatedDeviceMbits[dev] += allocatedNetwork.MBits 3031 break 3032 } 3033 } 3034 } 3035 } 3036 } else if alloc.Resources != nil { 3037 for _, allocatedNetwork := range alloc.Resources.Networks { 3038 for cidr, dev := range cidrToDevice { 3039 ip := net.ParseIP(allocatedNetwork.IP) 3040 if cidr.Contains(ip) { 3041 allocatedDeviceMbits[dev] += allocatedNetwork.MBits 3042 break 3043 } 3044 } 3045 } 3046 } 3047 } 3048 3049 // Clear the networks 3050 allocated.Flattened.Networks = nil 3051 for dev, speed := range allocatedDeviceMbits { 3052 net := &structs.NetworkResource{ 3053 Device: dev, 3054 MBits: speed, 3055 } 3056 allocated.Flattened.Networks = append(allocated.Flattened.Networks, net) 3057 } 3058 3059 return &allocated 3060 } 3061 3062 // GetTaskEventHandler returns an event handler for the given allocID and task name 3063 func (c *Client) GetTaskEventHandler(allocID, taskName string) 
drivermanager.EventHandler { 3064 c.allocLock.RLock() 3065 defer c.allocLock.RUnlock() 3066 if ar, ok := c.allocs[allocID]; ok { 3067 return ar.GetTaskEventHandler(taskName) 3068 } 3069 return nil 3070 } 3071 3072 // group wraps a func() in a goroutine and provides a way to block until it 3073 // exits. Inspired by https://godoc.org/golang.org/x/sync/errgroup 3074 type group struct { 3075 wg sync.WaitGroup 3076 } 3077 3078 // Go starts f in a goroutine and must be called before Wait. 3079 func (g *group) Go(f func()) { 3080 g.wg.Add(1) 3081 go func() { 3082 defer g.wg.Done() 3083 f() 3084 }() 3085 } 3086 3087 // AddCh starts a goroutine that blocks until ch closes; like Go, it must be called before Wait. func (g *group) AddCh(ch <-chan struct{}) { 3088 g.Go(func() { 3089 <-ch 3090 }) 3091 } 3092 3093 // Wait for all goroutines to exit. Must be called after all calls to Go 3094 // complete. 3095 func (g *group) Wait() { 3096 g.wg.Wait() 3097 }
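// A minimal usage sketch of group (names invented):
//
//	var g group
//	g.Go(doBackgroundWork)   // runs doBackgroundWork in a goroutine
//	g.AddCh(workerDoneCh)    // also waits for workerDoneCh to close
//	g.Wait()                 // blocks until both goroutines exit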