github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/client.go

package client

import (
	"fmt"
	"io/ioutil"
	"net"
	"net/rpc"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	metrics "github.com/armon/go-metrics"
	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/lib"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/helper/envoy"
	vaultapi "github.com/hashicorp/vault/api"
	"github.com/pkg/errors"
	"github.com/shirou/gopsutil/host"

	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/allocrunner"
	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
	arstate "github.com/hashicorp/nomad/client/allocrunner/state"
	"github.com/hashicorp/nomad/client/allocwatcher"
	"github.com/hashicorp/nomad/client/config"
	consulApi "github.com/hashicorp/nomad/client/consul"
	"github.com/hashicorp/nomad/client/devicemanager"
	"github.com/hashicorp/nomad/client/dynamicplugins"
	"github.com/hashicorp/nomad/client/fingerprint"
	"github.com/hashicorp/nomad/client/pluginmanager"
	"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
	"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
	"github.com/hashicorp/nomad/client/servers"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/client/vaultclient"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/pool"
	hstats "github.com/hashicorp/nomad/helper/stats"
	"github.com/hashicorp/nomad/helper/tlsutil"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
	nconfig "github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/nomad/plugins/csi"
	"github.com/hashicorp/nomad/plugins/device"
	"github.com/hashicorp/nomad/plugins/drivers"
)

const (
	// clientRPCCache controls how long we keep an idle connection
	// open to a server
	clientRPCCache = 5 * time.Minute

	// clientMaxStreams controls how many idle streams we keep
	// open to a server
	clientMaxStreams = 2

	// datacenterQueryLimit searches through up to this many adjacent
	// datacenters looking for the Nomad server service.
	datacenterQueryLimit = 9

	// registerRetryIntv is the minimum interval on which we retry
	// registration. We pick a value between this and 2x this.
	registerRetryIntv = 15 * time.Second

	// getAllocRetryIntv is the minimum interval on which we retry
	// fetching allocations. We pick a value between this and 2x this.
	getAllocRetryIntv = 30 * time.Second

	// devModeRetryIntv is the retry interval used for development
	devModeRetryIntv = time.Second

	// stateSnapshotIntv is how often the client snapshots state
	stateSnapshotIntv = 60 * time.Second

	// initialHeartbeatStagger is used to stagger the interval between
	// starting and the initial heartbeat. After the initial heartbeat,
	// we switch to using the TTL specified by the servers.
	initialHeartbeatStagger = 10 * time.Second

	// nodeUpdateRetryIntv is how often the client checks for updates to the
	// node attributes or meta map.
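	// In practice this interval is jittered through retryIntv (defined later
	// in this file), so outside of dev mode the effective wait falls between
	// nodeUpdateRetryIntv and 2*nodeUpdateRetryIntv; for example,
	// watchNodeEvents re-arms its timer with c.retryIntv(nodeUpdateRetryIntv),
	// i.e. a duration in [5s, 10s).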
91 nodeUpdateRetryIntv = 5 * time.Second 92 93 // allocSyncIntv is the batching period of allocation updates before they 94 // are synced with the server. 95 allocSyncIntv = 200 * time.Millisecond 96 97 // allocSyncRetryIntv is the interval on which we retry updating 98 // the status of the allocation 99 allocSyncRetryIntv = 5 * time.Second 100 101 // defaultConnectLogLevel is the log level set in the node meta by default 102 // to be used by Consul Connect sidecar tasks. 103 defaultConnectLogLevel = "info" 104 105 // defaultConnectProxyConcurrency is the default number of worker threads the 106 // connect sidecar should be configured to use. 107 // 108 // https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-concurrency 109 defaultConnectProxyConcurrency = "1" 110 ) 111 112 var ( 113 // grace period to allow for batch fingerprint processing 114 batchFirstFingerprintsProcessingGrace = batchFirstFingerprintsTimeout + 5*time.Second 115 ) 116 117 // ClientStatsReporter exposes all the APIs related to resource usage of a Nomad 118 // Client 119 type ClientStatsReporter interface { 120 // GetAllocStats returns the AllocStatsReporter for the passed allocation. 121 // If it does not exist an error is reported. 122 GetAllocStats(allocID string) (interfaces.AllocStatsReporter, error) 123 124 // LatestHostStats returns the latest resource usage stats for the host 125 LatestHostStats() *stats.HostStats 126 } 127 128 // AllocRunner is the interface implemented by the core alloc runner. 129 //TODO Create via factory to allow testing Client with mock AllocRunners. 130 type AllocRunner interface { 131 Alloc() *structs.Allocation 132 AllocState() *arstate.State 133 Destroy() 134 Shutdown() 135 GetAllocDir() *allocdir.AllocDir 136 IsDestroyed() bool 137 IsMigrating() bool 138 IsWaiting() bool 139 Listener() *cstructs.AllocListener 140 Restore() error 141 Run() 142 StatsReporter() interfaces.AllocStatsReporter 143 Update(*structs.Allocation) 144 WaitCh() <-chan struct{} 145 DestroyCh() <-chan struct{} 146 ShutdownCh() <-chan struct{} 147 Signal(taskName, signal string) error 148 GetTaskEventHandler(taskName string) drivermanager.EventHandler 149 PersistState() error 150 151 RestartTask(taskName string, taskEvent *structs.TaskEvent) error 152 RestartAll(taskEvent *structs.TaskEvent) error 153 154 GetTaskExecHandler(taskName string) drivermanager.TaskExecHandler 155 GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error) 156 } 157 158 // Client is used to implement the client interaction with Nomad. Clients 159 // are expected to register as a schedulable node to the servers, and to 160 // run allocations as determined by the servers. 161 type Client struct { 162 config *config.Config 163 start time.Time 164 165 // stateDB is used to efficiently store client state. 166 stateDB state.StateDB 167 168 // configCopy is a copy that should be passed to alloc-runners. 169 configCopy *config.Config 170 configLock sync.RWMutex 171 172 logger hclog.InterceptLogger 173 rpcLogger hclog.Logger 174 175 connPool *pool.ConnPool 176 177 // tlsWrap is used to wrap outbound connections using TLS. It should be 178 // accessed using the lock. 
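	// A reader elsewhere in the client (for example, the RPC layer) would be
	// expected to take the read side of the lock before copying the wrapper,
	// roughly (illustrative sketch only):
	//
	//	c.tlsWrapLock.RLock()
	//	tlsWrap := c.tlsWrap
	//	c.tlsWrapLock.RUnlock()
	//
	// The writer side is shown in reloadTLSConnections below, which holds
	// tlsWrapLock while swapping in the new wrapper.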
	tlsWrap     tlsutil.RegionWrapper
	tlsWrapLock sync.RWMutex

	// servers is the list of nomad servers
	servers *servers.Manager

	// heartbeat related times for tracking how often to heartbeat
	heartbeatTTL    time.Duration
	haveHeartbeated bool
	heartbeatLock   sync.Mutex
	heartbeatStop   *heartbeatStop

	// triggerDiscoveryCh triggers Consul discovery; see triggerDiscovery
	triggerDiscoveryCh chan struct{}

	// triggerNodeUpdate triggers the client to mark the Node as changed and
	// update it.
	triggerNodeUpdate chan struct{}

	// triggerEmitNodeEvent sends an event and triggers the client to update the
	// server for the node event
	triggerEmitNodeEvent chan *structs.NodeEvent

	// rpcRetryCh is closed when an event occurs, such as server discovery or a
	// successful RPC, that warrants retrying a pending RPC. Access should only
	// occur via the getter method
	rpcRetryCh   chan struct{}
	rpcRetryLock sync.Mutex

	// allocs maps alloc IDs to their AllocRunner. This map includes all
	// AllocRunners - running and GC'd - until the server GCs them.
	allocs    map[string]AllocRunner
	allocLock sync.RWMutex

	// invalidAllocs is a map that tracks allocations that failed because
	// the client couldn't initialize alloc or task runners for them. This can
	// happen due to driver errors
	invalidAllocs     map[string]struct{}
	invalidAllocsLock sync.Mutex

	// allocUpdates stores allocations that need to be synced to the server.
	allocUpdates chan *structs.Allocation

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService consulApi.ConsulServiceAPI

	// consulProxies is Nomad's custom Consul client for looking up supported
	// envoy versions
	consulProxies consulApi.SupportedProxiesAPI

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// HostStatsCollector collects host resource usage stats
	hostStatsCollector *stats.HostStatsCollector

	// shutdown is true when the Client has been shutdown. Must hold
	// shutdownLock to access.
	shutdown bool

	// shutdownCh is closed to signal the Client is shutting down.
	shutdownCh chan struct{}

	shutdownLock sync.Mutex

	// shutdownGroup are goroutines that exit when shutdownCh is closed.
	// Shutdown() blocks on Wait() after closing shutdownCh.
	shutdownGroup group

	// tokensClient is Nomad Client's custom Consul client for requesting Consul
	// Service Identity tokens through Nomad Server.
	tokensClient consulApi.ServiceIdentityAPI

	// vaultClient is used to interact with Vault for token and secret renewals
	vaultClient vaultclient.VaultClient

	// garbageCollector is used to garbage collect terminal allocations present
	// in the node automatically
	garbageCollector *AllocGarbageCollector

	// clientACLResolver holds the ACL resolution state
	clientACLResolver

	// rpcServer is used to serve RPCs by the local agent.
	rpcServer     *rpc.Server
	endpoints     rpcEndpoints
	streamingRpcs *structs.StreamingRpcRegistry

	// pluginManagers is the set of PluginManagers registered by the client
	pluginManagers *pluginmanager.PluginGroup

	// csimanager is responsible for managing csi plugins.
	csimanager csimanager.Manager

	// devicemanager is responsible for managing device plugins.
275 devicemanager devicemanager.Manager 276 277 // drivermanager is responsible for managing driver plugins 278 drivermanager drivermanager.Manager 279 280 // baseLabels are used when emitting tagged metrics. All client metrics will 281 // have these tags, and optionally more. 282 baseLabels []metrics.Label 283 284 // batchNodeUpdates is used to batch initial updates to the node 285 batchNodeUpdates *batchNodeUpdates 286 287 // fpInitialized chan is closed when the first batch of fingerprints are 288 // applied to the node and the server is updated 289 fpInitialized chan struct{} 290 291 // serversContactedCh is closed when GetClientAllocs and runAllocs have 292 // successfully run once. 293 serversContactedCh chan struct{} 294 serversContactedOnce sync.Once 295 296 // dynamicRegistry provides access to plugins that are dynamically registered 297 // with a nomad client. Currently only used for CSI. 298 dynamicRegistry dynamicplugins.Registry 299 300 // EnterpriseClient is used to set and check enterprise features for clients 301 EnterpriseClient *EnterpriseClient 302 } 303 304 var ( 305 // noServersErr is returned by the RPC method when the client has no 306 // configured servers. This is used to trigger Consul discovery if 307 // enabled. 308 noServersErr = errors.New("no servers") 309 ) 310 311 // NewClient is used to create a new client from the given configuration 312 func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxies consulApi.SupportedProxiesAPI, consulService consulApi.ConsulServiceAPI) (*Client, error) { 313 // Create the tls wrapper 314 var tlsWrap tlsutil.RegionWrapper 315 if cfg.TLSConfig.EnableRPC { 316 tw, err := tlsutil.NewTLSConfiguration(cfg.TLSConfig, true, true) 317 if err != nil { 318 return nil, err 319 } 320 tlsWrap, err = tw.OutgoingTLSWrapper() 321 if err != nil { 322 return nil, err 323 } 324 } 325 326 if cfg.StateDBFactory == nil { 327 cfg.StateDBFactory = state.GetStateDBFactory(cfg.DevMode) 328 } 329 330 // Create the logger 331 logger := cfg.Logger.ResetNamedIntercept("client") 332 333 // Create the client 334 c := &Client{ 335 config: cfg, 336 consulCatalog: consulCatalog, 337 consulProxies: consulProxies, 338 consulService: consulService, 339 start: time.Now(), 340 connPool: pool.NewPool(logger, clientRPCCache, clientMaxStreams, tlsWrap), 341 tlsWrap: tlsWrap, 342 streamingRpcs: structs.NewStreamingRpcRegistry(), 343 logger: logger, 344 rpcLogger: logger.Named("rpc"), 345 allocs: make(map[string]AllocRunner), 346 allocUpdates: make(chan *structs.Allocation, 64), 347 shutdownCh: make(chan struct{}), 348 triggerDiscoveryCh: make(chan struct{}), 349 triggerNodeUpdate: make(chan struct{}, 8), 350 triggerEmitNodeEvent: make(chan *structs.NodeEvent, 8), 351 fpInitialized: make(chan struct{}), 352 invalidAllocs: make(map[string]struct{}), 353 serversContactedCh: make(chan struct{}), 354 serversContactedOnce: sync.Once{}, 355 EnterpriseClient: newEnterpriseClient(logger), 356 } 357 358 c.batchNodeUpdates = newBatchNodeUpdates( 359 c.updateNodeFromDriver, 360 c.updateNodeFromDevices, 361 c.updateNodeFromCSI, 362 ) 363 364 // Initialize the server manager 365 c.servers = servers.New(c.logger, c.shutdownCh, c) 366 367 // Start server manager rebalancing go routine 368 go c.servers.Start() 369 370 // initialize the client 371 if err := c.init(); err != nil { 372 return nil, fmt.Errorf("failed to initialize client: %v", err) 373 } 374 375 // initialize the dynamic registry (needs to happen after init) 376 c.dynamicRegistry = 377 
dynamicplugins.NewRegistry(c.stateDB, map[string]dynamicplugins.PluginDispenser{ 378 dynamicplugins.PluginTypeCSIController: func(info *dynamicplugins.PluginInfo) (interface{}, error) { 379 return csi.NewClient(info.ConnectionInfo.SocketPath, logger.Named("csi_client").With("plugin.name", info.Name, "plugin.type", "controller")) 380 }, 381 dynamicplugins.PluginTypeCSINode: func(info *dynamicplugins.PluginInfo) (interface{}, error) { 382 return csi.NewClient(info.ConnectionInfo.SocketPath, logger.Named("csi_client").With("plugin.name", info.Name, "plugin.type", "client")) 383 }, // TODO(tgross): refactor these dispenser constructors into csimanager to tidy it up 384 }) 385 386 // Setup the clients RPC server 387 c.setupClientRpc() 388 389 // Initialize the ACL state 390 if err := c.clientACLResolver.init(); err != nil { 391 return nil, fmt.Errorf("failed to initialize ACL state: %v", err) 392 } 393 394 // Setup the node 395 if err := c.setupNode(); err != nil { 396 return nil, fmt.Errorf("node setup failed: %v", err) 397 } 398 399 // Store the config copy before restoring state but after it has been 400 // initialized. 401 c.configLock.Lock() 402 c.configCopy = c.config.Copy() 403 c.configLock.Unlock() 404 405 fingerprintManager := NewFingerprintManager( 406 c.configCopy.PluginSingletonLoader, c.GetConfig, c.configCopy.Node, 407 c.shutdownCh, c.updateNodeFromFingerprint, c.logger) 408 409 c.pluginManagers = pluginmanager.New(c.logger) 410 411 // Fingerprint the node and scan for drivers 412 if err := fingerprintManager.Run(); err != nil { 413 return nil, fmt.Errorf("fingerprinting failed: %v", err) 414 } 415 416 // Build the allow/denylists of drivers. 417 // COMPAT(1.0) uses inclusive language. white/blacklist are there for backward compatible reasons only. 418 allowlistDrivers := cfg.ReadStringListToMap("driver.allowlist", "driver.whitelist") 419 blocklistDrivers := cfg.ReadStringListToMap("driver.denylist", "driver.blacklist") 420 421 // Setup the csi manager 422 csiConfig := &csimanager.Config{ 423 Logger: c.logger, 424 DynamicRegistry: c.dynamicRegistry, 425 UpdateNodeCSIInfoFunc: c.batchNodeUpdates.updateNodeFromCSI, 426 TriggerNodeEvent: c.triggerNodeEvent, 427 } 428 csiManager := csimanager.New(csiConfig) 429 c.csimanager = csiManager 430 c.pluginManagers.RegisterAndRun(csiManager.PluginManager()) 431 432 // Setup the driver manager 433 driverConfig := &drivermanager.Config{ 434 Logger: c.logger, 435 Loader: c.configCopy.PluginSingletonLoader, 436 PluginConfig: c.configCopy.NomadPluginConfig(), 437 Updater: c.batchNodeUpdates.updateNodeFromDriver, 438 EventHandlerFactory: c.GetTaskEventHandler, 439 State: c.stateDB, 440 AllowedDrivers: allowlistDrivers, 441 BlockedDrivers: blocklistDrivers, 442 } 443 drvManager := drivermanager.New(driverConfig) 444 c.drivermanager = drvManager 445 c.pluginManagers.RegisterAndRun(drvManager) 446 447 // Setup the device manager 448 devConfig := &devicemanager.Config{ 449 Logger: c.logger, 450 Loader: c.configCopy.PluginSingletonLoader, 451 PluginConfig: c.configCopy.NomadPluginConfig(), 452 Updater: c.batchNodeUpdates.updateNodeFromDevices, 453 StatsInterval: c.configCopy.StatsCollectionInterval, 454 State: c.stateDB, 455 } 456 devManager := devicemanager.New(devConfig) 457 c.devicemanager = devManager 458 c.pluginManagers.RegisterAndRun(devManager) 459 460 // Batching of initial fingerprints is done to reduce the number of node 461 // updates sent to the server on startup. 
This is the first RPC to the servers 462 go c.batchFirstFingerprints() 463 464 // create heartbeatStop. We go after the first attempt to connect to the server, so 465 // that our grace period for connection goes for the full time 466 c.heartbeatStop = newHeartbeatStop(c.getAllocRunner, batchFirstFingerprintsTimeout, logger, c.shutdownCh) 467 468 // Watch for disconnection, and heartbeatStopAllocs configured to have a maximum 469 // lifetime when out of touch with the server 470 go c.heartbeatStop.watch() 471 472 // Add the stats collector 473 statsCollector := stats.NewHostStatsCollector(c.logger, c.config.AllocDir, c.devicemanager.AllStats) 474 c.hostStatsCollector = statsCollector 475 476 // Add the garbage collector 477 gcConfig := &GCConfig{ 478 MaxAllocs: cfg.GCMaxAllocs, 479 DiskUsageThreshold: cfg.GCDiskUsageThreshold, 480 InodeUsageThreshold: cfg.GCInodeUsageThreshold, 481 Interval: cfg.GCInterval, 482 ParallelDestroys: cfg.GCParallelDestroys, 483 ReservedDiskMB: cfg.Node.Reserved.DiskMB, 484 } 485 c.garbageCollector = NewAllocGarbageCollector(c.logger, statsCollector, c, gcConfig) 486 go c.garbageCollector.Run() 487 488 // Set the preconfigured list of static servers 489 c.configLock.RLock() 490 if len(c.configCopy.Servers) > 0 { 491 if _, err := c.setServersImpl(c.configCopy.Servers, true); err != nil { 492 logger.Warn("none of the configured servers are valid", "error", err) 493 } 494 } 495 c.configLock.RUnlock() 496 497 // Setup Consul discovery if enabled 498 if c.configCopy.ConsulConfig.ClientAutoJoin != nil && *c.configCopy.ConsulConfig.ClientAutoJoin { 499 c.shutdownGroup.Go(c.consulDiscovery) 500 if c.servers.NumServers() == 0 { 501 // No configured servers; trigger discovery manually 502 c.triggerDiscoveryCh <- struct{}{} 503 } 504 } 505 506 if err := c.setupConsulTokenClient(); err != nil { 507 return nil, errors.Wrap(err, "failed to setup consul tokens client") 508 } 509 510 // Setup the vault client for token and secret renewals 511 if err := c.setupVaultClient(); err != nil { 512 return nil, fmt.Errorf("failed to setup vault client: %v", err) 513 } 514 515 // wait until drivers are healthy before restoring or registering with servers 516 select { 517 case <-c.Ready(): 518 case <-time.After(batchFirstFingerprintsProcessingGrace): 519 logger.Warn("batch fingerprint operation timed out; proceeding to register with fingerprinted plugins so far") 520 } 521 522 // Register and then start heartbeating to the servers. 523 c.shutdownGroup.Go(c.registerAndHeartbeat) 524 525 // Restore the state 526 if err := c.restoreState(); err != nil { 527 logger.Error("failed to restore state", "error", err) 528 logger.Error("Nomad is unable to start due to corrupt state. "+ 529 "The safest way to proceed is to manually stop running task processes "+ 530 "and remove Nomad's state and alloc directories before "+ 531 "restarting. Lost allocations will be rescheduled.", 532 "state_dir", c.config.StateDir, "alloc_dir", c.config.AllocDir) 533 logger.Error("Corrupt state is often caused by a bug. Please " + 534 "report as much information as possible to " + 535 "https://github.com/hashicorp/nomad/issues") 536 return nil, fmt.Errorf("failed to restore state") 537 } 538 539 // Begin periodic snapshotting of state. 540 c.shutdownGroup.Go(c.periodicSnapshot) 541 542 // Begin syncing allocations to the server 543 c.shutdownGroup.Go(c.allocSync) 544 545 // Start the client! Don't use the shutdownGroup as run handles 546 // shutdowns manually to prevent updates from being applied during 547 // shutdown. 
548 go c.run() 549 550 // Start collecting stats 551 c.shutdownGroup.Go(c.emitStats) 552 553 c.logger.Info("started client", "node_id", c.NodeID()) 554 return c, nil 555 } 556 557 // Ready returns a chan that is closed when the client is fully initialized 558 func (c *Client) Ready() <-chan struct{} { 559 return c.fpInitialized 560 } 561 562 // init is used to initialize the client and perform any setup 563 // needed before we begin starting its various components. 564 func (c *Client) init() error { 565 // Ensure the state dir exists if we have one 566 if c.config.StateDir != "" { 567 if err := os.MkdirAll(c.config.StateDir, 0700); err != nil { 568 return fmt.Errorf("failed creating state dir: %s", err) 569 } 570 571 } else { 572 // Otherwise make a temp directory to use. 573 p, err := ioutil.TempDir("", "NomadClient") 574 if err != nil { 575 return fmt.Errorf("failed creating temporary directory for the StateDir: %v", err) 576 } 577 578 p, err = filepath.EvalSymlinks(p) 579 if err != nil { 580 return fmt.Errorf("failed to find temporary directory for the StateDir: %v", err) 581 } 582 583 c.config.StateDir = p 584 } 585 c.logger.Info("using state directory", "state_dir", c.config.StateDir) 586 587 // Open the state database 588 db, err := c.config.StateDBFactory(c.logger, c.config.StateDir) 589 if err != nil { 590 return fmt.Errorf("failed to open state database: %v", err) 591 } 592 593 // Upgrade the state database 594 if err := db.Upgrade(); err != nil { 595 // Upgrade only returns an error on critical persistence 596 // failures in which an operator should intervene before the 597 // node is accessible. Upgrade drops and logs corrupt state it 598 // encounters, so failing to start the agent should be extremely 599 // rare. 600 return fmt.Errorf("failed to upgrade state database: %v", err) 601 } 602 603 c.stateDB = db 604 605 // Ensure the alloc dir exists if we have one 606 if c.config.AllocDir != "" { 607 if err := os.MkdirAll(c.config.AllocDir, 0711); err != nil { 608 return fmt.Errorf("failed creating alloc dir: %s", err) 609 } 610 } else { 611 // Otherwise make a temp directory to use. 612 p, err := ioutil.TempDir("", "NomadClient") 613 if err != nil { 614 return fmt.Errorf("failed creating temporary directory for the AllocDir: %v", err) 615 } 616 617 p, err = filepath.EvalSymlinks(p) 618 if err != nil { 619 return fmt.Errorf("failed to find temporary directory for the AllocDir: %v", err) 620 } 621 622 // Change the permissions to have the execute bit 623 if err := os.Chmod(p, 0711); err != nil { 624 return fmt.Errorf("failed to change directory permissions for the AllocDir: %v", err) 625 } 626 627 c.config.AllocDir = p 628 } 629 630 c.logger.Info("using alloc directory", "alloc_dir", c.config.AllocDir) 631 return nil 632 } 633 634 // reloadTLSConnections allows a client to reload its TLS configuration on the 635 // fly 636 func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error { 637 var tlsWrap tlsutil.RegionWrapper 638 if newConfig != nil && newConfig.EnableRPC { 639 tw, err := tlsutil.NewTLSConfiguration(newConfig, true, true) 640 if err != nil { 641 return err 642 } 643 644 twWrap, err := tw.OutgoingTLSWrapper() 645 if err != nil { 646 return err 647 } 648 tlsWrap = twWrap 649 } 650 651 // Store the new tls wrapper. 
652 c.tlsWrapLock.Lock() 653 c.tlsWrap = tlsWrap 654 c.tlsWrapLock.Unlock() 655 656 // Keep the client configuration up to date as we use configuration values to 657 // decide on what type of connections to accept 658 c.configLock.Lock() 659 c.config.TLSConfig = newConfig 660 c.configLock.Unlock() 661 662 c.connPool.ReloadTLS(tlsWrap) 663 664 return nil 665 } 666 667 // Reload allows a client to reload its configuration on the fly 668 func (c *Client) Reload(newConfig *config.Config) error { 669 shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(c.config.TLSConfig, newConfig.TLSConfig) 670 if err != nil { 671 c.logger.Error("error parsing TLS configuration", "error", err) 672 return err 673 } 674 675 if shouldReloadTLS { 676 return c.reloadTLSConnections(newConfig.TLSConfig) 677 } 678 679 return nil 680 } 681 682 // Leave is used to prepare the client to leave the cluster 683 func (c *Client) Leave() error { 684 // TODO 685 return nil 686 } 687 688 // GetConfig returns the config of the client 689 func (c *Client) GetConfig() *config.Config { 690 c.configLock.Lock() 691 defer c.configLock.Unlock() 692 return c.configCopy 693 } 694 695 // Datacenter returns the datacenter for the given client 696 func (c *Client) Datacenter() string { 697 return c.config.Node.Datacenter 698 } 699 700 // Region returns the region for the given client 701 func (c *Client) Region() string { 702 return c.config.Region 703 } 704 705 // NodeID returns the node ID for the given client 706 func (c *Client) NodeID() string { 707 return c.config.Node.ID 708 } 709 710 // secretNodeID returns the secret node ID for the given client 711 func (c *Client) secretNodeID() string { 712 return c.config.Node.SecretID 713 } 714 715 // RPCMajorVersion returns the structs.ApiMajorVersion supported by the 716 // client. 717 func (c *Client) RPCMajorVersion() int { 718 return structs.ApiMajorVersion 719 } 720 721 // RPCMinorVersion returns the structs.ApiMinorVersion supported by the 722 // client. 723 func (c *Client) RPCMinorVersion() int { 724 return structs.ApiMinorVersion 725 } 726 727 // Shutdown is used to tear down the client 728 func (c *Client) Shutdown() error { 729 c.shutdownLock.Lock() 730 defer c.shutdownLock.Unlock() 731 732 if c.shutdown { 733 c.logger.Info("already shutdown") 734 return nil 735 } 736 c.logger.Info("shutting down") 737 738 // Stop renewing tokens and secrets 739 if c.vaultClient != nil { 740 c.vaultClient.Stop() 741 } 742 743 // Stop Garbage collector 744 c.garbageCollector.Stop() 745 746 arGroup := group{} 747 if c.config.DevMode { 748 // In DevMode destroy all the running allocations. 
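		// (Destroy kills the tasks and cleans up the allocation's state and
		// directories; in the non-dev branch below, Shutdown only halts the
		// alloc runners so that running tasks can be reattached by a
		// restarted agent via restoreState later in this file.)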
749 for _, ar := range c.getAllocRunners() { 750 ar.Destroy() 751 arGroup.AddCh(ar.DestroyCh()) 752 } 753 } else { 754 // In normal mode call shutdown 755 for _, ar := range c.getAllocRunners() { 756 ar.Shutdown() 757 arGroup.AddCh(ar.ShutdownCh()) 758 } 759 } 760 arGroup.Wait() 761 762 // Shutdown the plugin managers 763 c.pluginManagers.Shutdown() 764 765 c.shutdown = true 766 close(c.shutdownCh) 767 768 // Must close connection pool to unblock alloc watcher 769 c.connPool.Shutdown() 770 771 // Wait for goroutines to stop 772 c.shutdownGroup.Wait() 773 774 // One final save state 775 c.saveState() 776 return c.stateDB.Close() 777 } 778 779 // Stats is used to return statistics for debugging and insight 780 // for various sub-systems 781 func (c *Client) Stats() map[string]map[string]string { 782 c.heartbeatLock.Lock() 783 defer c.heartbeatLock.Unlock() 784 stats := map[string]map[string]string{ 785 "client": { 786 "node_id": c.NodeID(), 787 "known_servers": strings.Join(c.GetServers(), ","), 788 "num_allocations": strconv.Itoa(c.NumAllocs()), 789 "last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat())), 790 "heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL), 791 }, 792 "runtime": hstats.RuntimeStats(), 793 } 794 return stats 795 } 796 797 // GetAlloc returns an allocation or an error. 798 func (c *Client) GetAlloc(allocID string) (*structs.Allocation, error) { 799 ar, err := c.getAllocRunner(allocID) 800 if err != nil { 801 return nil, err 802 } 803 804 return ar.Alloc(), nil 805 } 806 807 // SignalAllocation sends a signal to the tasks within an allocation. 808 // If the provided task is empty, then every allocation will be signalled. 809 // If a task is provided, then only an exactly matching task will be signalled. 810 func (c *Client) SignalAllocation(allocID, task, signal string) error { 811 ar, err := c.getAllocRunner(allocID) 812 if err != nil { 813 return err 814 } 815 816 return ar.Signal(task, signal) 817 } 818 819 // CollectAllocation garbage collects a single allocation on a node. Returns 820 // true if alloc was found and garbage collected; otherwise false. 821 func (c *Client) CollectAllocation(allocID string) bool { 822 return c.garbageCollector.Collect(allocID) 823 } 824 825 // CollectAllAllocs garbage collects all allocations on a node in the terminal 826 // state 827 func (c *Client) CollectAllAllocs() { 828 c.garbageCollector.CollectAll() 829 } 830 831 func (c *Client) RestartAllocation(allocID, taskName string) error { 832 ar, err := c.getAllocRunner(allocID) 833 if err != nil { 834 return err 835 } 836 837 event := structs.NewTaskEvent(structs.TaskRestartSignal). 838 SetRestartReason("User requested restart") 839 840 if taskName != "" { 841 return ar.RestartTask(taskName, event) 842 } 843 844 return ar.RestartAll(event) 845 } 846 847 // Node returns the locally registered node 848 func (c *Client) Node() *structs.Node { 849 c.configLock.RLock() 850 defer c.configLock.RUnlock() 851 return c.configCopy.Node 852 } 853 854 // getAllocRunner returns an AllocRunner or an UnknownAllocation error if the 855 // client has no runner for the given alloc ID. 
856 func (c *Client) getAllocRunner(allocID string) (AllocRunner, error) { 857 c.allocLock.RLock() 858 defer c.allocLock.RUnlock() 859 860 ar, ok := c.allocs[allocID] 861 if !ok { 862 return nil, structs.NewErrUnknownAllocation(allocID) 863 } 864 865 return ar, nil 866 } 867 868 // StatsReporter exposes the various APIs related resource usage of a Nomad 869 // client 870 func (c *Client) StatsReporter() ClientStatsReporter { 871 return c 872 } 873 874 func (c *Client) GetAllocStats(allocID string) (interfaces.AllocStatsReporter, error) { 875 ar, err := c.getAllocRunner(allocID) 876 if err != nil { 877 return nil, err 878 } 879 return ar.StatsReporter(), nil 880 } 881 882 // HostStats returns all the stats related to a Nomad client 883 func (c *Client) LatestHostStats() *stats.HostStats { 884 return c.hostStatsCollector.Stats() 885 } 886 887 func (c *Client) LatestDeviceResourceStats(devices []*structs.AllocatedDeviceResource) []*device.DeviceGroupStats { 888 return c.computeAllocatedDeviceGroupStats(devices, c.LatestHostStats().DeviceStats) 889 } 890 891 func (c *Client) computeAllocatedDeviceGroupStats(devices []*structs.AllocatedDeviceResource, hostDeviceGroupStats []*device.DeviceGroupStats) []*device.DeviceGroupStats { 892 // basic optimization for the usual case 893 if len(devices) == 0 || len(hostDeviceGroupStats) == 0 { 894 return nil 895 } 896 897 // Build an index of allocated devices 898 adIdx := map[structs.DeviceIdTuple][]string{} 899 900 total := 0 901 for _, ds := range devices { 902 adIdx[*ds.ID()] = ds.DeviceIDs 903 total += len(ds.DeviceIDs) 904 } 905 906 // Collect allocated device stats from host stats 907 result := make([]*device.DeviceGroupStats, 0, len(adIdx)) 908 909 for _, dg := range hostDeviceGroupStats { 910 k := structs.DeviceIdTuple{ 911 Vendor: dg.Vendor, 912 Type: dg.Type, 913 Name: dg.Name, 914 } 915 916 allocatedDeviceIDs, ok := adIdx[k] 917 if !ok { 918 continue 919 } 920 921 rdgStats := &device.DeviceGroupStats{ 922 Vendor: dg.Vendor, 923 Type: dg.Type, 924 Name: dg.Name, 925 InstanceStats: map[string]*device.DeviceStats{}, 926 } 927 928 for _, adID := range allocatedDeviceIDs { 929 deviceStats, ok := dg.InstanceStats[adID] 930 if !ok || deviceStats == nil { 931 c.logger.Warn("device not found in stats", "device_id", adID, "device_group_id", k) 932 continue 933 } 934 935 rdgStats.InstanceStats[adID] = deviceStats 936 } 937 result = append(result, rdgStats) 938 } 939 940 return result 941 } 942 943 // ValidateMigrateToken verifies that a token is for a specific client and 944 // allocation, and has been created by a trusted party that has privileged 945 // knowledge of the client's secret identifier 946 func (c *Client) ValidateMigrateToken(allocID, migrateToken string) bool { 947 if !c.config.ACLEnabled { 948 return true 949 } 950 951 return structs.CompareMigrateToken(allocID, c.secretNodeID(), migrateToken) 952 } 953 954 // GetAllocFS returns the AllocFS interface for the alloc dir of an allocation 955 func (c *Client) GetAllocFS(allocID string) (allocdir.AllocDirFS, error) { 956 ar, err := c.getAllocRunner(allocID) 957 if err != nil { 958 return nil, err 959 } 960 return ar.GetAllocDir(), nil 961 } 962 963 // GetAllocState returns a copy of an allocation's state on this client. It 964 // returns either an AllocState or an unknown allocation error. 
965 func (c *Client) GetAllocState(allocID string) (*arstate.State, error) { 966 ar, err := c.getAllocRunner(allocID) 967 if err != nil { 968 return nil, err 969 } 970 971 return ar.AllocState(), nil 972 } 973 974 // GetServers returns the list of nomad servers this client is aware of. 975 func (c *Client) GetServers() []string { 976 endpoints := c.servers.GetServers() 977 res := make([]string, len(endpoints)) 978 for i := range endpoints { 979 res[i] = endpoints[i].String() 980 } 981 sort.Strings(res) 982 return res 983 } 984 985 // SetServers sets a new list of nomad servers to connect to. As long as one 986 // server is resolvable no error is returned. 987 func (c *Client) SetServers(in []string) (int, error) { 988 return c.setServersImpl(in, false) 989 } 990 991 // setServersImpl sets a new list of nomad servers to connect to. If force is 992 // set, we add the server to the internal serverlist even if the server could not 993 // be pinged. An error is returned if no endpoints were valid when non-forcing. 994 // 995 // Force should be used when setting the servers from the initial configuration 996 // since the server may be starting up in parallel and initial pings may fail. 997 func (c *Client) setServersImpl(in []string, force bool) (int, error) { 998 var mu sync.Mutex 999 var wg sync.WaitGroup 1000 var merr multierror.Error 1001 1002 endpoints := make([]*servers.Server, 0, len(in)) 1003 wg.Add(len(in)) 1004 1005 for _, s := range in { 1006 go func(srv string) { 1007 defer wg.Done() 1008 addr, err := resolveServer(srv) 1009 if err != nil { 1010 mu.Lock() 1011 c.logger.Debug("ignoring server due to resolution error", "error", err, "server", srv) 1012 merr.Errors = append(merr.Errors, err) 1013 mu.Unlock() 1014 return 1015 } 1016 1017 // Try to ping to check if it is a real server 1018 if err := c.Ping(addr); err != nil { 1019 mu.Lock() 1020 merr.Errors = append(merr.Errors, fmt.Errorf("Server at address %s failed ping: %v", addr, err)) 1021 mu.Unlock() 1022 1023 // If we are forcing the setting of the servers, inject it to 1024 // the serverlist even if we can't ping immediately. 1025 if !force { 1026 return 1027 } 1028 } 1029 1030 mu.Lock() 1031 endpoints = append(endpoints, &servers.Server{Addr: addr}) 1032 mu.Unlock() 1033 }(s) 1034 } 1035 1036 wg.Wait() 1037 1038 // Only return errors if no servers are valid 1039 if len(endpoints) == 0 { 1040 if len(merr.Errors) > 0 { 1041 return 0, merr.ErrorOrNil() 1042 } 1043 return 0, noServersErr 1044 } 1045 1046 c.servers.SetServers(endpoints) 1047 return len(endpoints), nil 1048 } 1049 1050 // restoreState is used to restore our state from the data dir 1051 // If there are errors restoring a specific allocation it is marked 1052 // as failed whenever possible. 1053 func (c *Client) restoreState() error { 1054 if c.config.DevMode { 1055 return nil 1056 } 1057 1058 //XXX REMOVED! make a note in backward compat / upgrading doc 1059 // COMPAT: Remove in 0.7.0 1060 // 0.6.0 transitioned from individual state files to a single bolt-db. 1061 // The upgrade path is to: 1062 // Check if old state exists 1063 // If so, restore from that and delete old state 1064 // Restore using state database 1065 1066 // Restore allocations 1067 allocs, allocErrs, err := c.stateDB.GetAllAllocations() 1068 if err != nil { 1069 return err 1070 } 1071 1072 for allocID, err := range allocErrs { 1073 c.logger.Error("error restoring alloc", "error", err, "alloc_id", allocID) 1074 //TODO Cleanup 1075 // Try to clean up alloc dir 1076 // Remove boltdb entries? 
		// Send to server with clientstatus=failed
	}

	// Load each alloc back
	for _, alloc := range allocs {

		// COMPAT(0.12): remove once upgrading from 0.9.5 is no longer supported
		// See hasLocalState for details. Skipping suspicious allocs
		// now. If allocs should be run, they will be started when the client
		// gets allocs from servers.
		if !c.hasLocalState(alloc) {
			c.logger.Warn("found an alloc without any local state, skipping restore", "alloc_id", alloc.ID)
			continue
		}

		//XXX On Restore we give up on watching previous allocs because
		// we need the local AllocRunners initialized first. We could
		// add a second loop to initialize just the alloc watcher.
		prevAllocWatcher := allocwatcher.NoopPrevAlloc{}
		prevAllocMigrator := allocwatcher.NoopPrevAlloc{}

		c.configLock.RLock()
		arConf := &allocrunner.Config{
			Alloc:               alloc,
			Logger:              c.logger,
			ClientConfig:        c.configCopy,
			StateDB:             c.stateDB,
			StateUpdater:        c,
			DeviceStatsReporter: c,
			Consul:              c.consulService,
			ConsulSI:            c.tokensClient,
			ConsulProxies:       c.consulProxies,
			Vault:               c.vaultClient,
			PrevAllocWatcher:    prevAllocWatcher,
			PrevAllocMigrator:   prevAllocMigrator,
			DynamicRegistry:     c.dynamicRegistry,
			CSIManager:          c.csimanager,
			DeviceManager:       c.devicemanager,
			DriverManager:       c.drivermanager,
			ServersContactedCh:  c.serversContactedCh,
			RPCClient:           c,
		}
		c.configLock.RUnlock()

		ar, err := allocrunner.NewAllocRunner(arConf)
		if err != nil {
			c.logger.Error("error running alloc", "error", err, "alloc_id", alloc.ID)
			c.handleInvalidAllocs(alloc, err)
			continue
		}

		// Restore state
		if err := ar.Restore(); err != nil {
			c.logger.Error("error restoring alloc", "error", err, "alloc_id", alloc.ID)
			// Override the status of the alloc to failed
			ar.SetClientStatus(structs.AllocClientStatusFailed)
			// Destroy the alloc runner since this is a failed restore
			ar.Destroy()
			continue
		}

		// Maybe mark the alloc for halt on missing server heartbeats
		if c.heartbeatStop.shouldStop(alloc) {
			err = c.heartbeatStop.stopAlloc(alloc.ID)
			if err != nil {
				c.logger.Error("error stopping alloc", "error", err, "alloc_id", alloc.ID)
			}
			continue
		}

		//XXX is this locking necessary?
		c.allocLock.Lock()
		c.allocs[alloc.ID] = ar
		c.allocLock.Unlock()

		c.heartbeatStop.allocHook(alloc)
	}

	// All allocs restored successfully, run them!
	c.allocLock.Lock()
	for _, ar := range c.allocs {
		go ar.Run()
	}
	c.allocLock.Unlock()
	return nil
}

// hasLocalState returns true if we have any other associated state
// with the alloc beyond the alloc record itself
//
// Useful for detecting if a potentially completed alloc got resurrected
// after the AR was destroyed. In such cases, re-running the alloc leads to
// unexpected reruns and may exhaust processes and tasks on the node.
//
// The heuristic used here is that an alloc is suspect if no task or status
// information is found for it locally.
//
// Also, an alloc without any client state will not be restored correctly; there
// will be no task processes to reattach to, etc. In such cases, the client should
// wait until it gets allocs from the server to launch them.
1177 // 1178 // See: 1179 // * https://github.com/hashicorp/nomad/pull/6207 1180 // * https://github.com/hashicorp/nomad/issues/5984 1181 // 1182 // COMPAT(0.12): remove once upgrading from 0.9.5 is no longer supported 1183 func (c *Client) hasLocalState(alloc *structs.Allocation) bool { 1184 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 1185 if tg == nil { 1186 // corrupt alloc?! 1187 return false 1188 } 1189 1190 for _, task := range tg.Tasks { 1191 ls, tr, _ := c.stateDB.GetTaskRunnerState(alloc.ID, task.Name) 1192 if ls != nil || tr != nil { 1193 return true 1194 } 1195 } 1196 1197 return false 1198 } 1199 1200 func (c *Client) handleInvalidAllocs(alloc *structs.Allocation, err error) { 1201 c.invalidAllocsLock.Lock() 1202 c.invalidAllocs[alloc.ID] = struct{}{} 1203 c.invalidAllocsLock.Unlock() 1204 1205 // Mark alloc as failed so server can handle this 1206 failed := makeFailedAlloc(alloc, err) 1207 select { 1208 case c.allocUpdates <- failed: 1209 case <-c.shutdownCh: 1210 } 1211 } 1212 1213 // saveState is used to snapshot our state into the data dir. 1214 func (c *Client) saveState() error { 1215 var wg sync.WaitGroup 1216 var l sync.Mutex 1217 var mErr multierror.Error 1218 runners := c.getAllocRunners() 1219 wg.Add(len(runners)) 1220 1221 for id, ar := range runners { 1222 go func(id string, ar AllocRunner) { 1223 err := ar.PersistState() 1224 if err != nil { 1225 c.logger.Error("error saving alloc state", "error", err, "alloc_id", id) 1226 l.Lock() 1227 _ = multierror.Append(&mErr, err) 1228 l.Unlock() 1229 } 1230 wg.Done() 1231 }(id, ar) 1232 } 1233 1234 wg.Wait() 1235 return mErr.ErrorOrNil() 1236 } 1237 1238 // getAllocRunners returns a snapshot of the current set of alloc runners. 1239 func (c *Client) getAllocRunners() map[string]AllocRunner { 1240 c.allocLock.RLock() 1241 defer c.allocLock.RUnlock() 1242 runners := make(map[string]AllocRunner, len(c.allocs)) 1243 for id, ar := range c.allocs { 1244 runners[id] = ar 1245 } 1246 return runners 1247 } 1248 1249 // NumAllocs returns the number of un-GC'd allocs this client has. Used to 1250 // fulfill the AllocCounter interface for the GC. 1251 func (c *Client) NumAllocs() int { 1252 n := 0 1253 c.allocLock.RLock() 1254 for _, a := range c.allocs { 1255 if !a.IsDestroyed() { 1256 n++ 1257 } 1258 } 1259 c.allocLock.RUnlock() 1260 return n 1261 } 1262 1263 // nodeID restores, or generates if necessary, a unique node ID and SecretID. 1264 // The node ID is, if available, a persistent unique ID. The secret ID is a 1265 // high-entropy random UUID. 1266 func (c *Client) nodeID() (id, secret string, err error) { 1267 var hostID string 1268 hostInfo, err := host.Info() 1269 if !c.config.NoHostUUID && err == nil { 1270 if hashed, ok := helper.HashUUID(hostInfo.HostID); ok { 1271 hostID = hashed 1272 } 1273 } 1274 1275 if hostID == "" { 1276 // Generate a random hostID if no constant ID is available on 1277 // this platform. 
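		// This branch is reached when NoHostUUID is set, when host.Info()
		// returned an error, or when the reported host UUID could not be
		// hashed into a usable ID above.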
1278 hostID = uuid.Generate() 1279 } 1280 1281 // Do not persist in dev mode 1282 if c.config.DevMode { 1283 return hostID, uuid.Generate(), nil 1284 } 1285 1286 // Attempt to read existing ID 1287 idPath := filepath.Join(c.config.StateDir, "client-id") 1288 idBuf, err := ioutil.ReadFile(idPath) 1289 if err != nil && !os.IsNotExist(err) { 1290 return "", "", err 1291 } 1292 1293 // Attempt to read existing secret ID 1294 secretPath := filepath.Join(c.config.StateDir, "secret-id") 1295 secretBuf, err := ioutil.ReadFile(secretPath) 1296 if err != nil && !os.IsNotExist(err) { 1297 return "", "", err 1298 } 1299 1300 // Use existing ID if any 1301 if len(idBuf) != 0 { 1302 id = strings.ToLower(string(idBuf)) 1303 } else { 1304 id = hostID 1305 1306 // Persist the ID 1307 if err := ioutil.WriteFile(idPath, []byte(id), 0700); err != nil { 1308 return "", "", err 1309 } 1310 } 1311 1312 if len(secretBuf) != 0 { 1313 secret = string(secretBuf) 1314 } else { 1315 // Generate new ID 1316 secret = uuid.Generate() 1317 1318 // Persist the ID 1319 if err := ioutil.WriteFile(secretPath, []byte(secret), 0700); err != nil { 1320 return "", "", err 1321 } 1322 } 1323 1324 return id, secret, nil 1325 } 1326 1327 // setupNode is used to setup the initial node 1328 func (c *Client) setupNode() error { 1329 node := c.config.Node 1330 if node == nil { 1331 node = &structs.Node{} 1332 c.config.Node = node 1333 } 1334 // Generate an ID and secret for the node 1335 id, secretID, err := c.nodeID() 1336 if err != nil { 1337 return fmt.Errorf("node ID setup failed: %v", err) 1338 } 1339 1340 node.ID = id 1341 node.SecretID = secretID 1342 if node.Attributes == nil { 1343 node.Attributes = make(map[string]string) 1344 } 1345 if node.Links == nil { 1346 node.Links = make(map[string]string) 1347 } 1348 if node.Drivers == nil { 1349 node.Drivers = make(map[string]*structs.DriverInfo) 1350 } 1351 if node.CSIControllerPlugins == nil { 1352 node.CSIControllerPlugins = make(map[string]*structs.CSIInfo) 1353 } 1354 if node.CSINodePlugins == nil { 1355 node.CSINodePlugins = make(map[string]*structs.CSIInfo) 1356 } 1357 if node.Meta == nil { 1358 node.Meta = make(map[string]string) 1359 } 1360 if node.NodeResources == nil { 1361 node.NodeResources = &structs.NodeResources{} 1362 } 1363 if node.ReservedResources == nil { 1364 node.ReservedResources = &structs.NodeReservedResources{} 1365 } 1366 if node.Resources == nil { 1367 node.Resources = &structs.Resources{} 1368 } 1369 if node.Reserved == nil { 1370 node.Reserved = &structs.Resources{} 1371 } 1372 if node.Datacenter == "" { 1373 node.Datacenter = "dc1" 1374 } 1375 if node.Name == "" { 1376 node.Name, _ = os.Hostname() 1377 } 1378 if node.HostVolumes == nil { 1379 if l := len(c.config.HostVolumes); l != 0 { 1380 node.HostVolumes = make(map[string]*structs.ClientHostVolumeConfig, l) 1381 for k, v := range c.config.HostVolumes { 1382 if _, err := os.Stat(v.Path); err != nil { 1383 return fmt.Errorf("failed to validate volume %s, err: %v", v.Name, err) 1384 } 1385 node.HostVolumes[k] = v.Copy() 1386 } 1387 } 1388 } 1389 1390 if node.Name == "" { 1391 node.Name = node.ID 1392 } 1393 node.Status = structs.NodeStatusInit 1394 1395 // Setup default meta 1396 if _, ok := node.Meta[envoy.SidecarMetaParam]; !ok { 1397 node.Meta[envoy.SidecarMetaParam] = envoy.ImageFormat 1398 } 1399 if _, ok := node.Meta[envoy.GatewayMetaParam]; !ok { 1400 node.Meta[envoy.GatewayMetaParam] = envoy.ImageFormat 1401 } 1402 if _, ok := node.Meta["connect.log_level"]; !ok { 1403 
node.Meta["connect.log_level"] = defaultConnectLogLevel 1404 } 1405 if _, ok := node.Meta["connect.proxy_concurrency"]; !ok { 1406 node.Meta["connect.proxy_concurrency"] = defaultConnectProxyConcurrency 1407 } 1408 1409 return nil 1410 } 1411 1412 // updateNodeFromFingerprint updates the node with the result of 1413 // fingerprinting the node from the diff that was created 1414 func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResponse) *structs.Node { 1415 c.configLock.Lock() 1416 defer c.configLock.Unlock() 1417 1418 nodeHasChanged := false 1419 1420 for name, newVal := range response.Attributes { 1421 oldVal := c.config.Node.Attributes[name] 1422 if oldVal == newVal { 1423 continue 1424 } 1425 1426 nodeHasChanged = true 1427 if newVal == "" { 1428 delete(c.config.Node.Attributes, name) 1429 } else { 1430 c.config.Node.Attributes[name] = newVal 1431 } 1432 } 1433 1434 // update node links and resources from the diff created from 1435 // fingerprinting 1436 for name, newVal := range response.Links { 1437 oldVal := c.config.Node.Links[name] 1438 if oldVal == newVal { 1439 continue 1440 } 1441 1442 nodeHasChanged = true 1443 if newVal == "" { 1444 delete(c.config.Node.Links, name) 1445 } else { 1446 c.config.Node.Links[name] = newVal 1447 } 1448 } 1449 1450 // COMPAT(0.10): Remove in 0.10 1451 // update the response networks with the config 1452 // if we still have node changes, merge them 1453 if response.Resources != nil { 1454 response.Resources.Networks = updateNetworks( 1455 response.Resources.Networks, 1456 c.config) 1457 if !c.config.Node.Resources.Equals(response.Resources) { 1458 c.config.Node.Resources.Merge(response.Resources) 1459 nodeHasChanged = true 1460 } 1461 } 1462 1463 // update the response networks with the config 1464 // if we still have node changes, merge them 1465 if response.NodeResources != nil { 1466 response.NodeResources.Networks = updateNetworks( 1467 response.NodeResources.Networks, 1468 c.config) 1469 if !c.config.Node.NodeResources.Equals(response.NodeResources) { 1470 c.config.Node.NodeResources.Merge(response.NodeResources) 1471 nodeHasChanged = true 1472 } 1473 } 1474 1475 if nodeHasChanged { 1476 c.updateNodeLocked() 1477 } 1478 1479 return c.configCopy.Node 1480 } 1481 1482 // updateNetworks filters and overrides network speed of host networks based 1483 // on configured settings 1484 func updateNetworks(up structs.Networks, c *config.Config) structs.Networks { 1485 if up == nil { 1486 return nil 1487 } 1488 1489 if c.NetworkInterface != "" { 1490 // For host networks, if a network device is configured filter up to contain details for only 1491 // that device 1492 upd := []*structs.NetworkResource{} 1493 for _, n := range up { 1494 switch n.Mode { 1495 case "host": 1496 if c.NetworkInterface == n.Device { 1497 upd = append(upd, n) 1498 } 1499 default: 1500 upd = append(upd, n) 1501 1502 } 1503 } 1504 up = upd 1505 } 1506 1507 // if set, apply the config NetworkSpeed to networks in host mode 1508 if c.NetworkSpeed != 0 { 1509 for _, n := range up { 1510 if n.Mode == "host" { 1511 n.MBits = c.NetworkSpeed 1512 } 1513 } 1514 } 1515 return up 1516 } 1517 1518 // retryIntv calculates a retry interval value given the base 1519 func (c *Client) retryIntv(base time.Duration) time.Duration { 1520 if c.config.DevMode { 1521 return devModeRetryIntv 1522 } 1523 return base + lib.RandomStagger(base) 1524 } 1525 1526 // registerAndHeartbeat is a long lived goroutine used to register the client 1527 // and then start heartbeating to the 
// server.
func (c *Client) registerAndHeartbeat() {
	// Register the node
	c.retryRegisterNode()

	// Start watching changes for node changes
	go c.watchNodeUpdates()

	// Start watching for emitting node events
	go c.watchNodeEvents()

	// Setup the heartbeat timer, for the initial registration
	// we want to do this quickly. We want to do it extra quickly
	// in development mode.
	var heartbeat <-chan time.Time
	if c.config.DevMode {
		heartbeat = time.After(0)
	} else {
		heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
	}

	for {
		select {
		case <-c.rpcRetryWatcher():
		case <-heartbeat:
		case <-c.shutdownCh:
			return
		}
		if err := c.updateNodeStatus(); err != nil {
			// The servers have changed such that this node has not been
			// registered before
			if strings.Contains(err.Error(), "node not found") {
				// Re-register the node
				c.logger.Info("re-registering node")
				c.retryRegisterNode()
				heartbeat = time.After(lib.RandomStagger(initialHeartbeatStagger))
			} else {
				intv := c.getHeartbeatRetryIntv(err)
				c.logger.Error("error heartbeating. retrying", "error", err, "period", intv)
				heartbeat = time.After(intv)

				// If heartbeating fails, trigger Consul discovery
				c.triggerDiscovery()
			}
		} else {
			c.heartbeatLock.Lock()
			heartbeat = time.After(c.heartbeatTTL)
			c.heartbeatLock.Unlock()
		}
	}
}

func (c *Client) lastHeartbeat() time.Time {
	return c.heartbeatStop.getLastOk()
}

// getHeartbeatRetryIntv is used to retrieve the time to wait before attempting
// another heartbeat.
func (c *Client) getHeartbeatRetryIntv(err error) time.Duration {
	if c.config.DevMode {
		return devModeRetryIntv
	}

	// Collect the useful heartbeat info
	c.heartbeatLock.Lock()
	haveHeartbeated := c.haveHeartbeated
	last := c.lastHeartbeat()
	ttl := c.heartbeatTTL
	c.heartbeatLock.Unlock()

	// If we haven't even successfully heartbeated once or there is no leader
	// treat it as a registration. In the case that there is a leadership loss,
	// we will have our heartbeat timer reset to a much larger threshold, so
	// do not put unnecessary pressure on the new leader.
	if !haveHeartbeated || err == structs.ErrNoLeader {
		return c.retryIntv(registerRetryIntv)
	}

	// Determine how much time we have left to heartbeat
	left := time.Until(last.Add(ttl))

	// Logic for retrying is:
	// * Do not retry faster than once a second
	// * Do not retry less often than once every 30 seconds
	// * If we have missed the heartbeat by more than 30 seconds, start to use
	//   the absolute time since we do not want to retry indefinitely
	switch {
	case left < -30*time.Second:
		// Make left the absolute value so we delay and jitter properly.
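		// For example, with a 30s TTL and a last successful heartbeat 75s
		// ago, left is -45s: it becomes 45s here, the stagger below is a
		// random value in [0s, 45s), and anything above 30s is clamped to
		// 25s + RandomStagger(5s), i.e. a retry within 25-30s.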
1616 left *= -1 1617 case left < 0: 1618 return time.Second + lib.RandomStagger(time.Second) 1619 default: 1620 } 1621 1622 stagger := lib.RandomStagger(left) 1623 switch { 1624 case stagger < time.Second: 1625 return time.Second + lib.RandomStagger(time.Second) 1626 case stagger > 30*time.Second: 1627 return 25*time.Second + lib.RandomStagger(5*time.Second) 1628 default: 1629 return stagger 1630 } 1631 } 1632 1633 // periodicSnapshot is a long lived goroutine used to periodically snapshot the 1634 // state of the client 1635 func (c *Client) periodicSnapshot() { 1636 // Create a snapshot timer 1637 snapshot := time.After(stateSnapshotIntv) 1638 1639 for { 1640 select { 1641 case <-snapshot: 1642 snapshot = time.After(stateSnapshotIntv) 1643 if err := c.saveState(); err != nil { 1644 c.logger.Error("error saving state", "error", err) 1645 } 1646 1647 case <-c.shutdownCh: 1648 return 1649 } 1650 } 1651 } 1652 1653 // run is a long lived goroutine used to run the client. Shutdown() stops it first 1654 func (c *Client) run() { 1655 // Watch for changes in allocations 1656 allocUpdates := make(chan *allocUpdates, 8) 1657 go c.watchAllocations(allocUpdates) 1658 1659 for { 1660 select { 1661 case update := <-allocUpdates: 1662 // Don't apply updates while shutting down. 1663 c.shutdownLock.Lock() 1664 if c.shutdown { 1665 c.shutdownLock.Unlock() 1666 return 1667 } 1668 1669 // Apply updates inside lock to prevent a concurrent 1670 // shutdown. 1671 c.runAllocs(update) 1672 c.shutdownLock.Unlock() 1673 1674 case <-c.shutdownCh: 1675 return 1676 } 1677 } 1678 } 1679 1680 // submitNodeEvents is used to submit a client-side node event. Examples of 1681 // these kinds of events include when a driver moves from healthy to unhealthy 1682 // (and vice versa) 1683 func (c *Client) submitNodeEvents(events []*structs.NodeEvent) error { 1684 nodeID := c.NodeID() 1685 nodeEvents := map[string][]*structs.NodeEvent{ 1686 nodeID: events, 1687 } 1688 req := structs.EmitNodeEventsRequest{ 1689 NodeEvents: nodeEvents, 1690 WriteRequest: structs.WriteRequest{Region: c.Region()}, 1691 } 1692 var resp structs.EmitNodeEventsResponse 1693 if err := c.RPC("Node.EmitEvents", &req, &resp); err != nil { 1694 return fmt.Errorf("Emitting node events failed: %v", err) 1695 } 1696 return nil 1697 } 1698 1699 // watchNodeEvents is a handler which receives node events and on a interval 1700 // and submits them in batch format to the server 1701 func (c *Client) watchNodeEvents() { 1702 // batchEvents stores events that have yet to be published 1703 var batchEvents []*structs.NodeEvent 1704 1705 timer := stoppedTimer() 1706 defer timer.Stop() 1707 1708 for { 1709 select { 1710 case event := <-c.triggerEmitNodeEvent: 1711 if l := len(batchEvents); l <= structs.MaxRetainedNodeEvents { 1712 batchEvents = append(batchEvents, event) 1713 } else { 1714 // Drop the oldest event 1715 c.logger.Warn("dropping node event", "node_event", batchEvents[0]) 1716 batchEvents = append(batchEvents[1:], event) 1717 } 1718 timer.Reset(c.retryIntv(nodeUpdateRetryIntv)) 1719 case <-timer.C: 1720 if err := c.submitNodeEvents(batchEvents); err != nil { 1721 c.logger.Error("error submitting node events", "error", err) 1722 timer.Reset(c.retryIntv(nodeUpdateRetryIntv)) 1723 } else { 1724 // Reset the events since we successfully sent them. 
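				// (On the error path above the batch is kept and the timer
				// re-armed, so events are not lost on a failed submission;
				// they are only dropped here once the server has accepted
				// them.)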
1725 batchEvents = []*structs.NodeEvent{} 1726 } 1727 case <-c.shutdownCh: 1728 return 1729 } 1730 } 1731 } 1732 1733 // triggerNodeEvent triggers a emit node event 1734 func (c *Client) triggerNodeEvent(nodeEvent *structs.NodeEvent) { 1735 select { 1736 case c.triggerEmitNodeEvent <- nodeEvent: 1737 // emit node event goroutine was released to execute 1738 default: 1739 // emit node event goroutine was already running 1740 } 1741 } 1742 1743 // retryRegisterNode is used to register the node or update the registration and 1744 // retry in case of failure. 1745 func (c *Client) retryRegisterNode() { 1746 for { 1747 err := c.registerNode() 1748 if err == nil { 1749 // Registered! 1750 return 1751 } 1752 1753 if err == noServersErr { 1754 c.logger.Debug("registration waiting on servers") 1755 c.triggerDiscovery() 1756 } else { 1757 c.logger.Error("error registering", "error", err) 1758 } 1759 select { 1760 case <-c.rpcRetryWatcher(): 1761 case <-time.After(c.retryIntv(registerRetryIntv)): 1762 case <-c.shutdownCh: 1763 return 1764 } 1765 } 1766 } 1767 1768 // registerNode is used to register the node or update the registration 1769 func (c *Client) registerNode() error { 1770 node := c.Node() 1771 req := structs.NodeRegisterRequest{ 1772 Node: node, 1773 WriteRequest: structs.WriteRequest{Region: c.Region()}, 1774 } 1775 var resp structs.NodeUpdateResponse 1776 if err := c.RPC("Node.Register", &req, &resp); err != nil { 1777 return err 1778 } 1779 1780 // Update the node status to ready after we register. 1781 c.configLock.Lock() 1782 node.Status = structs.NodeStatusReady 1783 c.config.Node.Status = structs.NodeStatusReady 1784 c.configLock.Unlock() 1785 1786 c.logger.Info("node registration complete") 1787 if len(resp.EvalIDs) != 0 { 1788 c.logger.Debug("evaluations triggered by node registration", "num_evals", len(resp.EvalIDs)) 1789 } 1790 1791 c.heartbeatLock.Lock() 1792 defer c.heartbeatLock.Unlock() 1793 c.heartbeatStop.setLastOk(time.Now()) 1794 c.heartbeatTTL = resp.HeartbeatTTL 1795 return nil 1796 } 1797 1798 // updateNodeStatus is used to heartbeat and update the status of the node 1799 func (c *Client) updateNodeStatus() error { 1800 start := time.Now() 1801 req := structs.NodeUpdateStatusRequest{ 1802 NodeID: c.NodeID(), 1803 Status: structs.NodeStatusReady, 1804 WriteRequest: structs.WriteRequest{Region: c.Region()}, 1805 } 1806 var resp structs.NodeUpdateResponse 1807 if err := c.RPC("Node.UpdateStatus", &req, &resp); err != nil { 1808 c.triggerDiscovery() 1809 return fmt.Errorf("failed to update status: %v", err) 1810 } 1811 end := time.Now() 1812 1813 if len(resp.EvalIDs) != 0 { 1814 c.logger.Debug("evaluations triggered by node update", "num_evals", len(resp.EvalIDs)) 1815 } 1816 1817 // Update the last heartbeat and the new TTL, capturing the old values 1818 c.heartbeatLock.Lock() 1819 last := c.lastHeartbeat() 1820 oldTTL := c.heartbeatTTL 1821 haveHeartbeated := c.haveHeartbeated 1822 c.heartbeatStop.setLastOk(time.Now()) 1823 c.heartbeatTTL = resp.HeartbeatTTL 1824 c.haveHeartbeated = true 1825 c.heartbeatLock.Unlock() 1826 c.logger.Trace("next heartbeat", "period", resp.HeartbeatTTL) 1827 1828 if resp.Index != 0 { 1829 c.logger.Debug("state updated", "node_status", req.Status) 1830 1831 // We have potentially missed our TTL log how delayed we were 1832 if haveHeartbeated { 1833 c.logger.Warn("missed heartbeat", 1834 "req_latency", end.Sub(start), "heartbeat_ttl", oldTTL, "since_last_heartbeat", time.Since(last)) 1835 } 1836 } 1837 1838 // Update the number of nodes in 
the cluster so we can adjust our server 1839 // rebalance rate. 1840 c.servers.SetNumNodes(resp.NumNodes) 1841 1842 // Convert []*NodeServerInfo to []*servers.Server 1843 nomadServers := make([]*servers.Server, 0, len(resp.Servers)) 1844 for _, s := range resp.Servers { 1845 addr, err := resolveServer(s.RPCAdvertiseAddr) 1846 if err != nil { 1847 c.logger.Warn("ignoring invalid server", "error", err, "server", s.RPCAdvertiseAddr) 1848 continue 1849 } 1850 e := &servers.Server{Addr: addr} 1851 nomadServers = append(nomadServers, e) 1852 } 1853 if len(nomadServers) == 0 { 1854 return fmt.Errorf("heartbeat response returned no valid servers") 1855 } 1856 c.servers.SetServers(nomadServers) 1857 1858 // Begin polling Consul if there is no Nomad leader. We could be 1859 // heartbeating to a Nomad server that is in the minority of a 1860 // partition of the Nomad server quorum, but this Nomad Agent still 1861 // has connectivity to the existing majority of Nomad Servers, but 1862 // only if it queries Consul. 1863 if resp.LeaderRPCAddr == "" { 1864 c.triggerDiscovery() 1865 } 1866 1867 c.EnterpriseClient.SetFeatures(resp.Features) 1868 return nil 1869 } 1870 1871 // AllocStateUpdated asynchronously updates the server with the current state 1872 // of an allocations and its tasks. 1873 func (c *Client) AllocStateUpdated(alloc *structs.Allocation) { 1874 if alloc.Terminated() { 1875 // Terminated, mark for GC if we're still tracking this alloc 1876 // runner. If it's not being tracked that means the server has 1877 // already GC'd it (see removeAlloc). 1878 ar, err := c.getAllocRunner(alloc.ID) 1879 1880 if err == nil { 1881 c.garbageCollector.MarkForCollection(alloc.ID, ar) 1882 1883 // Trigger a GC in case we're over thresholds and just 1884 // waiting for eligible allocs. 1885 c.garbageCollector.Trigger() 1886 } 1887 } 1888 1889 // Strip all the information that can be reconstructed at the server. Only 1890 // send the fields that are updatable by the client. 1891 stripped := new(structs.Allocation) 1892 stripped.ID = alloc.ID 1893 stripped.NodeID = c.NodeID() 1894 stripped.TaskStates = alloc.TaskStates 1895 stripped.ClientStatus = alloc.ClientStatus 1896 stripped.ClientDescription = alloc.ClientDescription 1897 stripped.DeploymentStatus = alloc.DeploymentStatus 1898 stripped.NetworkStatus = alloc.NetworkStatus 1899 1900 select { 1901 case c.allocUpdates <- stripped: 1902 case <-c.shutdownCh: 1903 } 1904 } 1905 1906 // allocSync is a long lived function that batches allocation updates to the 1907 // server. 1908 func (c *Client) allocSync() { 1909 syncTicker := time.NewTicker(allocSyncIntv) 1910 updates := make(map[string]*structs.Allocation) 1911 for { 1912 select { 1913 case <-c.shutdownCh: 1914 syncTicker.Stop() 1915 return 1916 case alloc := <-c.allocUpdates: 1917 // Batch the allocation updates until the timer triggers. 1918 updates[alloc.ID] = alloc 1919 case <-syncTicker.C: 1920 // Fast path if there are no updates 1921 if len(updates) == 0 { 1922 continue 1923 } 1924 1925 sync := make([]*structs.Allocation, 0, len(updates)) 1926 for _, alloc := range updates { 1927 sync = append(sync, alloc) 1928 } 1929 1930 // Send to server. 
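			// All of the batched updates go out in a single
			// Node.UpdateAlloc RPC scoped to this client's region.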
1931 args := structs.AllocUpdateRequest{ 1932 Alloc: sync, 1933 WriteRequest: structs.WriteRequest{Region: c.Region()}, 1934 } 1935 1936 var resp structs.GenericResponse 1937 err := c.RPC("Node.UpdateAlloc", &args, &resp) 1938 if err != nil { 1939 // Error updating allocations, do *not* clear 1940 // updates and retry after backoff 1941 c.logger.Error("error updating allocations", "error", err) 1942 syncTicker.Stop() 1943 syncTicker = time.NewTicker(c.retryIntv(allocSyncRetryIntv)) 1944 continue 1945 } 1946 1947 // Successfully updated allocs, reset map and ticker. 1948 // Always reset ticker to give loop time to receive 1949 // alloc updates. If the RPC took the ticker interval 1950 // we may call it in a tight loop before draining 1951 // buffered updates. 1952 updates = make(map[string]*structs.Allocation, len(updates)) 1953 syncTicker.Stop() 1954 syncTicker = time.NewTicker(allocSyncIntv) 1955 } 1956 } 1957 } 1958 1959 // allocUpdates holds the results of receiving updated allocations from the 1960 // servers. 1961 type allocUpdates struct { 1962 // pulled is the set of allocations that were downloaded from the servers. 1963 pulled map[string]*structs.Allocation 1964 1965 // filtered is the set of allocations that were not pulled because their 1966 // AllocModifyIndex didn't change. 1967 filtered map[string]struct{} 1968 1969 // migrateTokens are a list of tokens necessary for when clients pull data 1970 // from authorized volumes 1971 migrateTokens map[string]string 1972 } 1973 1974 // watchAllocations is used to scan for updates to allocations 1975 func (c *Client) watchAllocations(updates chan *allocUpdates) { 1976 // The request and response for getting the map of allocations that should 1977 // be running on the Node to their AllocModifyIndex which is incremented 1978 // when the allocation is updated by the servers. 1979 req := structs.NodeSpecificRequest{ 1980 NodeID: c.NodeID(), 1981 SecretID: c.secretNodeID(), 1982 QueryOptions: structs.QueryOptions{ 1983 Region: c.Region(), 1984 AllowStale: true, 1985 }, 1986 } 1987 var resp structs.NodeClientAllocsResponse 1988 1989 // The request and response for pulling down the set of allocations that are 1990 // new, or updated server side. 1991 allocsReq := structs.AllocsGetRequest{ 1992 QueryOptions: structs.QueryOptions{ 1993 Region: c.Region(), 1994 AllowStale: true, 1995 AuthToken: c.secretNodeID(), 1996 }, 1997 } 1998 var allocsResp structs.AllocsGetResponse 1999 2000 OUTER: 2001 for { 2002 // Get the allocation modify index map, blocking for updates. We will 2003 // use this to determine exactly what allocations need to be downloaded 2004 // in full. 2005 resp = structs.NodeClientAllocsResponse{} 2006 err := c.RPC("Node.GetClientAllocs", &req, &resp) 2007 if err != nil { 2008 // Shutdown often causes EOF errors, so check for shutdown first 2009 select { 2010 case <-c.shutdownCh: 2011 return 2012 default: 2013 } 2014 2015 // COMPAT: Remove in 0.6. This is to allow the case in which the 2016 // servers are not fully upgraded before the clients register. 
This 2017 // can cause the SecretID to be lost 2018 if strings.Contains(err.Error(), "node secret ID does not match") { 2019 c.logger.Debug("secret mismatch; re-registering node", "error", err) 2020 c.retryRegisterNode() 2021 } else if err != noServersErr { 2022 c.logger.Error("error querying node allocations", "error", err) 2023 } 2024 retry := c.retryIntv(getAllocRetryIntv) 2025 select { 2026 case <-c.rpcRetryWatcher(): 2027 continue 2028 case <-time.After(retry): 2029 continue 2030 case <-c.shutdownCh: 2031 return 2032 } 2033 } 2034 2035 // Check for shutdown 2036 select { 2037 case <-c.shutdownCh: 2038 return 2039 default: 2040 } 2041 2042 // Filter all allocations whose AllocModifyIndex was not incremented. 2043 // These are the allocations who have either not been updated, or whose 2044 // updates are a result of the client sending an update for the alloc. 2045 // This lets us reduce the network traffic to the server as we don't 2046 // need to pull all the allocations. 2047 var pull []string 2048 filtered := make(map[string]struct{}) 2049 var pullIndex uint64 2050 for allocID, modifyIndex := range resp.Allocs { 2051 // Pull the allocation if we don't have an alloc runner for the 2052 // allocation or if the alloc runner requires an updated allocation. 2053 //XXX Part of Client alloc index tracking exp 2054 c.allocLock.RLock() 2055 currentAR, ok := c.allocs[allocID] 2056 c.allocLock.RUnlock() 2057 2058 // Ignore alloc updates for allocs that are invalid because of initialization errors 2059 c.invalidAllocsLock.Lock() 2060 _, isInvalid := c.invalidAllocs[allocID] 2061 c.invalidAllocsLock.Unlock() 2062 2063 if (!ok || modifyIndex > currentAR.Alloc().AllocModifyIndex) && !isInvalid { 2064 // Only pull allocs that are required. Filtered 2065 // allocs might be at a higher index, so ignore 2066 // it. 2067 if modifyIndex > pullIndex { 2068 pullIndex = modifyIndex 2069 } 2070 pull = append(pull, allocID) 2071 } else { 2072 filtered[allocID] = struct{}{} 2073 } 2074 } 2075 2076 // Pull the allocations that passed filtering. 2077 allocsResp.Allocs = nil 2078 var pulledAllocs map[string]*structs.Allocation 2079 if len(pull) != 0 { 2080 // Pull the allocations that need to be updated. 2081 allocsReq.AllocIDs = pull 2082 allocsReq.MinQueryIndex = pullIndex - 1 2083 allocsResp = structs.AllocsGetResponse{} 2084 if err := c.RPC("Alloc.GetAllocs", &allocsReq, &allocsResp); err != nil { 2085 c.logger.Error("error querying updated allocations", "error", err) 2086 retry := c.retryIntv(getAllocRetryIntv) 2087 select { 2088 case <-c.rpcRetryWatcher(): 2089 continue 2090 case <-time.After(retry): 2091 continue 2092 case <-c.shutdownCh: 2093 return 2094 } 2095 } 2096 2097 // Ensure that we received all the allocations we wanted 2098 pulledAllocs = make(map[string]*structs.Allocation, len(allocsResp.Allocs)) 2099 for _, alloc := range allocsResp.Allocs { 2100 2101 // handle an old Server 2102 alloc.Canonicalize() 2103 2104 pulledAllocs[alloc.ID] = alloc 2105 } 2106 2107 for _, desiredID := range pull { 2108 if _, ok := pulledAllocs[desiredID]; !ok { 2109 // We didn't get everything we wanted. Do not update the 2110 // MinQueryIndex, sleep and then retry. 
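					// The alloc is usually missing because the stale
					// server we queried has not caught up to pullIndex
					// yet; retrying without advancing MinQueryIndex lets
					// a later pass pick it up once that server does.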
2111 wait := c.retryIntv(2 * time.Second) 2112 select { 2113 case <-time.After(wait): 2114 // Wait for the server we contact to receive the 2115 // allocations 2116 continue OUTER 2117 case <-c.shutdownCh: 2118 return 2119 } 2120 } 2121 } 2122 2123 // Check for shutdown 2124 select { 2125 case <-c.shutdownCh: 2126 return 2127 default: 2128 } 2129 } 2130 2131 c.logger.Debug("updated allocations", "index", resp.Index, 2132 "total", len(resp.Allocs), "pulled", len(allocsResp.Allocs), "filtered", len(filtered)) 2133 2134 // Update the query index. 2135 if resp.Index > req.MinQueryIndex { 2136 req.MinQueryIndex = resp.Index 2137 } 2138 2139 // Push the updates. 2140 update := &allocUpdates{ 2141 filtered: filtered, 2142 pulled: pulledAllocs, 2143 migrateTokens: resp.MigrateTokens, 2144 } 2145 2146 select { 2147 case updates <- update: 2148 case <-c.shutdownCh: 2149 return 2150 } 2151 } 2152 } 2153 2154 // updateNode updates the Node copy and triggers the client to send the updated 2155 // Node to the server. This should be done while the caller holds the 2156 // configLock lock. 2157 func (c *Client) updateNodeLocked() { 2158 // Update the config copy. 2159 node := c.config.Node.Copy() 2160 c.configCopy.Node = node 2161 2162 select { 2163 case c.triggerNodeUpdate <- struct{}{}: 2164 // Node update goroutine was released to execute 2165 default: 2166 // Node update goroutine was already running 2167 } 2168 } 2169 2170 // watchNodeUpdates blocks until it is edge triggered. Once triggered, 2171 // it will update the client node copy and re-register the node. 2172 func (c *Client) watchNodeUpdates() { 2173 var hasChanged bool 2174 2175 timer := stoppedTimer() 2176 defer timer.Stop() 2177 2178 for { 2179 select { 2180 case <-timer.C: 2181 c.logger.Debug("state changed, updating node and re-registering") 2182 c.retryRegisterNode() 2183 hasChanged = false 2184 case <-c.triggerNodeUpdate: 2185 if hasChanged { 2186 continue 2187 } 2188 hasChanged = true 2189 timer.Reset(c.retryIntv(nodeUpdateRetryIntv)) 2190 case <-c.shutdownCh: 2191 return 2192 } 2193 } 2194 } 2195 2196 // runAllocs is invoked when we get an updated set of allocations 2197 func (c *Client) runAllocs(update *allocUpdates) { 2198 // Get the existing allocs 2199 c.allocLock.RLock() 2200 existing := make(map[string]uint64, len(c.allocs)) 2201 for id, ar := range c.allocs { 2202 existing[id] = ar.Alloc().AllocModifyIndex 2203 } 2204 c.allocLock.RUnlock() 2205 2206 // Diff the existing and updated allocations 2207 diff := diffAllocs(existing, update) 2208 c.logger.Debug("allocation updates", "added", len(diff.added), "removed", len(diff.removed), 2209 "updated", len(diff.updated), "ignored", len(diff.ignore)) 2210 2211 errs := 0 2212 2213 // Remove the old allocations 2214 for _, remove := range diff.removed { 2215 c.removeAlloc(remove) 2216 } 2217 2218 // Update the existing allocations 2219 for _, update := range diff.updated { 2220 c.logger.Trace("updating alloc", "alloc_id", update.ID, "index", update.AllocModifyIndex) 2221 c.updateAlloc(update) 2222 } 2223 2224 // Make room for new allocations before running 2225 if err := c.garbageCollector.MakeRoomFor(diff.added); err != nil { 2226 c.logger.Error("error making room for new allocations", "error", err) 2227 errs++ 2228 } 2229 2230 // Start the new allocations 2231 for _, add := range diff.added { 2232 migrateToken := update.migrateTokens[add.ID] 2233 if err := c.addAlloc(add, migrateToken); err != nil { 2234 c.logger.Error("error adding alloc", "error", err, "alloc_id", add.ID) 2235 
errs++ 2236 // We mark the alloc as failed and send an update to the server 2237 // We track the fact that creating an allocrunner failed so that we don't send updates again 2238 if add.ClientStatus != structs.AllocClientStatusFailed { 2239 c.handleInvalidAllocs(add, err) 2240 } 2241 } 2242 } 2243 2244 // Mark servers as having been contacted so blocked tasks that failed 2245 // to restore can now restart. 2246 c.serversContactedOnce.Do(func() { 2247 close(c.serversContactedCh) 2248 }) 2249 2250 // Trigger the GC once more now that new allocs are started that could 2251 // have caused thresholds to be exceeded 2252 c.garbageCollector.Trigger() 2253 c.logger.Debug("allocation updates applied", "added", len(diff.added), "removed", len(diff.removed), 2254 "updated", len(diff.updated), "ignored", len(diff.ignore), "errors", errs) 2255 } 2256 2257 // makeFailedAlloc creates a stripped down version of the allocation passed in 2258 // with its status set to failed and other fields needed for the server to be 2259 // able to examine deployment and task states 2260 func makeFailedAlloc(add *structs.Allocation, err error) *structs.Allocation { 2261 stripped := new(structs.Allocation) 2262 stripped.ID = add.ID 2263 stripped.NodeID = add.NodeID 2264 stripped.ClientStatus = structs.AllocClientStatusFailed 2265 stripped.ClientDescription = fmt.Sprintf("Unable to add allocation due to error: %v", err) 2266 2267 // Copy task states if it exists in the original allocation 2268 if add.TaskStates != nil { 2269 stripped.TaskStates = add.TaskStates 2270 } else { 2271 stripped.TaskStates = make(map[string]*structs.TaskState) 2272 } 2273 2274 failTime := time.Now() 2275 if add.DeploymentStatus.HasHealth() { 2276 // Never change deployment health once it has been set 2277 stripped.DeploymentStatus = add.DeploymentStatus.Copy() 2278 } else { 2279 stripped.DeploymentStatus = &structs.AllocDeploymentStatus{ 2280 Healthy: helper.BoolToPtr(false), 2281 Timestamp: failTime, 2282 } 2283 } 2284 2285 taskGroup := add.Job.LookupTaskGroup(add.TaskGroup) 2286 if taskGroup == nil { 2287 return stripped 2288 } 2289 for _, task := range taskGroup.Tasks { 2290 ts, ok := stripped.TaskStates[task.Name] 2291 if !ok { 2292 ts = &structs.TaskState{} 2293 stripped.TaskStates[task.Name] = ts 2294 } 2295 if ts.FinishedAt.IsZero() { 2296 ts.FinishedAt = failTime 2297 } 2298 } 2299 return stripped 2300 } 2301 2302 // removeAlloc is invoked when we should remove an allocation because it has 2303 // been removed by the server. 2304 func (c *Client) removeAlloc(allocID string) { 2305 c.allocLock.Lock() 2306 defer c.allocLock.Unlock() 2307 2308 ar, ok := c.allocs[allocID] 2309 if !ok { 2310 c.invalidAllocsLock.Lock() 2311 if _, ok := c.invalidAllocs[allocID]; ok { 2312 // Removing from invalid allocs map if present 2313 delete(c.invalidAllocs, allocID) 2314 } else { 2315 // Alloc is unknown, log a warning. 2316 c.logger.Warn("cannot remove nonexistent alloc", "alloc_id", allocID, "error", "alloc not found") 2317 } 2318 c.invalidAllocsLock.Unlock() 2319 return 2320 } 2321 2322 // Stop tracking alloc runner as it's been GC'd by the server 2323 delete(c.allocs, allocID) 2324 2325 // Ensure the GC has a reference and then collect. 
Collecting through the GC 2326 // applies rate limiting 2327 c.garbageCollector.MarkForCollection(allocID, ar) 2328 2329 // GC immediately since the server has GC'd it 2330 go c.garbageCollector.Collect(allocID) 2331 } 2332 2333 // updateAlloc is invoked when we should update an allocation 2334 func (c *Client) updateAlloc(update *structs.Allocation) { 2335 ar, err := c.getAllocRunner(update.ID) 2336 if err != nil { 2337 c.logger.Warn("cannot update nonexistent alloc", "alloc_id", update.ID) 2338 return 2339 } 2340 2341 // Update local copy of alloc 2342 if err := c.stateDB.PutAllocation(update); err != nil { 2343 c.logger.Error("error persisting updated alloc locally", "error", err, "alloc_id", update.ID) 2344 } 2345 2346 // Update alloc runner 2347 ar.Update(update) 2348 } 2349 2350 // addAlloc is invoked when we should add an allocation 2351 func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error { 2352 c.allocLock.Lock() 2353 defer c.allocLock.Unlock() 2354 2355 // Check if we already have an alloc runner 2356 if _, ok := c.allocs[alloc.ID]; ok { 2357 c.logger.Debug("dropping duplicate add allocation request", "alloc_id", alloc.ID) 2358 return nil 2359 } 2360 2361 // Initialize local copy of alloc before creating the alloc runner so 2362 // we can't end up with an alloc runner that does not have an alloc. 2363 if err := c.stateDB.PutAllocation(alloc); err != nil { 2364 return err 2365 } 2366 2367 // Collect any preempted allocations to pass into the previous alloc watcher 2368 var preemptedAllocs map[string]allocwatcher.AllocRunnerMeta 2369 if len(alloc.PreemptedAllocations) > 0 { 2370 preemptedAllocs = make(map[string]allocwatcher.AllocRunnerMeta) 2371 for _, palloc := range alloc.PreemptedAllocations { 2372 preemptedAllocs[palloc] = c.allocs[palloc] 2373 } 2374 } 2375 2376 // Since only the Client has access to other AllocRunners and the RPC 2377 // client, create the previous allocation watcher here. 2378 watcherConfig := allocwatcher.Config{ 2379 Alloc: alloc, 2380 PreviousRunner: c.allocs[alloc.PreviousAllocation], 2381 PreemptedRunners: preemptedAllocs, 2382 RPC: c, 2383 Config: c.configCopy, 2384 MigrateToken: migrateToken, 2385 Logger: c.logger, 2386 } 2387 prevAllocWatcher, prevAllocMigrator := allocwatcher.NewAllocWatcher(watcherConfig) 2388 2389 // Copy the config since the node can be swapped out as it is being updated. 2390 // The long term fix is to pass in the config and node separately and then 2391 // we don't have to do a copy. 2392 c.configLock.RLock() 2393 arConf := &allocrunner.Config{ 2394 Alloc: alloc, 2395 Logger: c.logger, 2396 ClientConfig: c.configCopy, 2397 StateDB: c.stateDB, 2398 Consul: c.consulService, 2399 ConsulProxies: c.consulProxies, 2400 ConsulSI: c.tokensClient, 2401 Vault: c.vaultClient, 2402 StateUpdater: c, 2403 DeviceStatsReporter: c, 2404 PrevAllocWatcher: prevAllocWatcher, 2405 PrevAllocMigrator: prevAllocMigrator, 2406 DynamicRegistry: c.dynamicRegistry, 2407 CSIManager: c.csimanager, 2408 DeviceManager: c.devicemanager, 2409 DriverManager: c.drivermanager, 2410 RPCClient: c, 2411 } 2412 c.configLock.RUnlock() 2413 2414 ar, err := allocrunner.NewAllocRunner(arConf) 2415 if err != nil { 2416 return err 2417 } 2418 2419 // Store the alloc runner. 
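	// The runner is added to the map before it is started so concurrent
	// lookups (updateAlloc, removeAlloc, duplicate adds) observe it while
	// allocLock is still held.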
2420 c.allocs[alloc.ID] = ar 2421 2422 // Maybe mark the alloc for halt on missing server heartbeats 2423 c.heartbeatStop.allocHook(alloc) 2424 2425 go ar.Run() 2426 return nil 2427 } 2428 2429 // setupConsulTokenClient configures a tokenClient for managing consul service 2430 // identity tokens. 2431 func (c *Client) setupConsulTokenClient() error { 2432 tc := consulApi.NewIdentitiesClient(c.logger, c.deriveSIToken) 2433 c.tokensClient = tc 2434 return nil 2435 } 2436 2437 // setupVaultClient creates an object to periodically renew tokens and secrets 2438 // with vault. 2439 func (c *Client) setupVaultClient() error { 2440 var err error 2441 c.vaultClient, err = vaultclient.NewVaultClient(c.config.VaultConfig, c.logger, c.deriveToken) 2442 if err != nil { 2443 return err 2444 } 2445 2446 if c.vaultClient == nil { 2447 c.logger.Error("failed to create vault client") 2448 return fmt.Errorf("failed to create vault client") 2449 } 2450 2451 // Start renewing tokens and secrets 2452 c.vaultClient.Start() 2453 2454 return nil 2455 } 2456 2457 // deriveToken takes in an allocation and a set of tasks and derives vault 2458 // tokens for each of the tasks, unwraps all of them using the supplied vault 2459 // client and returns a map of unwrapped tokens, indexed by the task name. 2460 func (c *Client) deriveToken(alloc *structs.Allocation, taskNames []string, vclient *vaultapi.Client) (map[string]string, error) { 2461 vlogger := c.logger.Named("vault") 2462 2463 verifiedTasks, err := verifiedTasks(vlogger, alloc, taskNames) 2464 if err != nil { 2465 return nil, err 2466 } 2467 2468 // DeriveVaultToken of nomad server can take in a set of tasks and 2469 // creates tokens for all the tasks. 2470 req := &structs.DeriveVaultTokenRequest{ 2471 NodeID: c.NodeID(), 2472 SecretID: c.secretNodeID(), 2473 AllocID: alloc.ID, 2474 Tasks: verifiedTasks, 2475 QueryOptions: structs.QueryOptions{ 2476 Region: c.Region(), 2477 AllowStale: false, 2478 }, 2479 } 2480 2481 // Derive the tokens 2482 // namespace is handled via nomad/vault 2483 var resp structs.DeriveVaultTokenResponse 2484 if err := c.RPC("Node.DeriveVaultToken", &req, &resp); err != nil { 2485 vlogger.Error("error making derive token RPC", "error", err) 2486 return nil, fmt.Errorf("DeriveVaultToken RPC failed: %v", err) 2487 } 2488 if resp.Error != nil { 2489 vlogger.Error("error deriving vault tokens", "error", resp.Error) 2490 return nil, structs.NewWrappedServerError(resp.Error) 2491 } 2492 if resp.Tasks == nil { 2493 vlogger.Error("error derivng vault token", "error", "invalid response") 2494 return nil, fmt.Errorf("failed to derive vault tokens: invalid response") 2495 } 2496 2497 unwrappedTokens := make(map[string]string) 2498 2499 // Retrieve the wrapped tokens from the response and unwrap it 2500 for _, taskName := range verifiedTasks { 2501 // Get the wrapped token 2502 wrappedToken, ok := resp.Tasks[taskName] 2503 if !ok { 2504 vlogger.Error("wrapped token missing for task", "task_name", taskName) 2505 return nil, fmt.Errorf("wrapped token missing for task %q", taskName) 2506 } 2507 2508 // Unwrap the vault token 2509 unwrapResp, err := vclient.Logical().Unwrap(wrappedToken) 2510 if err != nil { 2511 if structs.VaultUnrecoverableError.MatchString(err.Error()) { 2512 return nil, err 2513 } 2514 2515 // The error is recoverable 2516 return nil, structs.NewRecoverableError( 2517 fmt.Errorf("failed to unwrap the token for task %q: %v", taskName, err), true) 2518 } 2519 2520 // Validate the response 2521 var validationErr error 2522 if unwrapResp 
== nil {
			validationErr = fmt.Errorf("Vault returned nil secret when unwrapping")
		} else if unwrapResp.Auth == nil {
			validationErr = fmt.Errorf("Vault returned unwrap secret with nil Auth. Secret warnings: %v", unwrapResp.Warnings)
		} else if unwrapResp.Auth.ClientToken == "" {
			validationErr = fmt.Errorf("Vault returned unwrap secret with empty Auth.ClientToken. Secret warnings: %v", unwrapResp.Warnings)
		}
		if validationErr != nil {
			// Log the validation error itself; the unwrap error is always
			// nil at this point since it was handled above.
			vlogger.Warn("error unwrapping token", "error", validationErr)
			return nil, structs.NewRecoverableError(validationErr, true)
		}

		// Append the unwrapped token to the return value
		unwrappedTokens[taskName] = unwrapResp.Auth.ClientToken
	}

	return unwrappedTokens, nil
}

// deriveSIToken takes an allocation and a set of tasks and derives Consul
// Service Identity tokens for each of the tasks by requesting them from the
// Nomad Server.
func (c *Client) deriveSIToken(alloc *structs.Allocation, taskNames []string) (map[string]string, error) {
	tasks, err := verifiedTasks(c.logger, alloc, taskNames)
	if err != nil {
		return nil, err
	}

	req := &structs.DeriveSITokenRequest{
		NodeID:       c.NodeID(),
		SecretID:     c.secretNodeID(),
		AllocID:      alloc.ID,
		Tasks:        tasks,
		QueryOptions: structs.QueryOptions{Region: c.Region()},
	}

	// Nicely ask Nomad Server for the tokens.
	var resp structs.DeriveSITokenResponse
	if err := c.RPC("Node.DeriveSIToken", &req, &resp); err != nil {
		c.logger.Error("error making derive token RPC", "error", err)
		return nil, fmt.Errorf("DeriveSIToken RPC failed: %v", err)
	}
	if err := resp.Error; err != nil {
		c.logger.Error("error deriving SI tokens", "error", err)
		return nil, structs.NewWrappedServerError(err)
	}
	if len(resp.Tokens) == 0 {
		c.logger.Error("error deriving SI tokens", "error", "invalid_response")
		return nil, fmt.Errorf("failed to derive SI tokens: invalid response")
	}

	// NOTE: Unlike with the Vault integration, Nomad Server replies with the
	// actual Consul SI token (.SecretID), because otherwise each Nomad
	// Client would need to be blessed with 'acl:write' permissions to read the
	// secret value given the .AccessorID, which does not fit well in the Consul
	// security model.
	//
	// https://www.consul.io/api/acl/tokens.html#read-a-token
	// https://www.consul.io/docs/internals/security.html

	m := helper.CopyMapStringString(resp.Tokens)
	return m, nil
}

// verifiedTasks asserts each task in taskNames actually exists in the given alloc,
// otherwise an error is returned.
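// For example (hypothetical names), if the allocation's task group defines
// tasks "web" and "logshipper", then verifiedTasks(logger, alloc,
// []string{"web"}) returns []string{"web"}, while including an undefined task
// name in taskNames yields an error instead.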
2588 func verifiedTasks(logger hclog.Logger, alloc *structs.Allocation, taskNames []string) ([]string, error) { 2589 if alloc == nil { 2590 return nil, fmt.Errorf("nil allocation") 2591 } 2592 2593 if len(taskNames) == 0 { 2594 return nil, fmt.Errorf("missing task names") 2595 } 2596 2597 group := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 2598 if group == nil { 2599 return nil, fmt.Errorf("group name in allocation is not present in job") 2600 } 2601 2602 verifiedTasks := make([]string, 0, len(taskNames)) 2603 2604 // confirm the requested task names actually exist in the allocation 2605 for _, taskName := range taskNames { 2606 if !taskIsPresent(taskName, group.Tasks) { 2607 logger.Error("task not found in the allocation", "task_name", taskName) 2608 return nil, fmt.Errorf("task %q not found in allocation", taskName) 2609 } 2610 verifiedTasks = append(verifiedTasks, taskName) 2611 } 2612 2613 return verifiedTasks, nil 2614 } 2615 2616 func taskIsPresent(taskName string, tasks []*structs.Task) bool { 2617 for _, task := range tasks { 2618 if task.Name == taskName { 2619 return true 2620 } 2621 } 2622 return false 2623 } 2624 2625 // triggerDiscovery causes a Consul discovery to begin (if one hasn't already) 2626 func (c *Client) triggerDiscovery() { 2627 select { 2628 case c.triggerDiscoveryCh <- struct{}{}: 2629 // Discovery goroutine was released to execute 2630 default: 2631 // Discovery goroutine was already running 2632 } 2633 } 2634 2635 // consulDiscovery waits for the signal to attempt server discovery via Consul. 2636 // It's intended to be started in a goroutine. See triggerDiscovery() for 2637 // causing consul discovery from other code locations. 2638 func (c *Client) consulDiscovery() { 2639 for { 2640 select { 2641 case <-c.triggerDiscoveryCh: 2642 if err := c.consulDiscoveryImpl(); err != nil { 2643 c.logger.Error("error discovering nomad servers", "error", err) 2644 } 2645 case <-c.shutdownCh: 2646 return 2647 } 2648 } 2649 } 2650 2651 func (c *Client) consulDiscoveryImpl() error { 2652 consulLogger := c.logger.Named("consul") 2653 2654 dcs, err := c.consulCatalog.Datacenters() 2655 if err != nil { 2656 return fmt.Errorf("client.consul: unable to query Consul datacenters: %v", err) 2657 } 2658 if len(dcs) > 2 { 2659 // Query the local DC first, then shuffle the 2660 // remaining DCs. Future heartbeats will cause Nomad 2661 // Clients to fixate on their local datacenter so 2662 // it's okay to talk with remote DCs. If the no 2663 // Nomad servers are available within 2664 // datacenterQueryLimit, the next heartbeat will pick 2665 // a new set of servers so it's okay. 
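		// Consul returns the datacenter list sorted by estimated round
		// trip time from the local agent, so dcs[0] is typically the
		// local DC; shuffling only dcs[1:] keeps that local preference
		// while spreading queries across the remote datacenters.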
2666 shuffleStrings(dcs[1:]) 2667 dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)] 2668 } 2669 2670 // Query for servers in this client's region only 2671 region := c.Region() 2672 rpcargs := structs.GenericRequest{ 2673 QueryOptions: structs.QueryOptions{ 2674 Region: region, 2675 }, 2676 } 2677 2678 serviceName := c.configCopy.ConsulConfig.ServerServiceName 2679 var mErr multierror.Error 2680 var nomadServers servers.Servers 2681 consulLogger.Debug("bootstrap contacting Consul DCs", "consul_dcs", dcs) 2682 DISCOLOOP: 2683 for _, dc := range dcs { 2684 consulOpts := &consulapi.QueryOptions{ 2685 AllowStale: true, 2686 Datacenter: dc, 2687 Near: "_agent", 2688 WaitTime: consul.DefaultQueryWaitDuration, 2689 } 2690 consulServices, _, err := c.consulCatalog.Service(serviceName, consul.ServiceTagRPC, consulOpts) 2691 if err != nil { 2692 mErr.Errors = append(mErr.Errors, fmt.Errorf("unable to query service %+q from Consul datacenter %+q: %v", serviceName, dc, err)) 2693 continue 2694 } 2695 2696 for _, s := range consulServices { 2697 port := strconv.Itoa(s.ServicePort) 2698 addrstr := s.ServiceAddress 2699 if addrstr == "" { 2700 addrstr = s.Address 2701 } 2702 addr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(addrstr, port)) 2703 if err != nil { 2704 mErr.Errors = append(mErr.Errors, err) 2705 continue 2706 } 2707 var peers []string 2708 if err := c.connPool.RPC(region, addr, c.RPCMajorVersion(), "Status.Peers", rpcargs, &peers); err != nil { 2709 mErr.Errors = append(mErr.Errors, err) 2710 continue 2711 } 2712 2713 // Successfully received the Server peers list of the correct 2714 // region 2715 for _, p := range peers { 2716 addr, err := net.ResolveTCPAddr("tcp", p) 2717 if err != nil { 2718 mErr.Errors = append(mErr.Errors, err) 2719 } 2720 srv := &servers.Server{Addr: addr} 2721 nomadServers = append(nomadServers, srv) 2722 } 2723 if len(nomadServers) > 0 { 2724 break DISCOLOOP 2725 } 2726 } 2727 } 2728 if len(nomadServers) == 0 { 2729 if len(mErr.Errors) > 0 { 2730 return mErr.ErrorOrNil() 2731 } 2732 return fmt.Errorf("no Nomad Servers advertising service %q in Consul datacenters: %+q", serviceName, dcs) 2733 } 2734 2735 consulLogger.Info("discovered following servers", "servers", nomadServers) 2736 2737 // Fire the retry trigger if we have updated the set of servers. 2738 if c.servers.SetServers(nomadServers) { 2739 // Start rebalancing 2740 c.servers.RebalanceServers() 2741 2742 // Notify waiting rpc calls. If a goroutine just failed an RPC call and 2743 // isn't receiving on this chan yet they'll still retry eventually. 2744 // This is a shortcircuit for the longer retry intervals. 
2745 c.fireRpcRetryWatcher() 2746 } 2747 2748 return nil 2749 } 2750 2751 // emitStats collects host resource usage stats periodically 2752 func (c *Client) emitStats() { 2753 // Determining NodeClass to be emitted 2754 var emittedNodeClass string 2755 if emittedNodeClass = c.Node().NodeClass; emittedNodeClass == "" { 2756 emittedNodeClass = "none" 2757 } 2758 2759 // Assign labels directly before emitting stats so the information expected 2760 // is ready 2761 c.baseLabels = []metrics.Label{ 2762 {Name: "node_id", Value: c.NodeID()}, 2763 {Name: "datacenter", Value: c.Datacenter()}, 2764 {Name: "node_class", Value: emittedNodeClass}, 2765 } 2766 2767 // Start collecting host stats right away and then keep collecting every 2768 // collection interval 2769 next := time.NewTimer(0) 2770 defer next.Stop() 2771 for { 2772 select { 2773 case <-next.C: 2774 err := c.hostStatsCollector.Collect() 2775 next.Reset(c.config.StatsCollectionInterval) 2776 if err != nil { 2777 c.logger.Warn("error fetching host resource usage stats", "error", err) 2778 } else if c.config.PublishNodeMetrics { 2779 // Publish Node metrics if operator has opted in 2780 c.emitHostStats() 2781 } 2782 2783 c.emitClientMetrics() 2784 case <-c.shutdownCh: 2785 return 2786 } 2787 } 2788 } 2789 2790 // setGaugeForMemoryStats proxies metrics for memory specific statistics 2791 func (c *Client) setGaugeForMemoryStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { 2792 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "total"}, float32(hStats.Memory.Total), baseLabels) 2793 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "available"}, float32(hStats.Memory.Available), baseLabels) 2794 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "used"}, float32(hStats.Memory.Used), baseLabels) 2795 metrics.SetGaugeWithLabels([]string{"client", "host", "memory", "free"}, float32(hStats.Memory.Free), baseLabels) 2796 } 2797 2798 // setGaugeForCPUStats proxies metrics for CPU specific statistics 2799 func (c *Client) setGaugeForCPUStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { 2800 for _, cpu := range hStats.CPU { 2801 labels := append(baseLabels, metrics.Label{ 2802 Name: "cpu", 2803 Value: cpu.CPU, 2804 }) 2805 2806 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "total"}, float32(cpu.Total), labels) 2807 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "user"}, float32(cpu.User), labels) 2808 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "idle"}, float32(cpu.Idle), labels) 2809 metrics.SetGaugeWithLabels([]string{"client", "host", "cpu", "system"}, float32(cpu.System), labels) 2810 } 2811 } 2812 2813 // setGaugeForDiskStats proxies metrics for disk specific statistics 2814 func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, baseLabels []metrics.Label) { 2815 for _, disk := range hStats.DiskStats { 2816 labels := append(baseLabels, metrics.Label{ 2817 Name: "disk", 2818 Value: disk.Device, 2819 }) 2820 2821 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "size"}, float32(disk.Size), labels) 2822 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used"}, float32(disk.Used), labels) 2823 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "available"}, float32(disk.Available), labels) 2824 metrics.SetGaugeWithLabels([]string{"client", "host", "disk", "used_percent"}, float32(disk.UsedPercent), labels) 2825 metrics.SetGaugeWithLabels([]string{"client", 
"host", "disk", "inodes_percent"}, float32(disk.InodesUsedPercent), labels) 2826 } 2827 } 2828 2829 // setGaugeForAllocationStats proxies metrics for allocation specific statistics 2830 func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.Label) { 2831 c.configLock.RLock() 2832 node := c.configCopy.Node 2833 c.configLock.RUnlock() 2834 total := node.NodeResources 2835 res := node.ReservedResources 2836 allocated := c.getAllocatedResources(node) 2837 2838 // Emit allocated 2839 metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels) 2840 metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels) 2841 metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels) 2842 2843 for _, n := range allocated.Flattened.Networks { 2844 labels := append(baseLabels, metrics.Label{ 2845 Name: "device", 2846 Value: n.Device, 2847 }) 2848 metrics.SetGaugeWithLabels([]string{"client", "allocated", "network"}, float32(n.MBits), labels) 2849 } 2850 2851 // Emit unallocated 2852 unallocatedMem := total.Memory.MemoryMB - res.Memory.MemoryMB - allocated.Flattened.Memory.MemoryMB 2853 unallocatedDisk := total.Disk.DiskMB - res.Disk.DiskMB - allocated.Shared.DiskMB 2854 unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares 2855 2856 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels) 2857 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels) 2858 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels) 2859 2860 totalComparable := total.Comparable() 2861 for _, n := range totalComparable.Flattened.Networks { 2862 // Determined the used resources 2863 var usedMbits int 2864 totalIdx := allocated.Flattened.Networks.NetIndex(n) 2865 if totalIdx != -1 { 2866 usedMbits = allocated.Flattened.Networks[totalIdx].MBits 2867 } 2868 2869 unallocatedMbits := n.MBits - usedMbits 2870 labels := append(baseLabels, metrics.Label{ 2871 Name: "device", 2872 Value: n.Device, 2873 }) 2874 metrics.SetGaugeWithLabels([]string{"client", "unallocated", "network"}, float32(unallocatedMbits), labels) 2875 } 2876 } 2877 2878 // No labels are required so we emit with only a key/value syntax 2879 func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics.Label) { 2880 metrics.SetGaugeWithLabels([]string{"client", "uptime"}, float32(hStats.Uptime), baseLabels) 2881 } 2882 2883 // emitHostStats pushes host resource usage stats to remote metrics collection sinks 2884 func (c *Client) emitHostStats() { 2885 nodeID := c.NodeID() 2886 hStats := c.hostStatsCollector.Stats() 2887 labels := c.labels() 2888 2889 c.setGaugeForMemoryStats(nodeID, hStats, labels) 2890 c.setGaugeForUptime(hStats, labels) 2891 c.setGaugeForCPUStats(nodeID, hStats, labels) 2892 c.setGaugeForDiskStats(nodeID, hStats, labels) 2893 } 2894 2895 // emitClientMetrics emits lower volume client metrics 2896 func (c *Client) emitClientMetrics() { 2897 nodeID := c.NodeID() 2898 labels := c.labels() 2899 2900 c.setGaugeForAllocationStats(nodeID, labels) 2901 2902 // Emit allocation metrics 2903 blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0 2904 for _, ar := range c.getAllocRunners() { 2905 switch ar.AllocState().ClientStatus { 2906 case 
structs.AllocClientStatusPending: 2907 switch { 2908 case ar.IsWaiting(): 2909 blocked++ 2910 case ar.IsMigrating(): 2911 migrating++ 2912 default: 2913 pending++ 2914 } 2915 case structs.AllocClientStatusRunning: 2916 running++ 2917 case structs.AllocClientStatusComplete, structs.AllocClientStatusFailed: 2918 terminal++ 2919 } 2920 } 2921 2922 metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels) 2923 metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels) 2924 metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels) 2925 metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels) 2926 metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels) 2927 } 2928 2929 // labels takes the base labels and appends the node state 2930 func (c *Client) labels() []metrics.Label { 2931 c.configLock.RLock() 2932 nodeStatus := c.configCopy.Node.Status 2933 nodeEligibility := c.configCopy.Node.SchedulingEligibility 2934 c.configLock.RUnlock() 2935 2936 return append(c.baseLabels, 2937 metrics.Label{Name: "node_status", Value: nodeStatus}, 2938 metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility}, 2939 ) 2940 } 2941 2942 func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.ComparableResources { 2943 // Unfortunately the allocs only have IP so we need to match them to the 2944 // device 2945 cidrToDevice := make(map[*net.IPNet]string, len(selfNode.Resources.Networks)) 2946 for _, n := range selfNode.NodeResources.Networks { 2947 _, ipnet, err := net.ParseCIDR(n.CIDR) 2948 if err != nil { 2949 continue 2950 } 2951 cidrToDevice[ipnet] = n.Device 2952 } 2953 2954 // Sum the allocated resources 2955 var allocated structs.ComparableResources 2956 allocatedDeviceMbits := make(map[string]int) 2957 for _, ar := range c.getAllocRunners() { 2958 alloc := ar.Alloc() 2959 if alloc.ServerTerminalStatus() || ar.AllocState().ClientTerminalStatus() { 2960 continue 2961 } 2962 2963 // Add the resources 2964 // COMPAT(0.11): Just use the allocated resources 2965 allocated.Add(alloc.ComparableResources()) 2966 2967 // Add the used network 2968 if alloc.AllocatedResources != nil { 2969 for _, tr := range alloc.AllocatedResources.Tasks { 2970 for _, allocatedNetwork := range tr.Networks { 2971 for cidr, dev := range cidrToDevice { 2972 ip := net.ParseIP(allocatedNetwork.IP) 2973 if cidr.Contains(ip) { 2974 allocatedDeviceMbits[dev] += allocatedNetwork.MBits 2975 break 2976 } 2977 } 2978 } 2979 } 2980 } else if alloc.Resources != nil { 2981 for _, allocatedNetwork := range alloc.Resources.Networks { 2982 for cidr, dev := range cidrToDevice { 2983 ip := net.ParseIP(allocatedNetwork.IP) 2984 if cidr.Contains(ip) { 2985 allocatedDeviceMbits[dev] += allocatedNetwork.MBits 2986 break 2987 } 2988 } 2989 } 2990 } 2991 } 2992 2993 // Clear the networks 2994 allocated.Flattened.Networks = nil 2995 for dev, speed := range allocatedDeviceMbits { 2996 net := &structs.NetworkResource{ 2997 Device: dev, 2998 MBits: speed, 2999 } 3000 allocated.Flattened.Networks = append(allocated.Flattened.Networks, net) 3001 } 3002 3003 return &allocated 3004 } 3005 3006 // GetTaskEventHandler returns an event handler for the given allocID and task name 3007 func (c *Client) GetTaskEventHandler(allocID, taskName string) drivermanager.EventHandler { 3008 c.allocLock.RLock() 3009 defer c.allocLock.RUnlock() 
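	// A nil handler is returned when the alloc is no longer tracked by this
	// client; callers are expected to handle that case.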
	if ar, ok := c.allocs[allocID]; ok {
		return ar.GetTaskEventHandler(taskName)
	}
	return nil
}

// group wraps a func() in a goroutine and provides a way to block until it
// exits. Inspired by https://godoc.org/golang.org/x/sync/errgroup
type group struct {
	wg sync.WaitGroup
}

// Go starts f in a goroutine and must be called before Wait.
func (g *group) Go(f func()) {
	g.wg.Add(1)
	go func() {
		defer g.wg.Done()
		f()
	}()
}

// AddCh starts a goroutine that blocks until ch is closed (or receives a
// value). It must be called before Wait.
func (g *group) AddCh(ch <-chan struct{}) {
	g.Go(func() {
		<-ch
	})
}

// Wait for all goroutines to exit. Must be called after all calls to Go
// complete.
func (g *group) Wait() {
	g.wg.Wait()
}
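// A minimal usage sketch of the group helper above (illustrative only, not
// part of the original source; "done" is a hypothetical caller-provided
// channel):
//
//	var g group
//	g.Go(func() {
//		// long-lived background work
//	})
//	g.AddCh(done) // also wait for an externally closed channel
//	g.Wait()      // returns once the worker exits and done is closed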