github.com/hooklift/nomad@v0.5.7-0.20170407200202-db11e7dd7b55/nomad/node_endpoint.go

package nomad

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16
)

// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *batchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}

	// COMPAT: Remove after 0.6
	// Need to check if this node predates 0.5, since SecretID is new in 0.5
	pre, err := nodePreSecretID(args.Node)
	if err != nil {
		return err
	}
	if args.Node.SecretID == "" && !pre {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if !pre && originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID &&
			originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match. Not registering node.")
		}
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// nodePreSecretID is a helper that returns whether the node is on a version
// that is before SecretIDs were introduced
func nodePreSecretID(node *structs.Node) (bool, error) {
	a := node.Attributes
	if a == nil {
		return false, fmt.Errorf("node doesn't have attributes set")
	}

	v, ok := a["nomad.version"]
	if !ok {
		return false, fmt.Errorf("missing Nomad version in attributes")
	}

	return !strings.HasPrefix(v, "0.5"), nil
}

// constructNodeServerInfoResponse assumes that n.srv.peerLock is held for
// reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = string(n.srv.raft.Leader())

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for k, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: string(k),
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	ws := memdb.NewWatchSet()
	iter, err := snap.Nodes(ws)
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

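// Editor's example (illustrative sketch, not part of the original file):
// nodePreSecretID is a prefix check, so any reported version that does not
// start with "0.5" counts as pre-SecretID. A node advertising 0.4.1 may
// therefore register without a SecretID, while a 0.5.x node must supply one.
func exampleNodePreSecretID() {
	node := &structs.Node{
		Attributes: map[string]string{"nomad.version": "0.4.1"},
	}
	pre, err := nodePreSecretID(node)
	fmt.Printf("pre=%v err=%v\n", pre, err) // pre=true err=<nil>
}
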
// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	ws := memdb.NewWatchSet()
	accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if l := len(accessors); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Setup the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

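// Editor's example (hypothetical caller, not part of the original file):
// shape of a deregistration request as a client agent might issue it. The
// "global" region is an assumption for illustration.
func exampleDeregister(n *Node, nodeID string) (*structs.NodeUpdateResponse, error) {
	args := &structs.NodeDeregisterRequest{
		NodeID:       nodeID,
		WriteRequest: structs.WriteRequest{Region: "global"},
	}
	var reply structs.NodeUpdateResponse
	if err := n.Deregister(args, &reply); err != nil {
		return nil, err
	}
	// reply carries the IDs of the evaluations created for the node's jobs.
	return &reply, nil
}
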
// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// XXX: Could use the SecretID here but have to update the heartbeat
	// system to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// If the node is going down, revoke any Vault accessors it holds;
	// otherwise reset its heartbeat timer
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if l := len(accessors); l != 0 {
			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}

// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Drain != args.Drain {
		_, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Always attempt to create Node evaluations because there may be a System
	// job registered that should be evaluated.
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

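// Editor's example (illustrative sketch, not part of the original file):
// the truth table for transitionedToReady above — only init->ready and
// down->ready count as transitions to ready.
func exampleTransitions() {
	fmt.Println(transitionedToReady(structs.NodeStatusReady, structs.NodeStatusInit))  // true
	fmt.Println(transitionedToReady(structs.NodeStatusReady, structs.NodeStatusDown))  // true
	fmt.Println(transitionedToReady(structs.NodeStatusReady, structs.NodeStatusReady)) // false
	fmt.Println(transitionedToReady(structs.NodeStatusDown, structs.NodeStatusReady))  // false
}
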
// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			out, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := state.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the allocations on the node
			allocs, err := state.AllocsByNode(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if len(allocs) != 0 {
				reply.Allocs = allocs
				for _, alloc := range allocs {
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

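// Editor's example (hypothetical poller, not part of the original file):
// using the blocking-query contract of GetAllocs — pass the last index seen
// as MinQueryIndex and the RPC returns only once the allocations change or
// the query times out. The region value and loop bound are assumptions.
func examplePollAllocs(n *Node, nodeID string) error {
	var lastIndex uint64
	for i := 0; i < 3; i++ {
		args := &structs.NodeSpecificRequest{
			NodeID: nodeID,
			QueryOptions: structs.QueryOptions{
				Region:        "global",
				MinQueryIndex: lastIndex,
			},
		}
		var reply structs.NodeAllocsResponse
		if err := n.GetAllocs(args, &reply); err != nil {
			return err
		}
		lastIndex = reply.Index
	}
	return nil
}
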
// GetClientAllocs is used to request a lightweight list of alloc modify
// indexes per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			node, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				// COMPAT: Remove in 0.6
				// Check if the node should have a SecretID set
				if args.SecretID == "" {
					if pre, err := nodePreSecretID(node); err != nil {
						return err
					} else if !pre {
						return fmt.Errorf("missing node secret ID")
					}
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				var err error
				allocs, err = state.AllocsByNode(ws, args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			// Setup the output
			if len(allocs) != 0 {
				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

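// Editor's example (illustrative sketch, not part of the original file):
// a client can diff the AllocModifyIndex map returned by GetClientAllocs
// against its local view to decide which allocations to fetch in full.
func exampleDiffAllocs(local, remote map[string]uint64) []string {
	var pull []string
	for id, idx := range remote {
		if local[id] < idx {
			pull = append(pull, id)
		}
	}
	return pull
}
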
// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)

	// Start a new batch if none
	future := n.updateFuture
	if future == nil {
		future = NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			future := n.updateFuture
			n.updates = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Setup the response
	reply.Index = future.Index()
	return nil
}

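// Editor's example (illustrative sketch, not part of the original file):
// concurrent UpdateAlloc calls that land within batchUpdateInterval share
// one batchFuture and therefore one Raft apply; every caller observes the
// same index and error. The region value is an assumption.
func exampleCoalescedUpdates(n *Node, allocs []*structs.Allocation) {
	var wg sync.WaitGroup
	for _, alloc := range allocs {
		wg.Add(1)
		go func(a *structs.Allocation) {
			defer wg.Done()
			args := &structs.AllocUpdateRequest{
				Alloc:        []*structs.Allocation{a},
				WriteRequest: structs.WriteRequest{Region: "global"},
			}
			var reply structs.GenericResponse
			_ = n.UpdateAlloc(args, &reply) // likely batched with its peers
		}(alloc)
	}
	wg.Wait()
}
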
// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) {
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		ws := memdb.NewWatchSet()
		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if l := len(revoke); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Capture all the nodes
			var err error
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = state.NodesByIDPrefix(ws, prefix)
			} else {
				iter, err = state.Nodes(ws)
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := state.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

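// Editor's example (hypothetical caller, not part of the original file):
// listing nodes whose IDs share a prefix via QueryOptions.Prefix, which the
// run function above routes to NodesByIDPrefix. The prefix value is made up.
func exampleListByPrefix(n *Node) ([]*structs.NodeListStub, error) {
	args := &structs.NodeListRequest{
		QueryOptions: structs.QueryOptions{Region: "global", Prefix: "c3f9"},
	}
	var reply structs.NodeListResponse
	if err := n.List(args, &reply); err != nil {
		return nil, err
	}
	return reply.Nodes, nil
}
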
// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	ws := memdb.NewWatchSet()
	allocs, err := snap.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but the EvalUpdate does not.
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}

// batchFuture is used to wait on a batch update to complete
type batchFuture struct {
	doneCh chan struct{}
	err    error
	index  uint64
}

// NewBatchFuture creates a new batch future
func NewBatchFuture() *batchFuture {
	return &batchFuture{
		doneCh: make(chan struct{}),
	}
}

// Wait is used to block for the future to complete and returns the error
func (b *batchFuture) Wait() error {
	<-b.doneCh
	return b.err
}

// Index is used to return the index of the batch, only after Wait()
func (b *batchFuture) Index() uint64 {
	return b.index
}

// Respond is used to unblock the future
func (b *batchFuture) Respond(index uint64, err error) {
	b.index = index
	b.err = err
	close(b.doneCh)
}

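// Editor's example (illustrative sketch, not part of the original file):
// the batchFuture lifecycle — Respond is called exactly once, after which
// any number of Wait callers unblock and may safely read Index.
func exampleBatchFuture() {
	f := NewBatchFuture()
	go f.Respond(42, nil)
	if err := f.Wait(); err == nil {
		fmt.Println(f.Index()) // 42
	}
}
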
// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		if e == nil {
			return
		}
		reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(ws, args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}

	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens but where if any fails the whole group is
	// canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens. The results map is written by multiple
	// handler goroutines, so guard it with a mutex.
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))
	var resultsLock sync.Mutex
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						wrapped := fmt.Sprintf("failed to create token for task %q on alloc %q: %v", task, alloc.ID, err)
						return structs.WrapRecoverable(wrapped, err)
					}

					resultsLock.Lock()
					results[task] = secret
					resultsLock.Unlock()
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}
	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		if w == nil {
			return fmt.Errorf("Vault returned Secret without WrapInfo")
		}

		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
		}

		return nil
	}

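	// Editor's note (added commentary, not in the original): registering
	// the accessors in Raft before returning the tokens gives the servers
	// a durable record with which to revoke them later; returning tokens
	// first could leak live credentials if the apply below failed.
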
	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}

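// Editor's example (illustrative sketch, not part of the original file):
// the bounded fan-out used in DeriveVaultToken above, reduced to its shape.
// A fixed pool of workers drains a channel and the whole group is canceled
// on the first error; do stands in for the per-task Vault call.
func exampleBoundedFanOut(tasks []string, do func(context.Context, string) error) error {
	g, ctx := errgroup.WithContext(context.Background())

	workers := len(tasks)
	if workers > maxParallelRequestsPerDerive {
		workers = maxParallelRequestsPerDerive
	}

	input := make(chan string, workers)
	for i := 0; i < workers; i++ {
		g.Go(func() error {
			for task := range input {
				if err := do(ctx, task); err != nil {
					return err
				}
			}
			return nil
		})
	}

	go func() {
		defer close(input)
		for _, t := range tasks {
			select {
			case <-ctx.Done():
				return
			case input <- t:
			}
		}
	}()

	return g.Wait()
}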