github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/node_endpoint.go (about) 1 package nomad 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 "sync" 8 "time" 9 10 "golang.org/x/sync/errgroup" 11 12 "github.com/armon/go-metrics" 13 "github.com/hashicorp/go-memdb" 14 "github.com/hashicorp/go-multierror" 15 "github.com/hashicorp/nomad/nomad/state" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/hashicorp/nomad/nomad/watch" 18 "github.com/hashicorp/raft" 19 vapi "github.com/hashicorp/vault/api" 20 ) 21 22 const ( 23 // batchUpdateInterval is how long we wait to batch updates 24 batchUpdateInterval = 50 * time.Millisecond 25 26 // maxParallelRequestsPerDerive is the maximum number of parallel Vault 27 // create token requests that may be outstanding per derive request 28 maxParallelRequestsPerDerive = 16 29 ) 30 31 // Node endpoint is used for client interactions 32 type Node struct { 33 srv *Server 34 35 // updates holds pending client status updates for allocations 36 updates []*structs.Allocation 37 38 // updateFuture is used to wait for the pending batch update 39 // to complete. This may be nil if no batch is pending. 40 updateFuture *batchFuture 41 42 // updateTimer is the timer that will trigger the next batch 43 // update, and may be nil if there is no batch pending. 44 updateTimer *time.Timer 45 46 // updatesLock synchronizes access to the updates list, 47 // the future and the timer. 48 updatesLock sync.Mutex 49 } 50 51 // Register is used to upsert a client that is available for scheduling 52 func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error { 53 if done, err := n.srv.forward("Node.Register", args, args, reply); done { 54 return err 55 } 56 defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now()) 57 58 // Validate the arguments 59 if args.Node == nil { 60 return fmt.Errorf("missing node for client registration") 61 } 62 if args.Node.ID == "" { 63 return fmt.Errorf("missing node ID for client registration") 64 } 65 if args.Node.Datacenter == "" { 66 return fmt.Errorf("missing datacenter for client registration") 67 } 68 if args.Node.Name == "" { 69 return fmt.Errorf("missing node name for client registration") 70 } 71 if len(args.Node.Attributes) == 0 { 72 return fmt.Errorf("missing attributes for client registration") 73 } 74 75 // COMPAT: Remove after 0.6 76 // Need to check if this node is <0.4.x since SecretID is new in 0.5 77 pre, err := nodePreSecretID(args.Node) 78 if err != nil { 79 return err 80 } 81 if args.Node.SecretID == "" && !pre { 82 return fmt.Errorf("missing node secret ID for client registration") 83 } 84 85 // Default the status if none is given 86 if args.Node.Status == "" { 87 args.Node.Status = structs.NodeStatusInit 88 } 89 if !structs.ValidNodeStatus(args.Node.Status) { 90 return fmt.Errorf("invalid status for node") 91 } 92 93 // Set the timestamp when the node is registered 94 args.Node.StatusUpdatedAt = time.Now().Unix() 95 96 // Compute the node class 97 if err := args.Node.ComputeClass(); err != nil { 98 return fmt.Errorf("failed to computed node class: %v", err) 99 } 100 101 // Look for the node so we can detect a state transistion 102 snap, err := n.srv.fsm.State().Snapshot() 103 if err != nil { 104 return err 105 } 106 originalNode, err := snap.NodeByID(args.Node.ID) 107 if err != nil { 108 return err 109 } 110 111 // Check if the SecretID has been tampered with 112 if !pre && originalNode != nil { 113 if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" { 114 return fmt.Errorf("node secret ID does not match. Not registering node.") 115 } 116 } 117 118 // Commit this update via Raft 119 _, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args) 120 if err != nil { 121 n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err) 122 return err 123 } 124 reply.NodeModifyIndex = index 125 126 // Check if we should trigger evaluations 127 originalStatus := structs.NodeStatusInit 128 if originalNode != nil { 129 originalStatus = originalNode.Status 130 } 131 transitionToReady := transitionedToReady(args.Node.Status, originalStatus) 132 if structs.ShouldDrainNode(args.Node.Status) || transitionToReady { 133 evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index) 134 if err != nil { 135 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 136 return err 137 } 138 reply.EvalIDs = evalIDs 139 reply.EvalCreateIndex = evalIndex 140 } 141 142 // Check if we need to setup a heartbeat 143 if !args.Node.TerminalStatus() { 144 ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID) 145 if err != nil { 146 n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) 147 return err 148 } 149 reply.HeartbeatTTL = ttl 150 } 151 152 // Set the reply index 153 reply.Index = index 154 snap, err = n.srv.fsm.State().Snapshot() 155 if err != nil { 156 return err 157 } 158 159 n.srv.peerLock.RLock() 160 defer n.srv.peerLock.RUnlock() 161 if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { 162 n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err) 163 return err 164 } 165 166 return nil 167 } 168 169 // nodePreSecretID is a helper that returns whether the node is on a version 170 // that is before SecretIDs were introduced 171 func nodePreSecretID(node *structs.Node) (bool, error) { 172 a := node.Attributes 173 if a == nil { 174 return false, fmt.Errorf("node doesn't have attributes set") 175 } 176 177 v, ok := a["nomad.version"] 178 if !ok { 179 return false, fmt.Errorf("missing Nomad version in attributes") 180 } 181 182 return !strings.HasPrefix(v, "0.5"), nil 183 } 184 185 // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. 186 func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { 187 reply.LeaderRPCAddr = n.srv.raft.Leader() 188 189 // Reply with config information required for future RPC requests 190 reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers)) 191 for k, v := range n.srv.localPeers { 192 reply.Servers = append(reply.Servers, 193 &structs.NodeServerInfo{ 194 RPCAdvertiseAddr: k, 195 RPCMajorVersion: int32(v.MajorVersion), 196 RPCMinorVersion: int32(v.MinorVersion), 197 Datacenter: v.Datacenter, 198 }) 199 } 200 201 // TODO(sean@): Use an indexed node count instead 202 // 203 // Snapshot is used only to iterate over all nodes to create a node 204 // count to send back to Nomad Clients in their heartbeat so Clients 205 // can estimate the size of the cluster. 206 iter, err := snap.Nodes() 207 if err == nil { 208 for { 209 raw := iter.Next() 210 if raw == nil { 211 break 212 } 213 reply.NumNodes++ 214 } 215 } 216 217 return nil 218 } 219 220 // Deregister is used to remove a client from the cluster. If a client should 221 // just be made unavailable for scheduling, a status update is preferred. 222 func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error { 223 if done, err := n.srv.forward("Node.Deregister", args, args, reply); done { 224 return err 225 } 226 defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now()) 227 228 // Verify the arguments 229 if args.NodeID == "" { 230 return fmt.Errorf("missing node ID for client deregistration") 231 } 232 233 // Commit this update via Raft 234 _, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args) 235 if err != nil { 236 n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err) 237 return err 238 } 239 240 // Clear the heartbeat timer if any 241 n.srv.clearHeartbeatTimer(args.NodeID) 242 243 // Create the evaluations for this node 244 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 245 if err != nil { 246 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 247 return err 248 } 249 250 // Determine if there are any Vault accessors on the node 251 accessors, err := n.srv.State().VaultAccessorsByNode(args.NodeID) 252 if err != nil { 253 n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err) 254 return err 255 } 256 257 if len(accessors) != 0 { 258 if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil { 259 n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err) 260 return err 261 } 262 } 263 264 // Setup the reply 265 reply.EvalIDs = evalIDs 266 reply.EvalCreateIndex = evalIndex 267 reply.NodeModifyIndex = index 268 reply.Index = index 269 return nil 270 } 271 272 // UpdateStatus is used to update the status of a client node 273 func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error { 274 if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done { 275 return err 276 } 277 defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now()) 278 279 // Verify the arguments 280 if args.NodeID == "" { 281 return fmt.Errorf("missing node ID for client status update") 282 } 283 if !structs.ValidNodeStatus(args.Status) { 284 return fmt.Errorf("invalid status for node") 285 } 286 287 // Look for the node 288 snap, err := n.srv.fsm.State().Snapshot() 289 if err != nil { 290 return err 291 } 292 node, err := snap.NodeByID(args.NodeID) 293 if err != nil { 294 return err 295 } 296 if node == nil { 297 return fmt.Errorf("node not found") 298 } 299 300 // XXX: Could use the SecretID here but have to update the heartbeat system 301 // to track SecretIDs. 302 303 // Update the timestamp of when the node status was updated 304 node.StatusUpdatedAt = time.Now().Unix() 305 306 // Commit this update via Raft 307 var index uint64 308 if node.Status != args.Status { 309 _, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args) 310 if err != nil { 311 n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err) 312 return err 313 } 314 reply.NodeModifyIndex = index 315 } 316 317 // Check if we should trigger evaluations 318 transitionToReady := transitionedToReady(args.Status, node.Status) 319 if structs.ShouldDrainNode(args.Status) || transitionToReady { 320 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 321 if err != nil { 322 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 323 return err 324 } 325 reply.EvalIDs = evalIDs 326 reply.EvalCreateIndex = evalIndex 327 } 328 329 // Check if we need to setup a heartbeat 330 switch args.Status { 331 case structs.NodeStatusDown: 332 // Determine if there are any Vault accessors on the node 333 accessors, err := n.srv.State().VaultAccessorsByNode(args.NodeID) 334 if err != nil { 335 n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err) 336 return err 337 } 338 339 if len(accessors) != 0 { 340 if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil { 341 n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err) 342 return err 343 } 344 } 345 default: 346 ttl, err := n.srv.resetHeartbeatTimer(args.NodeID) 347 if err != nil { 348 n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err) 349 return err 350 } 351 reply.HeartbeatTTL = ttl 352 } 353 354 // Set the reply index and leader 355 reply.Index = index 356 n.srv.peerLock.RLock() 357 defer n.srv.peerLock.RUnlock() 358 if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { 359 n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err) 360 return err 361 } 362 363 return nil 364 } 365 366 // transitionedToReady is a helper that takes a nodes new and old status and 367 // returns whether it has transistioned to ready. 368 func transitionedToReady(newStatus, oldStatus string) bool { 369 initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady 370 terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady 371 return initToReady || terminalToReady 372 } 373 374 // UpdateDrain is used to update the drain mode of a client node 375 func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, 376 reply *structs.NodeDrainUpdateResponse) error { 377 if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done { 378 return err 379 } 380 defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now()) 381 382 // Verify the arguments 383 if args.NodeID == "" { 384 return fmt.Errorf("missing node ID for drain update") 385 } 386 387 // Look for the node 388 snap, err := n.srv.fsm.State().Snapshot() 389 if err != nil { 390 return err 391 } 392 node, err := snap.NodeByID(args.NodeID) 393 if err != nil { 394 return err 395 } 396 if node == nil { 397 return fmt.Errorf("node not found") 398 } 399 400 // Update the timestamp to 401 node.StatusUpdatedAt = time.Now().Unix() 402 403 // Commit this update via Raft 404 var index uint64 405 if node.Drain != args.Drain { 406 _, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) 407 if err != nil { 408 n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err) 409 return err 410 } 411 reply.NodeModifyIndex = index 412 } 413 414 // Always attempt to create Node evaluations because there may be a System 415 // job registered that should be evaluated. 416 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 417 if err != nil { 418 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 419 return err 420 } 421 reply.EvalIDs = evalIDs 422 reply.EvalCreateIndex = evalIndex 423 424 // Set the reply index 425 reply.Index = index 426 return nil 427 } 428 429 // Evaluate is used to force a re-evaluation of the node 430 func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error { 431 if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done { 432 return err 433 } 434 defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now()) 435 436 // Verify the arguments 437 if args.NodeID == "" { 438 return fmt.Errorf("missing node ID for evaluation") 439 } 440 441 // Look for the node 442 snap, err := n.srv.fsm.State().Snapshot() 443 if err != nil { 444 return err 445 } 446 node, err := snap.NodeByID(args.NodeID) 447 if err != nil { 448 return err 449 } 450 if node == nil { 451 return fmt.Errorf("node not found") 452 } 453 454 // Create the evaluation 455 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex) 456 if err != nil { 457 n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err) 458 return err 459 } 460 reply.EvalIDs = evalIDs 461 reply.EvalCreateIndex = evalIndex 462 463 // Set the reply index 464 reply.Index = evalIndex 465 466 n.srv.peerLock.RLock() 467 defer n.srv.peerLock.RUnlock() 468 if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { 469 n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err) 470 return err 471 } 472 return nil 473 } 474 475 // GetNode is used to request information about a specific node 476 func (n *Node) GetNode(args *structs.NodeSpecificRequest, 477 reply *structs.SingleNodeResponse) error { 478 if done, err := n.srv.forward("Node.GetNode", args, args, reply); done { 479 return err 480 } 481 defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) 482 483 // Setup the blocking query 484 opts := blockingOptions{ 485 queryOpts: &args.QueryOptions, 486 queryMeta: &reply.QueryMeta, 487 watch: watch.NewItems(watch.Item{Node: args.NodeID}), 488 run: func() error { 489 // Verify the arguments 490 if args.NodeID == "" { 491 return fmt.Errorf("missing node ID") 492 } 493 494 // Look for the node 495 snap, err := n.srv.fsm.State().Snapshot() 496 if err != nil { 497 return err 498 } 499 out, err := snap.NodeByID(args.NodeID) 500 if err != nil { 501 return err 502 } 503 504 // Setup the output 505 if out != nil { 506 // Clear the secret ID 507 reply.Node = out.Copy() 508 reply.Node.SecretID = "" 509 reply.Index = out.ModifyIndex 510 } else { 511 // Use the last index that affected the nodes table 512 index, err := snap.Index("nodes") 513 if err != nil { 514 return err 515 } 516 reply.Node = nil 517 reply.Index = index 518 } 519 520 // Set the query response 521 n.srv.setQueryMeta(&reply.QueryMeta) 522 return nil 523 }} 524 return n.srv.blockingRPC(&opts) 525 } 526 527 // GetAllocs is used to request allocations for a specific node 528 func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, 529 reply *structs.NodeAllocsResponse) error { 530 if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done { 531 return err 532 } 533 defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now()) 534 535 // Verify the arguments 536 if args.NodeID == "" { 537 return fmt.Errorf("missing node ID") 538 } 539 540 // Setup the blocking query 541 opts := blockingOptions{ 542 queryOpts: &args.QueryOptions, 543 queryMeta: &reply.QueryMeta, 544 watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), 545 run: func() error { 546 // Look for the node 547 snap, err := n.srv.fsm.State().Snapshot() 548 if err != nil { 549 return err 550 } 551 allocs, err := snap.AllocsByNode(args.NodeID) 552 if err != nil { 553 return err 554 } 555 556 // Setup the output 557 if len(allocs) != 0 { 558 reply.Allocs = allocs 559 for _, alloc := range allocs { 560 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 561 } 562 } else { 563 reply.Allocs = nil 564 565 // Use the last index that affected the nodes table 566 index, err := snap.Index("allocs") 567 if err != nil { 568 return err 569 } 570 571 // Must provide non-zero index to prevent blocking 572 // Index 1 is impossible anyways (due to Raft internals) 573 if index == 0 { 574 reply.Index = 1 575 } else { 576 reply.Index = index 577 } 578 } 579 return nil 580 }} 581 return n.srv.blockingRPC(&opts) 582 } 583 584 // GetClientAllocs is used to request a lightweight list of alloc modify indexes 585 // per allocation. 586 func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, 587 reply *structs.NodeClientAllocsResponse) error { 588 if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done { 589 return err 590 } 591 defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now()) 592 593 // Verify the arguments 594 if args.NodeID == "" { 595 return fmt.Errorf("missing node ID") 596 } 597 598 // Setup the blocking query 599 opts := blockingOptions{ 600 queryOpts: &args.QueryOptions, 601 queryMeta: &reply.QueryMeta, 602 watch: watch.NewItems(watch.Item{AllocNode: args.NodeID}), 603 run: func() error { 604 // Look for the node 605 snap, err := n.srv.fsm.State().Snapshot() 606 if err != nil { 607 return err 608 } 609 610 // Look for the node 611 node, err := snap.NodeByID(args.NodeID) 612 if err != nil { 613 return err 614 } 615 616 var allocs []*structs.Allocation 617 if node != nil { 618 // COMPAT: Remove in 0.6 619 // Check if the node should have a SecretID set 620 if args.SecretID == "" { 621 if pre, err := nodePreSecretID(node); err != nil { 622 return err 623 } else if !pre { 624 return fmt.Errorf("missing node secret ID for client status update") 625 } 626 } else if args.SecretID != node.SecretID { 627 return fmt.Errorf("node secret ID does not match") 628 } 629 630 var err error 631 allocs, err = snap.AllocsByNode(args.NodeID) 632 if err != nil { 633 return err 634 } 635 } 636 637 reply.Allocs = make(map[string]uint64) 638 // Setup the output 639 if len(allocs) != 0 { 640 for _, alloc := range allocs { 641 reply.Allocs[alloc.ID] = alloc.AllocModifyIndex 642 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 643 } 644 } else { 645 // Use the last index that affected the nodes table 646 index, err := snap.Index("allocs") 647 if err != nil { 648 return err 649 } 650 651 // Must provide non-zero index to prevent blocking 652 // Index 1 is impossible anyways (due to Raft internals) 653 if index == 0 { 654 reply.Index = 1 655 } else { 656 reply.Index = index 657 } 658 } 659 return nil 660 }} 661 return n.srv.blockingRPC(&opts) 662 } 663 664 // UpdateAlloc is used to update the client status of an allocation 665 func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error { 666 if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done { 667 return err 668 } 669 defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now()) 670 671 // Ensure at least a single alloc 672 if len(args.Alloc) == 0 { 673 return fmt.Errorf("must update at least one allocation") 674 } 675 676 // Add this to the batch 677 n.updatesLock.Lock() 678 n.updates = append(n.updates, args.Alloc...) 679 680 // Start a new batch if none 681 future := n.updateFuture 682 if future == nil { 683 future = NewBatchFuture() 684 n.updateFuture = future 685 n.updateTimer = time.AfterFunc(batchUpdateInterval, func() { 686 // Get the pending updates 687 n.updatesLock.Lock() 688 updates := n.updates 689 future := n.updateFuture 690 n.updates = nil 691 n.updateFuture = nil 692 n.updateTimer = nil 693 n.updatesLock.Unlock() 694 695 // Perform the batch update 696 n.batchUpdate(future, updates) 697 }) 698 } 699 n.updatesLock.Unlock() 700 701 // Wait for the future 702 if err := future.Wait(); err != nil { 703 return err 704 } 705 706 // Setup the response 707 reply.Index = future.Index() 708 return nil 709 } 710 711 // batchUpdate is used to update all the allocations 712 func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) { 713 // Prepare the batch update 714 batch := &structs.AllocUpdateRequest{ 715 Alloc: updates, 716 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 717 } 718 719 // Commit this update via Raft 720 var mErr multierror.Error 721 _, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch) 722 if err != nil { 723 n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err) 724 mErr.Errors = append(mErr.Errors, err) 725 } 726 727 // For each allocation we are updating check if we should revoke any 728 // Vault Accessors 729 var revoke []*structs.VaultAccessor 730 for _, alloc := range updates { 731 // Skip any allocation that isn't dead on the client 732 if !alloc.Terminated() { 733 continue 734 } 735 736 // Determine if there are any Vault accessors for the allocation 737 accessors, err := n.srv.State().VaultAccessorsByAlloc(alloc.ID) 738 if err != nil { 739 n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err) 740 mErr.Errors = append(mErr.Errors, err) 741 } 742 743 revoke = append(revoke, accessors...) 744 } 745 746 if len(revoke) != 0 { 747 if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil { 748 n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err) 749 mErr.Errors = append(mErr.Errors, err) 750 } 751 } 752 753 // Respond to the future 754 future.Respond(index, mErr.ErrorOrNil()) 755 } 756 757 // List is used to list the available nodes 758 func (n *Node) List(args *structs.NodeListRequest, 759 reply *structs.NodeListResponse) error { 760 if done, err := n.srv.forward("Node.List", args, args, reply); done { 761 return err 762 } 763 defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) 764 765 // Setup the blocking query 766 opts := blockingOptions{ 767 queryOpts: &args.QueryOptions, 768 queryMeta: &reply.QueryMeta, 769 watch: watch.NewItems(watch.Item{Table: "nodes"}), 770 run: func() error { 771 // Capture all the nodes 772 snap, err := n.srv.fsm.State().Snapshot() 773 if err != nil { 774 return err 775 } 776 var iter memdb.ResultIterator 777 if prefix := args.QueryOptions.Prefix; prefix != "" { 778 iter, err = snap.NodesByIDPrefix(prefix) 779 } else { 780 iter, err = snap.Nodes() 781 } 782 if err != nil { 783 return err 784 } 785 786 var nodes []*structs.NodeListStub 787 for { 788 raw := iter.Next() 789 if raw == nil { 790 break 791 } 792 node := raw.(*structs.Node) 793 nodes = append(nodes, node.Stub()) 794 } 795 reply.Nodes = nodes 796 797 // Use the last index that affected the jobs table 798 index, err := snap.Index("nodes") 799 if err != nil { 800 return err 801 } 802 reply.Index = index 803 804 // Set the query response 805 n.srv.setQueryMeta(&reply.QueryMeta) 806 return nil 807 }} 808 return n.srv.blockingRPC(&opts) 809 } 810 811 // createNodeEvals is used to create evaluations for each alloc on a node. 812 // Each Eval is scoped to a job, so we need to potentially trigger many evals. 813 func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) { 814 // Snapshot the state 815 snap, err := n.srv.fsm.State().Snapshot() 816 if err != nil { 817 return nil, 0, fmt.Errorf("failed to snapshot state: %v", err) 818 } 819 820 // Find all the allocations for this node 821 allocs, err := snap.AllocsByNode(nodeID) 822 if err != nil { 823 return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err) 824 } 825 826 sysJobsIter, err := snap.JobsByScheduler("system") 827 if err != nil { 828 return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err) 829 } 830 831 var sysJobs []*structs.Job 832 for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() { 833 sysJobs = append(sysJobs, job.(*structs.Job)) 834 } 835 836 // Fast-path if nothing to do 837 if len(allocs) == 0 && len(sysJobs) == 0 { 838 return nil, 0, nil 839 } 840 841 // Create an eval for each JobID affected 842 var evals []*structs.Evaluation 843 var evalIDs []string 844 jobIDs := make(map[string]struct{}) 845 846 for _, alloc := range allocs { 847 // Deduplicate on JobID 848 if _, ok := jobIDs[alloc.JobID]; ok { 849 continue 850 } 851 jobIDs[alloc.JobID] = struct{}{} 852 853 // Create a new eval 854 eval := &structs.Evaluation{ 855 ID: structs.GenerateUUID(), 856 Priority: alloc.Job.Priority, 857 Type: alloc.Job.Type, 858 TriggeredBy: structs.EvalTriggerNodeUpdate, 859 JobID: alloc.JobID, 860 NodeID: nodeID, 861 NodeModifyIndex: nodeIndex, 862 Status: structs.EvalStatusPending, 863 } 864 evals = append(evals, eval) 865 evalIDs = append(evalIDs, eval.ID) 866 } 867 868 // Create an evaluation for each system job. 869 for _, job := range sysJobs { 870 // Still dedup on JobID as the node may already have the system job. 871 if _, ok := jobIDs[job.ID]; ok { 872 continue 873 } 874 jobIDs[job.ID] = struct{}{} 875 876 // Create a new eval 877 eval := &structs.Evaluation{ 878 ID: structs.GenerateUUID(), 879 Priority: job.Priority, 880 Type: job.Type, 881 TriggeredBy: structs.EvalTriggerNodeUpdate, 882 JobID: job.ID, 883 NodeID: nodeID, 884 NodeModifyIndex: nodeIndex, 885 Status: structs.EvalStatusPending, 886 } 887 evals = append(evals, eval) 888 evalIDs = append(evalIDs, eval.ID) 889 } 890 891 // Create the Raft transaction 892 update := &structs.EvalUpdateRequest{ 893 Evals: evals, 894 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 895 } 896 897 // Commit this evaluation via Raft 898 // XXX: There is a risk of partial failure where the node update succeeds 899 // but that the EvalUpdate does not. 900 _, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update) 901 if err != nil { 902 return nil, 0, err 903 } 904 return evalIDs, evalIndex, nil 905 } 906 907 // batchFuture is used to wait on a batch update to complete 908 type batchFuture struct { 909 doneCh chan struct{} 910 err error 911 index uint64 912 } 913 914 // NewBatchFuture creates a new batch future 915 func NewBatchFuture() *batchFuture { 916 return &batchFuture{ 917 doneCh: make(chan struct{}), 918 } 919 } 920 921 // Wait is used to block for the future to complete and returns the error 922 func (b *batchFuture) Wait() error { 923 <-b.doneCh 924 return b.err 925 } 926 927 // Index is used to return the index of the batch, only after Wait() 928 func (b *batchFuture) Index() uint64 { 929 return b.index 930 } 931 932 // Respond is used to unblock the future 933 func (b *batchFuture) Respond(index uint64, err error) { 934 b.index = index 935 b.err = err 936 close(b.doneCh) 937 } 938 939 // DeriveVaultToken is used by the clients to request wrapped Vault tokens for 940 // tasks 941 func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest, 942 reply *structs.DeriveVaultTokenResponse) error { 943 944 // setErr is a helper for setting the recoverable error on the reply and 945 // logging it 946 setErr := func(e error, recoverable bool) { 947 reply.Error = structs.NewRecoverableError(e, recoverable) 948 n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e) 949 } 950 951 if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done { 952 setErr(err, err == structs.ErrNoLeader) 953 return nil 954 } 955 defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now()) 956 957 // Verify the arguments 958 if args.NodeID == "" { 959 setErr(fmt.Errorf("missing node ID"), false) 960 return nil 961 } 962 if args.SecretID == "" { 963 setErr(fmt.Errorf("missing node SecretID"), false) 964 return nil 965 } 966 if args.AllocID == "" { 967 setErr(fmt.Errorf("missing allocation ID"), false) 968 return nil 969 } 970 if len(args.Tasks) == 0 { 971 setErr(fmt.Errorf("no tasks specified"), false) 972 return nil 973 } 974 975 // Verify the following: 976 // * The Node exists and has the correct SecretID 977 // * The Allocation exists on the specified node 978 // * The allocation contains the given tasks and they each require Vault 979 // tokens 980 snap, err := n.srv.fsm.State().Snapshot() 981 if err != nil { 982 setErr(err, false) 983 return nil 984 } 985 node, err := snap.NodeByID(args.NodeID) 986 if err != nil { 987 setErr(err, false) 988 return nil 989 } 990 if node == nil { 991 setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false) 992 return nil 993 } 994 if node.SecretID != args.SecretID { 995 setErr(fmt.Errorf("SecretID mismatch"), false) 996 return nil 997 } 998 999 alloc, err := snap.AllocByID(args.AllocID) 1000 if err != nil { 1001 setErr(err, false) 1002 return nil 1003 } 1004 if alloc == nil { 1005 setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false) 1006 return nil 1007 } 1008 if alloc.NodeID != args.NodeID { 1009 setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false) 1010 return nil 1011 } 1012 if alloc.TerminalStatus() { 1013 setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false) 1014 return nil 1015 } 1016 1017 // Check the policies 1018 policies := alloc.Job.VaultPolicies() 1019 if policies == nil { 1020 setErr(fmt.Errorf("Job doesn't require Vault policies"), false) 1021 return nil 1022 } 1023 tg, ok := policies[alloc.TaskGroup] 1024 if !ok { 1025 setErr(fmt.Errorf("Task group does not require Vault policies"), false) 1026 return nil 1027 } 1028 1029 var unneeded []string 1030 for _, task := range args.Tasks { 1031 taskVault := tg[task] 1032 if taskVault == nil || len(taskVault.Policies) == 0 { 1033 unneeded = append(unneeded, task) 1034 } 1035 } 1036 1037 if len(unneeded) != 0 { 1038 e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s", 1039 strings.Join(unneeded, ", ")) 1040 setErr(e, false) 1041 return nil 1042 } 1043 1044 // At this point the request is valid and we should contact Vault for 1045 // tokens. 1046 1047 // Create an error group where we will spin up a fixed set of goroutines to 1048 // handle deriving tokens but where if any fails the whole group is 1049 // canceled. 1050 g, ctx := errgroup.WithContext(context.Background()) 1051 1052 // Cap the handlers 1053 handlers := len(args.Tasks) 1054 if handlers > maxParallelRequestsPerDerive { 1055 handlers = maxParallelRequestsPerDerive 1056 } 1057 1058 // Create the Vault Tokens 1059 input := make(chan string, handlers) 1060 results := make(map[string]*vapi.Secret, len(args.Tasks)) 1061 for i := 0; i < handlers; i++ { 1062 g.Go(func() error { 1063 for { 1064 select { 1065 case task, ok := <-input: 1066 if !ok { 1067 return nil 1068 } 1069 1070 secret, err := n.srv.vault.CreateToken(ctx, alloc, task) 1071 if err != nil { 1072 wrapped := fmt.Errorf("failed to create token for task %q: %v", task, err) 1073 if rerr, ok := err.(*structs.RecoverableError); ok && rerr.Recoverable { 1074 // If the error is recoverable, propogate it 1075 return structs.NewRecoverableError(wrapped, true) 1076 } 1077 1078 return wrapped 1079 } 1080 1081 results[task] = secret 1082 case <-ctx.Done(): 1083 return nil 1084 } 1085 } 1086 }) 1087 } 1088 1089 // Send the input 1090 go func() { 1091 defer close(input) 1092 for _, task := range args.Tasks { 1093 select { 1094 case <-ctx.Done(): 1095 return 1096 case input <- task: 1097 } 1098 } 1099 1100 }() 1101 1102 // Wait for everything to complete or for an error 1103 createErr := g.Wait() 1104 1105 // Retrieve the results 1106 accessors := make([]*structs.VaultAccessor, 0, len(results)) 1107 tokens := make(map[string]string, len(results)) 1108 for task, secret := range results { 1109 w := secret.WrapInfo 1110 if w == nil { 1111 return fmt.Errorf("Vault returned Secret without WrapInfo") 1112 } 1113 1114 tokens[task] = w.Token 1115 accessor := &structs.VaultAccessor{ 1116 Accessor: w.WrappedAccessor, 1117 Task: task, 1118 NodeID: alloc.NodeID, 1119 AllocID: alloc.ID, 1120 CreationTTL: w.TTL, 1121 } 1122 1123 accessors = append(accessors, accessor) 1124 } 1125 1126 // If there was an error revoke the created tokens 1127 if createErr != nil { 1128 n.srv.logger.Printf("[ERR] nomad.node: Vault token creation failed: %v", createErr) 1129 1130 if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil { 1131 n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation failed: %v", revokeErr) 1132 } 1133 1134 if rerr, ok := createErr.(*structs.RecoverableError); ok { 1135 reply.Error = rerr 1136 } else { 1137 reply.Error = structs.NewRecoverableError(createErr, false) 1138 } 1139 1140 return nil 1141 } 1142 1143 // Commit to Raft before returning any of the tokens 1144 req := structs.VaultAccessorsRequest{Accessors: accessors} 1145 _, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req) 1146 if err != nil { 1147 n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors failed: %v", err) 1148 1149 // Determine if we can recover from the error 1150 retry := false 1151 switch err { 1152 case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout: 1153 retry = true 1154 } 1155 1156 setErr(err, retry) 1157 return nil 1158 } 1159 1160 reply.Index = index 1161 reply.Tasks = tokens 1162 n.srv.setQueryMeta(&reply.QueryMeta) 1163 return nil 1164 }