github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/node_endpoint.go

package nomad

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16

	// NodeDrainEvents are the various drain messages
	NodeDrainEventDrainSet      = "Node drain strategy set"
	NodeDrainEventDrainDisabled = "Node drain disabled"
	NodeDrainEventDrainUpdated  = "Node drain strategy updated"

	// NodeEligibilityEventEligible is used when the node's eligibility is marked
	// eligible
	NodeEligibilityEventEligible = "Node marked as eligible for scheduling"

	// NodeEligibilityEventIneligible is used when the node's eligibility is marked
	// ineligible
	NodeEligibilityEventIneligible = "Node marked as ineligible for scheduling"

	// NodeHeartbeatEventReregistered is the message used when the node becomes
	// reregistered by the heartbeat.
	NodeHeartbeatEventReregistered = "Node reregistered by heartbeat"
)

// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// ctx provides context regarding the underlying connection
	ctx *RPCContext

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// evals holds pending rescheduling eval updates triggered by failed allocations
	evals []*structs.Evaluation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *structs.BatchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
			n.ctx.NodeID = args.Node.ID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}
	if args.Node.SecretID == "" {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Default to eligible for scheduling if unset
	if args.Node.SchedulingEligibility == "" {
		args.Node.SchedulingEligibility = structs.NodeSchedulingEligible
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match. Not registering node.")
		}
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
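	// (If the request was forwarded, n.ctx refers to the connection from the
	// forwarding server rather than from the node itself, so it must not be
	// cached as a node connection.)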
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.Node.ID
		n.srv.addNodeConn(n.ctx)
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// constructNodeServerInfoResponse assumes the n.srv.peerLock is held for reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = string(n.srv.raft.Leader())

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for _, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: v.RPCAddr.String(),
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	ws := memdb.NewWatchSet()
	iter, err := snap.Nodes(ws)
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
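// Deregistration also clears the node's heartbeat timer, creates evaluations for
// its allocations, and revokes any Vault accessors still outstanding for the node.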
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Check node permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}
	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	accessors, err := snap.VaultAccessorsByNode(ws, args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if l := len(accessors); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Setup the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
			n.ctx.NodeID = args.NodeID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.NodeID
		n.srv.addNodeConn(n.ctx)
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		// Attach an event if we are updating the node status to ready when it
		// is down via a heartbeat
		if node.Status == structs.NodeStatusDown && args.NodeEvent == nil {
			args.NodeEvent = structs.NewNodeEvent().
				SetSubsystem(structs.NodeEventSubsystemCluster).
				SetMessage(NodeHeartbeatEventReregistered)
		}

		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if l := len(accessors); l != 0 {
			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
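
	// Populate the leader and peer information for the client;
	// constructNodeServerInfoResponse expects peerLock to be held for reading.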
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}

// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}
	if args.NodeEvent != nil {
		return fmt.Errorf("node event must not be set")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(nil, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// COMPAT: Remove in 0.9. Attempt to upgrade the request if it is of the old
	// format.
	if args.Drain && args.DrainStrategy == nil {
		args.DrainStrategy = &structs.DrainStrategy{
			DrainSpec: structs.DrainSpec{
				Deadline: -1 * time.Second, // Force drain
			},
		}
	}

	// Mark the deadline time
	if args.DrainStrategy != nil && args.DrainStrategy.Deadline.Nanoseconds() > 0 {
		args.DrainStrategy.ForceDeadline = time.Now().Add(args.DrainStrategy.Deadline)
	}

	// Construct the node event
	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemDrain)
	if node.DrainStrategy == nil && args.DrainStrategy != nil {
		args.NodeEvent.SetMessage(NodeDrainEventDrainSet)
	} else if node.DrainStrategy != nil && args.DrainStrategy != nil {
		args.NodeEvent.SetMessage(NodeDrainEventDrainUpdated)
	} else if node.DrainStrategy != nil && args.DrainStrategy == nil {
		args.NodeEvent.SetMessage(NodeDrainEventDrainDisabled)
	} else {
		args.NodeEvent = nil
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// If the node is transitioning to be eligible, create Node evaluations
	// because there may be a System job registered that should be evaluated.
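	// This only applies when an existing drain is being removed (DrainStrategy is
	// nil) with MarkEligible set on a node that is currently ineligible.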
	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Set the reply index
	reply.Index = index
	return nil
}

// UpdateEligibility is used to update the scheduling eligibility of a node
func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest,
	reply *structs.NodeEligibilityUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for setting scheduling eligibility")
	}
	if args.NodeEvent != nil {
		return fmt.Errorf("node event must not be set")
	}

	// Check that only allowed types are set
	switch args.Eligibility {
	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
	default:
		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(nil, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible {
		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
	}

	switch args.Eligibility {
	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
	default:
		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
	}

	// Construct the node event
	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster)
	if node.SchedulingEligibility == args.Eligibility {
		return nil // Nothing to do
	} else if args.Eligibility == structs.NodeSchedulingEligible {
		args.NodeEvent.SetMessage(NodeEligibilityEventEligible)
	} else {
		args.NodeEvent.SetMessage(NodeEligibilityEventIneligible)
	}

	// Commit this update via Raft
	outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err)
		return err
	}
	if outErr != nil {
		if err, ok := outErr.(error); ok && err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err)
			return err
		}
	}

	// If the node is transitioning to be eligible, create Node evaluations
	// because there may be a System job registered that should be evaluated.
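	// (node still holds the pre-update eligibility from the snapshot read above, so
	// comparing it to args.Eligibility detects the ineligible-to-eligible transition.)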
	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		// If ResolveToken had an unexpected error return that
		if err != structs.ErrTokenNotFound {
			return err
		}

		// Attempt to lookup AuthToken as a Node.SecretID since nodes
		// call this endpoint and don't have an ACL token.
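		// A matching node secret grants the read access this endpoint needs;
		// otherwise we fall through to ErrTokenNotFound below.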
		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
		if stateErr != nil {
			// Return the original ResolveToken error with this err
			var merr multierror.Error
			merr.Errors = append(merr.Errors, err, stateErr)
			return merr.ErrorOrNil()
		}

		// Not a node or a valid ACL token
		if node == nil {
			return structs.ErrTokenNotFound
		}
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			out, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := state.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Check node read and namespace job read permissions
	aclObj, err := n.srv.ResolveToken(args.AuthToken)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// cache namespace perms
	readableNamespaces := map[string]bool{}

	// readNS is a caching namespace read-job helper
	readNS := func(ns string) bool {
		if aclObj == nil {
			// ACLs are disabled; everything is readable
			return true
		}

		if readable, ok := readableNamespaces[ns]; ok {
			// cache hit
			return readable
		}

		// cache miss
		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
		readableNamespaces[ns] = readable
		return readable
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			allocs, err := state.AllocsByNode(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if n := len(allocs); n != 0 {
				reply.Allocs = make([]*structs.Allocation, 0, n)
				for _, alloc := range allocs {
					if readNS(alloc.Namespace) {
						reply.Allocs = append(reply.Allocs, alloc)
					}

					// Get the max of all allocs since
					// subsequent requests need to start
					// from the latest index
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetClientAllocs is used to request a lightweight list of alloc modify indexes
// per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
			n.ctx.NodeID = args.NodeID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// numOldAllocs is used to detect if there is a garbage collection event
	// that affects the node. When an allocation is garbage collected, the
	// modify index does not change and thus the query won't unblock, even
	// though the set of allocations on the node has changed.
	var numOldAllocs int

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			node, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				if args.SecretID == "" {
					return fmt.Errorf("missing node secret ID for client status update")
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				// We have a valid node connection, so add the mapping to cache the
				// connection and allow the server to send RPCs to the client. We only cache
				// the connection if it is not being forwarded from another server.
				if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
					n.ctx.NodeID = args.NodeID
					n.srv.addNodeConn(n.ctx)
				}

				var err error
				allocs, err = state.AllocsByNode(ws, args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			reply.MigrateTokens = make(map[string]string)

			// preferTableIndex is used to determine whether we should build the
			// response index based on the full table indexes versus the modify
			// indexes of the allocations on the specific node. This is
			// preferred in the case that the node doesn't yet have allocations
			// or when we detect a GC that affects the node.
			preferTableIndex := true

			// Setup the output
			if numAllocs := len(allocs); numAllocs != 0 {
				preferTableIndex = false

				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex

					// If the allocation is going to do a migration, create a
					// migration token so that the client can authenticate with
					// the node hosting the previous allocation.
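					// The token is derived from the previous allocation's ID and the
					// secret ID of the node that ran it (structs.GenerateMigrateToken).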
					if alloc.ShouldMigrate() {
						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
						if err != nil {
							return err
						}

						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
							if err != nil {
								return err
							}
							if allocNode == nil {
								// Node must have been GC'd so skip the token
								continue
							}

							token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
							if err != nil {
								return err
							}
							reply.MigrateTokens[alloc.ID] = token
						}
					}

					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}

				// Determine if we have fewer allocations than before. This
				// indicates there was a garbage collection
				if numAllocs < numOldAllocs {
					preferTableIndex = true
				}

				// Store the new number of allocations
				numOldAllocs = numAllocs
			}

			if preferTableIndex {
				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Ensure that evals aren't set from client RPCs
	// We create them here before the raft update
	if len(args.Evals) != 0 {
		return fmt.Errorf("evals field must not be set")
	}

	// Update modified timestamp for client initiated allocation updates
	now := time.Now()
	var evals []*structs.Evaluation

	for _, alloc := range args.Alloc {
		alloc.ModifyTime = now.UTC().UnixNano()

		// Add an evaluation if this is a failed alloc that is eligible for rescheduling
		if alloc.ClientStatus == structs.AllocClientStatusFailed {
			// Only create evaluations if this is an existing alloc and it is
			// eligible as per its task group's ReschedulePolicy
			if existingAlloc, _ := n.srv.State().AllocByID(nil, alloc.ID); existingAlloc != nil {
				job, err := n.srv.State().JobByID(nil, existingAlloc.Namespace, existingAlloc.JobID)
				if err != nil {
					n.srv.logger.Printf("[ERR] nomad.client: UpdateAlloc unable to find job ID %q: %v", existingAlloc.JobID, err)
					continue
				}
				if job == nil {
					n.srv.logger.Printf("[DEBUG] nomad.client: UpdateAlloc unable to find job ID %q", existingAlloc.JobID)
					continue
				}
				taskGroup := job.LookupTaskGroup(existingAlloc.TaskGroup)
				if taskGroup != nil && existingAlloc.FollowupEvalID == "" && existingAlloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) {
					eval := &structs.Evaluation{
						ID:          uuid.Generate(),
						Namespace:   existingAlloc.Namespace,
						TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
						JobID:       existingAlloc.JobID,
						Type:        job.Type,
						Priority:    job.Priority,
						Status:      structs.EvalStatusPending,
					}
					evals = append(evals, eval)
				}
			}
		}
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)
	n.evals = append(n.evals, evals...)

	// Start a new batch if none
	future := n.updateFuture
	if future == nil {
		future = structs.NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			evals := n.evals
			future := n.updateFuture
			n.updates = nil
			n.evals = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates, evals)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Setup the response
	reply.Index = future.Index()
	return nil
}

// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) {
	// Group pending evals by jobID to prevent creating unnecessary evals
	evalsByJobId := make(map[structs.NamespacedID]struct{})
	var trimmedEvals []*structs.Evaluation
	for _, eval := range evals {
		namespacedID := structs.NamespacedID{
			ID:        eval.JobID,
			Namespace: eval.Namespace,
		}
		_, exists := evalsByJobId[namespacedID]
		if !exists {
			trimmedEvals = append(trimmedEvals, eval)
			evalsByJobId[namespacedID] = struct{}{}
		}
	}

	if len(trimmedEvals) > 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: Adding %v evaluations for rescheduling failed allocations", len(trimmedEvals))
	}
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		Evals:        trimmedEvals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating, check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		ws := memdb.NewWatchSet()
		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if l := len(revoke); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Capture all the nodes
			var err error
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = state.NodesByIDPrefix(ws, prefix)
			} else {
				iter, err = state.Nodes(ws)
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := state.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
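// System jobs are included as well so they can be evaluated against the node even
// if they have no allocation on it yet.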
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	ws := memdb.NewWatchSet()
	allocs, err := snap.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       alloc.Namespace,
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       job.Namespace,
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but that the EvalUpdate does not.
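	// If the eval write fails, the error is returned to the caller and any node
	// update that preceded it stays committed.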
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}

// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		if e == nil {
			return
		}
		re, ok := e.(*structs.RecoverableError)
		if ok {
			// No need to wrap if error is already a RecoverableError
			reply.Error = re
		} else {
			reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
		}

		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(ws, args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}
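
	// Refuse the entire request if any named task has no Vault policies configured.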
	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens; if any fails, the whole group is canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						return err
					}

					results[task] = secret
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}

	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
		}

		return nil
	}

	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}

func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
	if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())

	if len(args.NodeEvents) == 0 {
		return fmt.Errorf("no node events given")
	}
	for nodeID, events := range args.NodeEvents {
		if len(events) == 0 {
			return fmt.Errorf("no node events given for node %q", nodeID)
		}
	}

	_, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.node: upserting node events failed: %v", err)
		return err
	}

	reply.Index = index
	return nil
}