github.com/manicqin/nomad@v0.9.5/nomad/node_endpoint.go (about) 1 package nomad 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 "sync" 8 "time" 9 10 "golang.org/x/sync/errgroup" 11 12 metrics "github.com/armon/go-metrics" 13 log "github.com/hashicorp/go-hclog" 14 memdb "github.com/hashicorp/go-memdb" 15 multierror "github.com/hashicorp/go-multierror" 16 vapi "github.com/hashicorp/vault/api" 17 18 "github.com/hashicorp/nomad/acl" 19 "github.com/hashicorp/nomad/helper/uuid" 20 "github.com/hashicorp/nomad/nomad/state" 21 "github.com/hashicorp/nomad/nomad/structs" 22 "github.com/hashicorp/raft" 23 ) 24 25 const ( 26 // batchUpdateInterval is how long we wait to batch updates 27 batchUpdateInterval = 50 * time.Millisecond 28 29 // maxParallelRequestsPerDerive is the maximum number of parallel Vault 30 // create token requests that may be outstanding per derive request 31 maxParallelRequestsPerDerive = 16 32 33 // NodeDrainEvents are the various drain messages 34 NodeDrainEventDrainSet = "Node drain strategy set" 35 NodeDrainEventDrainDisabled = "Node drain disabled" 36 NodeDrainEventDrainUpdated = "Node drain stategy updated" 37 38 // NodeEligibilityEventEligible is used when the nodes eligiblity is marked 39 // eligible 40 NodeEligibilityEventEligible = "Node marked as eligible for scheduling" 41 42 // NodeEligibilityEventIneligible is used when the nodes eligiblity is marked 43 // ineligible 44 NodeEligibilityEventIneligible = "Node marked as ineligible for scheduling" 45 46 // NodeHeartbeatEventReregistered is the message used when the node becomes 47 // reregistered by the heartbeat. 48 NodeHeartbeatEventReregistered = "Node reregistered by heartbeat" 49 ) 50 51 // Node endpoint is used for client interactions 52 type Node struct { 53 srv *Server 54 logger log.Logger 55 56 // ctx provides context regarding the underlying connection 57 ctx *RPCContext 58 59 // updates holds pending client status updates for allocations 60 updates []*structs.Allocation 61 62 // evals holds pending rescheduling eval updates triggered by failed allocations 63 evals []*structs.Evaluation 64 65 // updateFuture is used to wait for the pending batch update 66 // to complete. This may be nil if no batch is pending. 67 updateFuture *structs.BatchFuture 68 69 // updateTimer is the timer that will trigger the next batch 70 // update, and may be nil if there is no batch pending. 71 updateTimer *time.Timer 72 73 // updatesLock synchronizes access to the updates list, 74 // the future and the timer. 75 updatesLock sync.Mutex 76 } 77 78 // Register is used to upsert a client that is available for scheduling 79 func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error { 80 if done, err := n.srv.forward("Node.Register", args, args, reply); done { 81 // We have a valid node connection since there is no error from the 82 // forwarded server, so add the mapping to cache the 83 // connection and allow the server to send RPCs to the client. 84 if err == nil && n.ctx != nil && n.ctx.NodeID == "" { 85 n.ctx.NodeID = args.Node.ID 86 n.srv.addNodeConn(n.ctx) 87 } 88 89 return err 90 } 91 defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now()) 92 93 if n.srv.config.ACLEnforceNode { 94 // Check noderpc write permissions 95 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 96 return err 97 } else if aclObj != nil && !aclObj.AllowNodeRPCWrite() { 98 return structs.ErrPermissionDenied 99 } 100 } 101 102 // Validate the arguments 103 if args.Node == nil { 104 return fmt.Errorf("missing node for client registration") 105 } 106 if args.Node.ID == "" { 107 return fmt.Errorf("missing node ID for client registration") 108 } 109 if args.Node.Datacenter == "" { 110 return fmt.Errorf("missing datacenter for client registration") 111 } 112 if args.Node.Name == "" { 113 return fmt.Errorf("missing node name for client registration") 114 } 115 if len(args.Node.Attributes) == 0 { 116 return fmt.Errorf("missing attributes for client registration") 117 } 118 if args.Node.SecretID == "" { 119 return fmt.Errorf("missing node secret ID for client registration") 120 } 121 122 // Default the status if none is given 123 if args.Node.Status == "" { 124 args.Node.Status = structs.NodeStatusInit 125 } 126 if !structs.ValidNodeStatus(args.Node.Status) { 127 return fmt.Errorf("invalid status for node") 128 } 129 130 // Default to eligible for scheduling if unset 131 if args.Node.SchedulingEligibility == "" { 132 args.Node.SchedulingEligibility = structs.NodeSchedulingEligible 133 } 134 135 // Set the timestamp when the node is registered 136 args.Node.StatusUpdatedAt = time.Now().Unix() 137 138 // Compute the node class 139 if err := args.Node.ComputeClass(); err != nil { 140 return fmt.Errorf("failed to computed node class: %v", err) 141 } 142 143 // Look for the node so we can detect a state transition 144 snap, err := n.srv.fsm.State().Snapshot() 145 if err != nil { 146 return err 147 } 148 149 ws := memdb.NewWatchSet() 150 originalNode, err := snap.NodeByID(ws, args.Node.ID) 151 if err != nil { 152 return err 153 } 154 155 // Check if the SecretID has been tampered with 156 if originalNode != nil { 157 if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" { 158 return fmt.Errorf("node secret ID does not match. Not registering node.") 159 } 160 } 161 162 // We have a valid node connection, so add the mapping to cache the 163 // connection and allow the server to send RPCs to the client. We only cache 164 // the connection if it is not being forwarded from another server. 165 if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() { 166 n.ctx.NodeID = args.Node.ID 167 n.srv.addNodeConn(n.ctx) 168 } 169 170 // Commit this update via Raft 171 _, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args) 172 if err != nil { 173 n.logger.Error("register failed", "error", err) 174 return err 175 } 176 reply.NodeModifyIndex = index 177 178 // Check if we should trigger evaluations 179 originalStatus := structs.NodeStatusInit 180 if originalNode != nil { 181 originalStatus = originalNode.Status 182 } 183 transitionToReady := transitionedToReady(args.Node.Status, originalStatus) 184 if structs.ShouldDrainNode(args.Node.Status) || transitionToReady { 185 evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index) 186 if err != nil { 187 n.logger.Error("eval creation failed", "error", err) 188 return err 189 } 190 reply.EvalIDs = evalIDs 191 reply.EvalCreateIndex = evalIndex 192 } 193 194 // Check if we need to setup a heartbeat 195 if !args.Node.TerminalStatus() { 196 ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID) 197 if err != nil { 198 n.logger.Error("heartbeat reset failed", "error", err) 199 return err 200 } 201 reply.HeartbeatTTL = ttl 202 } 203 204 // Set the reply index 205 reply.Index = index 206 snap, err = n.srv.fsm.State().Snapshot() 207 if err != nil { 208 return err 209 } 210 211 n.srv.peerLock.RLock() 212 defer n.srv.peerLock.RUnlock() 213 if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { 214 n.logger.Error("failed to populate NodeUpdateResponse", "error", err) 215 return err 216 } 217 218 return nil 219 } 220 221 // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. 222 func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { 223 reply.LeaderRPCAddr = string(n.srv.raft.Leader()) 224 225 // Reply with config information required for future RPC requests 226 reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers)) 227 for _, v := range n.srv.localPeers { 228 reply.Servers = append(reply.Servers, 229 &structs.NodeServerInfo{ 230 RPCAdvertiseAddr: v.RPCAddr.String(), 231 RPCMajorVersion: int32(v.MajorVersion), 232 RPCMinorVersion: int32(v.MinorVersion), 233 Datacenter: v.Datacenter, 234 }) 235 } 236 237 // TODO(sean@): Use an indexed node count instead 238 // 239 // Snapshot is used only to iterate over all nodes to create a node 240 // count to send back to Nomad Clients in their heartbeat so Clients 241 // can estimate the size of the cluster. 242 ws := memdb.NewWatchSet() 243 iter, err := snap.Nodes(ws) 244 if err == nil { 245 for { 246 raw := iter.Next() 247 if raw == nil { 248 break 249 } 250 reply.NumNodes++ 251 } 252 } 253 254 return nil 255 } 256 257 // Deregister is used to remove a client from the cluster. If a client should 258 // just be made unavailable for scheduling, a status update is preferred. 259 func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error { 260 if done, err := n.srv.forward("Node.Deregister", args, args, reply); done { 261 return err 262 } 263 defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now()) 264 265 if args.NodeID == "" { 266 return fmt.Errorf("missing node ID for client deregistration") 267 } 268 269 // deregister takes a batch 270 repack := &structs.NodeBatchDeregisterRequest{ 271 NodeIDs: []string{args.NodeID}, 272 WriteRequest: args.WriteRequest, 273 } 274 275 return n.deregister(repack, reply, func() (interface{}, uint64, error) { 276 return n.srv.raftApply(structs.NodeDeregisterRequestType, args) 277 }) 278 } 279 280 // BatchDeregister is used to remove client nodes from the cluster. 281 func (n *Node) BatchDeregister(args *structs.NodeBatchDeregisterRequest, reply *structs.NodeUpdateResponse) error { 282 if done, err := n.srv.forward("Node.BatchDeregister", args, args, reply); done { 283 return err 284 } 285 defer metrics.MeasureSince([]string{"nomad", "client", "batch_deregister"}, time.Now()) 286 287 if len(args.NodeIDs) == 0 { 288 return fmt.Errorf("missing node IDs for client deregistration") 289 } 290 291 return n.deregister(args, reply, func() (interface{}, uint64, error) { 292 return n.srv.raftApply(structs.NodeBatchDeregisterRequestType, args) 293 }) 294 } 295 296 // deregister takes a raftMessage closure, to support both Deregister and BatchDeregister 297 func (n *Node) deregister(args *structs.NodeBatchDeregisterRequest, 298 reply *structs.NodeUpdateResponse, 299 raftApplyFn func() (interface{}, uint64, error), 300 ) error { 301 // Check request permissions 302 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 303 return err 304 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 305 return structs.ErrPermissionDenied 306 } 307 308 // Look for the node 309 snap, err := n.srv.fsm.State().Snapshot() 310 if err != nil { 311 return err 312 } 313 314 ws := memdb.NewWatchSet() 315 for _, nodeID := range args.NodeIDs { 316 node, err := snap.NodeByID(ws, nodeID) 317 if err != nil { 318 return err 319 } 320 if node == nil { 321 return fmt.Errorf("node not found") 322 } 323 } 324 325 // Commit this update via Raft 326 _, index, err := raftApplyFn() 327 if err != nil { 328 n.logger.Error("raft message failed", "error", err) 329 return err 330 } 331 332 for _, nodeID := range args.NodeIDs { 333 // Clear the heartbeat timer if any 334 n.srv.clearHeartbeatTimer(nodeID) 335 336 // Create the evaluations for this node 337 evalIDs, evalIndex, err := n.createNodeEvals(nodeID, index) 338 if err != nil { 339 n.logger.Error("eval creation failed", "error", err) 340 return err 341 } 342 343 // Determine if there are any Vault accessors on the node 344 accessors, err := snap.VaultAccessorsByNode(ws, nodeID) 345 if err != nil { 346 n.logger.Error("looking up accessors for node failed", "node_id", nodeID, "error", err) 347 return err 348 } 349 350 if l := len(accessors); l != 0 { 351 n.logger.Debug("revoking accessors on node due to deregister", "num_accessors", l, "node_id", nodeID) 352 if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil { 353 n.logger.Error("revoking accessors for node failed", "node_id", nodeID, "error", err) 354 return err 355 } 356 } 357 358 reply.EvalIDs = append(reply.EvalIDs, evalIDs...) 359 // Set the reply eval create index just the first time 360 if reply.EvalCreateIndex == 0 { 361 reply.EvalCreateIndex = evalIndex 362 } 363 } 364 365 reply.NodeModifyIndex = index 366 reply.Index = index 367 return nil 368 } 369 370 // UpdateStatus is used to update the status of a client node 371 func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error { 372 if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done { 373 // We have a valid node connection since there is no error from the 374 // forwarded server, so add the mapping to cache the 375 // connection and allow the server to send RPCs to the client. 376 if err == nil && n.ctx != nil && n.ctx.NodeID == "" { 377 n.ctx.NodeID = args.NodeID 378 n.srv.addNodeConn(n.ctx) 379 } 380 381 return err 382 } 383 384 defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now()) 385 386 // Verify the arguments 387 if args.NodeID == "" { 388 return fmt.Errorf("missing node ID for client status update") 389 } 390 if !structs.ValidNodeStatus(args.Status) { 391 return fmt.Errorf("invalid status for node") 392 } 393 394 // Look for the node 395 snap, err := n.srv.fsm.State().Snapshot() 396 if err != nil { 397 return err 398 } 399 400 ws := memdb.NewWatchSet() 401 node, err := snap.NodeByID(ws, args.NodeID) 402 if err != nil { 403 return err 404 } 405 if node == nil { 406 return fmt.Errorf("node not found") 407 } 408 409 // We have a valid node connection, so add the mapping to cache the 410 // connection and allow the server to send RPCs to the client. We only cache 411 // the connection if it is not being forwarded from another server. 412 if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() { 413 n.ctx.NodeID = args.NodeID 414 n.srv.addNodeConn(n.ctx) 415 } 416 417 // XXX: Could use the SecretID here but have to update the heartbeat system 418 // to track SecretIDs. 419 420 // Update the timestamp of when the node status was updated 421 args.UpdatedAt = time.Now().Unix() 422 423 // Commit this update via Raft 424 var index uint64 425 if node.Status != args.Status { 426 // Attach an event if we are updating the node status to ready when it 427 // is down via a heartbeat 428 if node.Status == structs.NodeStatusDown && args.NodeEvent == nil { 429 args.NodeEvent = structs.NewNodeEvent(). 430 SetSubsystem(structs.NodeEventSubsystemCluster). 431 SetMessage(NodeHeartbeatEventReregistered) 432 } 433 434 _, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args) 435 if err != nil { 436 n.logger.Error("status update failed", "error", err) 437 return err 438 } 439 reply.NodeModifyIndex = index 440 } 441 442 // Check if we should trigger evaluations 443 transitionToReady := transitionedToReady(args.Status, node.Status) 444 if structs.ShouldDrainNode(args.Status) || transitionToReady { 445 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 446 if err != nil { 447 n.logger.Error("eval creation failed", "error", err) 448 return err 449 } 450 reply.EvalIDs = evalIDs 451 reply.EvalCreateIndex = evalIndex 452 } 453 454 // Check if we need to setup a heartbeat 455 switch args.Status { 456 case structs.NodeStatusDown: 457 // Determine if there are any Vault accessors on the node 458 accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID) 459 if err != nil { 460 n.logger.Error("looking up accessors for node failed", "node_id", args.NodeID, "error", err) 461 return err 462 } 463 464 if l := len(accessors); l != 0 { 465 n.logger.Debug("revoking accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID) 466 if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil { 467 n.logger.Error("revoking accessors for node failed", "node_id", args.NodeID, "error", err) 468 return err 469 } 470 } 471 default: 472 ttl, err := n.srv.resetHeartbeatTimer(args.NodeID) 473 if err != nil { 474 n.logger.Error("heartbeat reset failed", "error", err) 475 return err 476 } 477 reply.HeartbeatTTL = ttl 478 } 479 480 // Set the reply index and leader 481 reply.Index = index 482 n.srv.peerLock.RLock() 483 defer n.srv.peerLock.RUnlock() 484 if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { 485 n.logger.Error("failed to populate NodeUpdateResponse", "error", err) 486 return err 487 } 488 489 return nil 490 } 491 492 // transitionedToReady is a helper that takes a nodes new and old status and 493 // returns whether it has transitioned to ready. 494 func transitionedToReady(newStatus, oldStatus string) bool { 495 initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady 496 terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady 497 return initToReady || terminalToReady 498 } 499 500 // UpdateDrain is used to update the drain mode of a client node 501 func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, 502 reply *structs.NodeDrainUpdateResponse) error { 503 if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done { 504 return err 505 } 506 defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now()) 507 508 // Check node write permissions 509 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 510 return err 511 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 512 return structs.ErrPermissionDenied 513 } 514 515 // Verify the arguments 516 if args.NodeID == "" { 517 return fmt.Errorf("missing node ID for drain update") 518 } 519 if args.NodeEvent != nil { 520 return fmt.Errorf("node event must not be set") 521 } 522 523 // Look for the node 524 snap, err := n.srv.fsm.State().Snapshot() 525 if err != nil { 526 return err 527 } 528 node, err := snap.NodeByID(nil, args.NodeID) 529 if err != nil { 530 return err 531 } 532 if node == nil { 533 return fmt.Errorf("node not found") 534 } 535 536 now := time.Now().UTC() 537 538 // Update the timestamp of when the node status was updated 539 args.UpdatedAt = now.Unix() 540 541 // COMPAT: Remove in 0.9. Attempt to upgrade the request if it is of the old 542 // format. 543 if args.Drain && args.DrainStrategy == nil { 544 args.DrainStrategy = &structs.DrainStrategy{ 545 DrainSpec: structs.DrainSpec{ 546 Deadline: -1 * time.Second, // Force drain 547 }, 548 } 549 } 550 551 // Setup drain strategy 552 if args.DrainStrategy != nil { 553 // Mark start time for the drain 554 if node.DrainStrategy == nil { 555 args.DrainStrategy.StartedAt = now 556 } else { 557 args.DrainStrategy.StartedAt = node.DrainStrategy.StartedAt 558 } 559 560 // Mark the deadline time 561 if args.DrainStrategy.Deadline.Nanoseconds() > 0 { 562 args.DrainStrategy.ForceDeadline = now.Add(args.DrainStrategy.Deadline) 563 } 564 } 565 566 // Construct the node event 567 args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemDrain) 568 if node.DrainStrategy == nil && args.DrainStrategy != nil { 569 args.NodeEvent.SetMessage(NodeDrainEventDrainSet) 570 } else if node.DrainStrategy != nil && args.DrainStrategy != nil { 571 args.NodeEvent.SetMessage(NodeDrainEventDrainUpdated) 572 } else if node.DrainStrategy != nil && args.DrainStrategy == nil { 573 args.NodeEvent.SetMessage(NodeDrainEventDrainDisabled) 574 } else { 575 args.NodeEvent = nil 576 } 577 578 // Commit this update via Raft 579 _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) 580 if err != nil { 581 n.logger.Error("drain update failed", "error", err) 582 return err 583 } 584 reply.NodeModifyIndex = index 585 586 // If the node is transitioning to be eligible, create Node evaluations 587 // because there may be a System job registered that should be evaluated. 588 if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil { 589 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 590 if err != nil { 591 n.logger.Error("eval creation failed", "error", err) 592 return err 593 } 594 reply.EvalIDs = evalIDs 595 reply.EvalCreateIndex = evalIndex 596 } 597 598 // Set the reply index 599 reply.Index = index 600 return nil 601 } 602 603 // UpdateEligibility is used to update the scheduling eligibility of a node 604 func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest, 605 reply *structs.NodeEligibilityUpdateResponse) error { 606 if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done { 607 return err 608 } 609 defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now()) 610 611 // Check node write permissions 612 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 613 return err 614 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 615 return structs.ErrPermissionDenied 616 } 617 618 // Verify the arguments 619 if args.NodeID == "" { 620 return fmt.Errorf("missing node ID for setting scheduling eligibility") 621 } 622 if args.NodeEvent != nil { 623 return fmt.Errorf("node event must not be set") 624 } 625 626 // Check that only allowed types are set 627 switch args.Eligibility { 628 case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible: 629 default: 630 return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility) 631 } 632 633 // Look for the node 634 snap, err := n.srv.fsm.State().Snapshot() 635 if err != nil { 636 return err 637 } 638 node, err := snap.NodeByID(nil, args.NodeID) 639 if err != nil { 640 return err 641 } 642 if node == nil { 643 return fmt.Errorf("node not found") 644 } 645 646 if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible { 647 return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining") 648 } 649 650 switch args.Eligibility { 651 case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible: 652 default: 653 return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility) 654 } 655 656 // Update the timestamp of when the node status was updated 657 args.UpdatedAt = time.Now().Unix() 658 659 // Construct the node event 660 args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster) 661 if node.SchedulingEligibility == args.Eligibility { 662 return nil // Nothing to do 663 } else if args.Eligibility == structs.NodeSchedulingEligible { 664 args.NodeEvent.SetMessage(NodeEligibilityEventEligible) 665 } else { 666 args.NodeEvent.SetMessage(NodeEligibilityEventIneligible) 667 } 668 669 // Commit this update via Raft 670 outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args) 671 if err != nil { 672 n.logger.Error("eligibility update failed", "error", err) 673 return err 674 } 675 if outErr != nil { 676 if err, ok := outErr.(error); ok && err != nil { 677 n.logger.Error("eligibility update failed", "error", err) 678 return err 679 } 680 } 681 682 // If the node is transitioning to be eligible, create Node evaluations 683 // because there may be a System job registered that should be evaluated. 684 if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible { 685 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index) 686 if err != nil { 687 n.logger.Error("eval creation failed", "error", err) 688 return err 689 } 690 reply.EvalIDs = evalIDs 691 reply.EvalCreateIndex = evalIndex 692 } 693 694 // Set the reply index 695 reply.Index = index 696 return nil 697 } 698 699 // Evaluate is used to force a re-evaluation of the node 700 func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error { 701 if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done { 702 return err 703 } 704 defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now()) 705 706 // Check node write permissions 707 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 708 return err 709 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 710 return structs.ErrPermissionDenied 711 } 712 713 // Verify the arguments 714 if args.NodeID == "" { 715 return fmt.Errorf("missing node ID for evaluation") 716 } 717 718 // Look for the node 719 snap, err := n.srv.fsm.State().Snapshot() 720 if err != nil { 721 return err 722 } 723 ws := memdb.NewWatchSet() 724 node, err := snap.NodeByID(ws, args.NodeID) 725 if err != nil { 726 return err 727 } 728 if node == nil { 729 return fmt.Errorf("node not found") 730 } 731 732 // Create the evaluation 733 evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex) 734 if err != nil { 735 n.logger.Error("eval creation failed", "error", err) 736 return err 737 } 738 reply.EvalIDs = evalIDs 739 reply.EvalCreateIndex = evalIndex 740 741 // Set the reply index 742 reply.Index = evalIndex 743 744 n.srv.peerLock.RLock() 745 defer n.srv.peerLock.RUnlock() 746 if err := n.constructNodeServerInfoResponse(snap, reply); err != nil { 747 n.logger.Error("failed to populate NodeUpdateResponse", "error", err) 748 return err 749 } 750 return nil 751 } 752 753 // GetNode is used to request information about a specific node 754 func (n *Node) GetNode(args *structs.NodeSpecificRequest, 755 reply *structs.SingleNodeResponse) error { 756 if done, err := n.srv.forward("Node.GetNode", args, args, reply); done { 757 return err 758 } 759 defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) 760 761 // Check node read permissions 762 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 763 // If ResolveToken had an unexpected error return that 764 if err != structs.ErrTokenNotFound { 765 return err 766 } 767 768 // Attempt to lookup AuthToken as a Node.SecretID since nodes 769 // call this endpoint and don't have an ACL token. 770 node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken) 771 if stateErr != nil { 772 // Return the original ResolveToken error with this err 773 var merr multierror.Error 774 merr.Errors = append(merr.Errors, err, stateErr) 775 return merr.ErrorOrNil() 776 } 777 778 // Not a node or a valid ACL token 779 if node == nil { 780 return structs.ErrTokenNotFound 781 } 782 } else if aclObj != nil && !aclObj.AllowNodeRead() { 783 return structs.ErrPermissionDenied 784 } 785 786 // Setup the blocking query 787 opts := blockingOptions{ 788 queryOpts: &args.QueryOptions, 789 queryMeta: &reply.QueryMeta, 790 run: func(ws memdb.WatchSet, state *state.StateStore) error { 791 // Verify the arguments 792 if args.NodeID == "" { 793 return fmt.Errorf("missing node ID") 794 } 795 796 // Look for the node 797 out, err := state.NodeByID(ws, args.NodeID) 798 if err != nil { 799 return err 800 } 801 802 // Setup the output 803 if out != nil { 804 // Clear the secret ID 805 reply.Node = out.Copy() 806 reply.Node.SecretID = "" 807 reply.Index = out.ModifyIndex 808 } else { 809 // Use the last index that affected the nodes table 810 index, err := state.Index("nodes") 811 if err != nil { 812 return err 813 } 814 reply.Node = nil 815 reply.Index = index 816 } 817 818 // Set the query response 819 n.srv.setQueryMeta(&reply.QueryMeta) 820 return nil 821 }} 822 return n.srv.blockingRPC(&opts) 823 } 824 825 // GetAllocs is used to request allocations for a specific node 826 func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, 827 reply *structs.NodeAllocsResponse) error { 828 if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done { 829 return err 830 } 831 defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now()) 832 833 // Check node read and namespace job read permissions 834 aclObj, err := n.srv.ResolveToken(args.AuthToken) 835 if err != nil { 836 return err 837 } 838 if aclObj != nil && !aclObj.AllowNodeRead() { 839 return structs.ErrPermissionDenied 840 } 841 842 // cache namespace perms 843 readableNamespaces := map[string]bool{} 844 845 // readNS is a caching namespace read-job helper 846 readNS := func(ns string) bool { 847 if aclObj == nil { 848 // ACLs are disabled; everything is readable 849 return true 850 } 851 852 if readable, ok := readableNamespaces[ns]; ok { 853 // cache hit 854 return readable 855 } 856 857 // cache miss 858 readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob) 859 readableNamespaces[ns] = readable 860 return readable 861 } 862 863 // Verify the arguments 864 if args.NodeID == "" { 865 return fmt.Errorf("missing node ID") 866 } 867 868 // Setup the blocking query 869 opts := blockingOptions{ 870 queryOpts: &args.QueryOptions, 871 queryMeta: &reply.QueryMeta, 872 run: func(ws memdb.WatchSet, state *state.StateStore) error { 873 // Look for the node 874 allocs, err := state.AllocsByNode(ws, args.NodeID) 875 if err != nil { 876 return err 877 } 878 879 // Setup the output 880 if n := len(allocs); n != 0 { 881 reply.Allocs = make([]*structs.Allocation, 0, n) 882 for _, alloc := range allocs { 883 if readNS(alloc.Namespace) { 884 reply.Allocs = append(reply.Allocs, alloc) 885 } 886 887 // Get the max of all allocs since 888 // subsequent requests need to start 889 // from the latest index 890 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 891 } 892 } else { 893 reply.Allocs = nil 894 895 // Use the last index that affected the nodes table 896 index, err := state.Index("allocs") 897 if err != nil { 898 return err 899 } 900 901 // Must provide non-zero index to prevent blocking 902 // Index 1 is impossible anyways (due to Raft internals) 903 if index == 0 { 904 reply.Index = 1 905 } else { 906 reply.Index = index 907 } 908 } 909 return nil 910 }} 911 return n.srv.blockingRPC(&opts) 912 } 913 914 // GetClientAllocs is used to request a lightweight list of alloc modify indexes 915 // per allocation. 916 func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, 917 reply *structs.NodeClientAllocsResponse) error { 918 if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done { 919 // We have a valid node connection since there is no error from the 920 // forwarded server, so add the mapping to cache the 921 // connection and allow the server to send RPCs to the client. 922 if err == nil && n.ctx != nil && n.ctx.NodeID == "" { 923 n.ctx.NodeID = args.NodeID 924 n.srv.addNodeConn(n.ctx) 925 } 926 927 return err 928 } 929 defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now()) 930 931 // Verify the arguments 932 if args.NodeID == "" { 933 return fmt.Errorf("missing node ID") 934 } 935 936 // numOldAllocs is used to detect if there is a garbage collection event 937 // that effects the node. When an allocation is garbage collected, that does 938 // not change the modify index changes and thus the query won't unblock, 939 // even though the set of allocations on the node has changed. 940 var numOldAllocs int 941 942 // Setup the blocking query 943 opts := blockingOptions{ 944 queryOpts: &args.QueryOptions, 945 queryMeta: &reply.QueryMeta, 946 run: func(ws memdb.WatchSet, state *state.StateStore) error { 947 // Look for the node 948 node, err := state.NodeByID(ws, args.NodeID) 949 if err != nil { 950 return err 951 } 952 953 var allocs []*structs.Allocation 954 if node != nil { 955 if args.SecretID == "" { 956 return fmt.Errorf("missing node secret ID for client status update") 957 } else if args.SecretID != node.SecretID { 958 return fmt.Errorf("node secret ID does not match") 959 } 960 961 // We have a valid node connection, so add the mapping to cache the 962 // connection and allow the server to send RPCs to the client. We only cache 963 // the connection if it is not being forwarded from another server. 964 if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() { 965 n.ctx.NodeID = args.NodeID 966 n.srv.addNodeConn(n.ctx) 967 } 968 969 var err error 970 allocs, err = state.AllocsByNode(ws, args.NodeID) 971 if err != nil { 972 return err 973 } 974 } 975 976 reply.Allocs = make(map[string]uint64) 977 reply.MigrateTokens = make(map[string]string) 978 979 // preferTableIndex is used to determine whether we should build the 980 // response index based on the full table indexes versus the modify 981 // indexes of the allocations on the specific node. This is 982 // preferred in the case that the node doesn't yet have allocations 983 // or when we detect a GC that effects the node. 984 preferTableIndex := true 985 986 // Setup the output 987 if numAllocs := len(allocs); numAllocs != 0 { 988 preferTableIndex = false 989 990 for _, alloc := range allocs { 991 reply.Allocs[alloc.ID] = alloc.AllocModifyIndex 992 993 // If the allocation is going to do a migration, create a 994 // migration token so that the client can authenticate with 995 // the node hosting the previous allocation. 996 if alloc.ShouldMigrate() { 997 prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation) 998 if err != nil { 999 return err 1000 } 1001 1002 if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID { 1003 allocNode, err := state.NodeByID(ws, prevAllocation.NodeID) 1004 if err != nil { 1005 return err 1006 } 1007 if allocNode == nil { 1008 // Node must have been GC'd so skip the token 1009 continue 1010 } 1011 1012 token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID) 1013 if err != nil { 1014 return err 1015 } 1016 reply.MigrateTokens[alloc.ID] = token 1017 } 1018 } 1019 1020 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 1021 } 1022 1023 // Determine if we have less allocations than before. This 1024 // indicates there was a garbage collection 1025 if numAllocs < numOldAllocs { 1026 preferTableIndex = true 1027 } 1028 1029 // Store the new number of allocations 1030 numOldAllocs = numAllocs 1031 } 1032 1033 if preferTableIndex { 1034 // Use the last index that affected the nodes table 1035 index, err := state.Index("allocs") 1036 if err != nil { 1037 return err 1038 } 1039 1040 // Must provide non-zero index to prevent blocking 1041 // Index 1 is impossible anyways (due to Raft internals) 1042 if index == 0 { 1043 reply.Index = 1 1044 } else { 1045 reply.Index = index 1046 } 1047 } 1048 return nil 1049 }} 1050 return n.srv.blockingRPC(&opts) 1051 } 1052 1053 // UpdateAlloc is used to update the client status of an allocation 1054 func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error { 1055 if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done { 1056 return err 1057 } 1058 defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now()) 1059 1060 // Ensure at least a single alloc 1061 if len(args.Alloc) == 0 { 1062 return fmt.Errorf("must update at least one allocation") 1063 } 1064 1065 // Ensure that evals aren't set from client RPCs 1066 // We create them here before the raft update 1067 if len(args.Evals) != 0 { 1068 return fmt.Errorf("evals field must not be set") 1069 } 1070 1071 // Update modified timestamp for client initiated allocation updates 1072 now := time.Now() 1073 var evals []*structs.Evaluation 1074 1075 for _, alloc := range args.Alloc { 1076 alloc.ModifyTime = now.UTC().UnixNano() 1077 1078 // Add an evaluation if this is a failed alloc that is eligible for rescheduling 1079 if alloc.ClientStatus == structs.AllocClientStatusFailed { 1080 // Only create evaluations if this is an existing alloc, 1081 // and eligible as per its task group's ReschedulePolicy 1082 if existingAlloc, _ := n.srv.State().AllocByID(nil, alloc.ID); existingAlloc != nil { 1083 job, err := n.srv.State().JobByID(nil, existingAlloc.Namespace, existingAlloc.JobID) 1084 if err != nil { 1085 n.logger.Error("UpdateAlloc unable to find job", "job", existingAlloc.JobID, "error", err) 1086 continue 1087 } 1088 if job == nil { 1089 n.logger.Debug("UpdateAlloc unable to find job", "job", existingAlloc.JobID) 1090 continue 1091 } 1092 taskGroup := job.LookupTaskGroup(existingAlloc.TaskGroup) 1093 if taskGroup != nil && existingAlloc.FollowupEvalID == "" && existingAlloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) { 1094 eval := &structs.Evaluation{ 1095 ID: uuid.Generate(), 1096 Namespace: existingAlloc.Namespace, 1097 TriggeredBy: structs.EvalTriggerRetryFailedAlloc, 1098 JobID: existingAlloc.JobID, 1099 Type: job.Type, 1100 Priority: job.Priority, 1101 Status: structs.EvalStatusPending, 1102 CreateTime: now.UTC().UnixNano(), 1103 ModifyTime: now.UTC().UnixNano(), 1104 } 1105 evals = append(evals, eval) 1106 } 1107 } 1108 } 1109 } 1110 1111 // Add this to the batch 1112 n.updatesLock.Lock() 1113 n.updates = append(n.updates, args.Alloc...) 1114 n.evals = append(n.evals, evals...) 1115 1116 // Start a new batch if none 1117 future := n.updateFuture 1118 if future == nil { 1119 future = structs.NewBatchFuture() 1120 n.updateFuture = future 1121 n.updateTimer = time.AfterFunc(batchUpdateInterval, func() { 1122 // Get the pending updates 1123 n.updatesLock.Lock() 1124 updates := n.updates 1125 evals := n.evals 1126 future := n.updateFuture 1127 n.updates = nil 1128 n.evals = nil 1129 n.updateFuture = nil 1130 n.updateTimer = nil 1131 n.updatesLock.Unlock() 1132 1133 // Perform the batch update 1134 n.batchUpdate(future, updates, evals) 1135 }) 1136 } 1137 n.updatesLock.Unlock() 1138 1139 // Wait for the future 1140 if err := future.Wait(); err != nil { 1141 return err 1142 } 1143 1144 // Setup the response 1145 reply.Index = future.Index() 1146 return nil 1147 } 1148 1149 // batchUpdate is used to update all the allocations 1150 func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) { 1151 // Group pending evals by jobID to prevent creating unnecessary evals 1152 evalsByJobId := make(map[structs.NamespacedID]struct{}) 1153 var trimmedEvals []*structs.Evaluation 1154 for _, eval := range evals { 1155 namespacedID := structs.NamespacedID{ 1156 ID: eval.JobID, 1157 Namespace: eval.Namespace, 1158 } 1159 _, exists := evalsByJobId[namespacedID] 1160 if !exists { 1161 now := time.Now().UTC().UnixNano() 1162 eval.CreateTime = now 1163 eval.ModifyTime = now 1164 trimmedEvals = append(trimmedEvals, eval) 1165 evalsByJobId[namespacedID] = struct{}{} 1166 } 1167 } 1168 1169 if len(trimmedEvals) > 0 { 1170 n.logger.Debug("adding evaluations for rescheduling failed allocations", "num_evals", len(trimmedEvals)) 1171 } 1172 // Prepare the batch update 1173 batch := &structs.AllocUpdateRequest{ 1174 Alloc: updates, 1175 Evals: trimmedEvals, 1176 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 1177 } 1178 1179 // Commit this update via Raft 1180 var mErr multierror.Error 1181 _, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch) 1182 if err != nil { 1183 n.logger.Error("alloc update failed", "error", err) 1184 mErr.Errors = append(mErr.Errors, err) 1185 } 1186 1187 // For each allocation we are updating check if we should revoke any 1188 // Vault Accessors 1189 var revoke []*structs.VaultAccessor 1190 for _, alloc := range updates { 1191 // Skip any allocation that isn't dead on the client 1192 if !alloc.Terminated() { 1193 continue 1194 } 1195 1196 // Determine if there are any Vault accessors for the allocation 1197 ws := memdb.NewWatchSet() 1198 accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID) 1199 if err != nil { 1200 n.logger.Error("looking up Vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err) 1201 mErr.Errors = append(mErr.Errors, err) 1202 } 1203 1204 revoke = append(revoke, accessors...) 1205 } 1206 1207 if l := len(revoke); l != 0 { 1208 n.logger.Debug("revoking accessors due to terminal allocations", "num_accessors", l) 1209 if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil { 1210 n.logger.Error("batched Vault accessor revocation failed", "error", err) 1211 mErr.Errors = append(mErr.Errors, err) 1212 } 1213 } 1214 1215 // Respond to the future 1216 future.Respond(index, mErr.ErrorOrNil()) 1217 } 1218 1219 // List is used to list the available nodes 1220 func (n *Node) List(args *structs.NodeListRequest, 1221 reply *structs.NodeListResponse) error { 1222 if done, err := n.srv.forward("Node.List", args, args, reply); done { 1223 return err 1224 } 1225 defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) 1226 1227 // Check node read permissions 1228 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 1229 return err 1230 } else if aclObj != nil && !aclObj.AllowNodeRead() { 1231 return structs.ErrPermissionDenied 1232 } 1233 1234 // Setup the blocking query 1235 opts := blockingOptions{ 1236 queryOpts: &args.QueryOptions, 1237 queryMeta: &reply.QueryMeta, 1238 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1239 // Capture all the nodes 1240 var err error 1241 var iter memdb.ResultIterator 1242 if prefix := args.QueryOptions.Prefix; prefix != "" { 1243 iter, err = state.NodesByIDPrefix(ws, prefix) 1244 } else { 1245 iter, err = state.Nodes(ws) 1246 } 1247 if err != nil { 1248 return err 1249 } 1250 1251 var nodes []*structs.NodeListStub 1252 for { 1253 raw := iter.Next() 1254 if raw == nil { 1255 break 1256 } 1257 node := raw.(*structs.Node) 1258 nodes = append(nodes, node.Stub()) 1259 } 1260 reply.Nodes = nodes 1261 1262 // Use the last index that affected the jobs table 1263 index, err := state.Index("nodes") 1264 if err != nil { 1265 return err 1266 } 1267 reply.Index = index 1268 1269 // Set the query response 1270 n.srv.setQueryMeta(&reply.QueryMeta) 1271 return nil 1272 }} 1273 return n.srv.blockingRPC(&opts) 1274 } 1275 1276 // createNodeEvals is used to create evaluations for each alloc on a node. 1277 // Each Eval is scoped to a job, so we need to potentially trigger many evals. 1278 func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) { 1279 // Snapshot the state 1280 snap, err := n.srv.fsm.State().Snapshot() 1281 if err != nil { 1282 return nil, 0, fmt.Errorf("failed to snapshot state: %v", err) 1283 } 1284 1285 // Find all the allocations for this node 1286 ws := memdb.NewWatchSet() 1287 allocs, err := snap.AllocsByNode(ws, nodeID) 1288 if err != nil { 1289 return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err) 1290 } 1291 1292 sysJobsIter, err := snap.JobsByScheduler(ws, "system") 1293 if err != nil { 1294 return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err) 1295 } 1296 1297 var sysJobs []*structs.Job 1298 for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() { 1299 sysJobs = append(sysJobs, job.(*structs.Job)) 1300 } 1301 1302 // Fast-path if nothing to do 1303 if len(allocs) == 0 && len(sysJobs) == 0 { 1304 return nil, 0, nil 1305 } 1306 1307 // Create an eval for each JobID affected 1308 var evals []*structs.Evaluation 1309 var evalIDs []string 1310 jobIDs := make(map[string]struct{}) 1311 now := time.Now().UTC().UnixNano() 1312 1313 for _, alloc := range allocs { 1314 // Deduplicate on JobID 1315 if _, ok := jobIDs[alloc.JobID]; ok { 1316 continue 1317 } 1318 jobIDs[alloc.JobID] = struct{}{} 1319 1320 // Create a new eval 1321 eval := &structs.Evaluation{ 1322 ID: uuid.Generate(), 1323 Namespace: alloc.Namespace, 1324 Priority: alloc.Job.Priority, 1325 Type: alloc.Job.Type, 1326 TriggeredBy: structs.EvalTriggerNodeUpdate, 1327 JobID: alloc.JobID, 1328 NodeID: nodeID, 1329 NodeModifyIndex: nodeIndex, 1330 Status: structs.EvalStatusPending, 1331 CreateTime: now, 1332 ModifyTime: now, 1333 } 1334 evals = append(evals, eval) 1335 evalIDs = append(evalIDs, eval.ID) 1336 } 1337 1338 // Create an evaluation for each system job. 1339 for _, job := range sysJobs { 1340 // Still dedup on JobID as the node may already have the system job. 1341 if _, ok := jobIDs[job.ID]; ok { 1342 continue 1343 } 1344 jobIDs[job.ID] = struct{}{} 1345 1346 // Create a new eval 1347 eval := &structs.Evaluation{ 1348 ID: uuid.Generate(), 1349 Namespace: job.Namespace, 1350 Priority: job.Priority, 1351 Type: job.Type, 1352 TriggeredBy: structs.EvalTriggerNodeUpdate, 1353 JobID: job.ID, 1354 NodeID: nodeID, 1355 NodeModifyIndex: nodeIndex, 1356 Status: structs.EvalStatusPending, 1357 CreateTime: now, 1358 ModifyTime: now, 1359 } 1360 evals = append(evals, eval) 1361 evalIDs = append(evalIDs, eval.ID) 1362 } 1363 1364 // Create the Raft transaction 1365 update := &structs.EvalUpdateRequest{ 1366 Evals: evals, 1367 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 1368 } 1369 1370 // Commit this evaluation via Raft 1371 // XXX: There is a risk of partial failure where the node update succeeds 1372 // but that the EvalUpdate does not. 1373 _, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update) 1374 if err != nil { 1375 return nil, 0, err 1376 } 1377 return evalIDs, evalIndex, nil 1378 } 1379 1380 // DeriveVaultToken is used by the clients to request wrapped Vault tokens for 1381 // tasks 1382 func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest, 1383 reply *structs.DeriveVaultTokenResponse) error { 1384 1385 // setErr is a helper for setting the recoverable error on the reply and 1386 // logging it 1387 setErr := func(e error, recoverable bool) { 1388 if e == nil { 1389 return 1390 } 1391 re, ok := e.(*structs.RecoverableError) 1392 if ok { 1393 // No need to wrap if error is already a RecoverableError 1394 reply.Error = re 1395 } else { 1396 reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError) 1397 } 1398 1399 n.logger.Error("DeriveVaultToken failed", "recoverable", recoverable, "error", e) 1400 } 1401 1402 if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done { 1403 setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader) 1404 return nil 1405 } 1406 defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now()) 1407 1408 // Verify the arguments 1409 if args.NodeID == "" { 1410 setErr(fmt.Errorf("missing node ID"), false) 1411 return nil 1412 } 1413 if args.SecretID == "" { 1414 setErr(fmt.Errorf("missing node SecretID"), false) 1415 return nil 1416 } 1417 if args.AllocID == "" { 1418 setErr(fmt.Errorf("missing allocation ID"), false) 1419 return nil 1420 } 1421 if len(args.Tasks) == 0 { 1422 setErr(fmt.Errorf("no tasks specified"), false) 1423 return nil 1424 } 1425 1426 // Verify the following: 1427 // * The Node exists and has the correct SecretID 1428 // * The Allocation exists on the specified node 1429 // * The allocation contains the given tasks and they each require Vault 1430 // tokens 1431 snap, err := n.srv.fsm.State().Snapshot() 1432 if err != nil { 1433 setErr(err, false) 1434 return nil 1435 } 1436 ws := memdb.NewWatchSet() 1437 node, err := snap.NodeByID(ws, args.NodeID) 1438 if err != nil { 1439 setErr(err, false) 1440 return nil 1441 } 1442 if node == nil { 1443 setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false) 1444 return nil 1445 } 1446 if node.SecretID != args.SecretID { 1447 setErr(fmt.Errorf("SecretID mismatch"), false) 1448 return nil 1449 } 1450 1451 alloc, err := snap.AllocByID(ws, args.AllocID) 1452 if err != nil { 1453 setErr(err, false) 1454 return nil 1455 } 1456 if alloc == nil { 1457 setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false) 1458 return nil 1459 } 1460 if alloc.NodeID != args.NodeID { 1461 setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false) 1462 return nil 1463 } 1464 if alloc.TerminalStatus() { 1465 setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false) 1466 return nil 1467 } 1468 1469 // Check the policies 1470 policies := alloc.Job.VaultPolicies() 1471 if policies == nil { 1472 setErr(fmt.Errorf("Job doesn't require Vault policies"), false) 1473 return nil 1474 } 1475 tg, ok := policies[alloc.TaskGroup] 1476 if !ok { 1477 setErr(fmt.Errorf("Task group does not require Vault policies"), false) 1478 return nil 1479 } 1480 1481 var unneeded []string 1482 for _, task := range args.Tasks { 1483 taskVault := tg[task] 1484 if taskVault == nil || len(taskVault.Policies) == 0 { 1485 unneeded = append(unneeded, task) 1486 } 1487 } 1488 1489 if len(unneeded) != 0 { 1490 e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s", 1491 strings.Join(unneeded, ", ")) 1492 setErr(e, false) 1493 return nil 1494 } 1495 1496 // At this point the request is valid and we should contact Vault for 1497 // tokens. 1498 1499 // Create an error group where we will spin up a fixed set of goroutines to 1500 // handle deriving tokens but where if any fails the whole group is 1501 // canceled. 1502 g, ctx := errgroup.WithContext(context.Background()) 1503 1504 // Cap the handlers 1505 handlers := len(args.Tasks) 1506 if handlers > maxParallelRequestsPerDerive { 1507 handlers = maxParallelRequestsPerDerive 1508 } 1509 1510 // Create the Vault Tokens 1511 input := make(chan string, handlers) 1512 results := make(map[string]*vapi.Secret, len(args.Tasks)) 1513 for i := 0; i < handlers; i++ { 1514 g.Go(func() error { 1515 for { 1516 select { 1517 case task, ok := <-input: 1518 if !ok { 1519 return nil 1520 } 1521 1522 secret, err := n.srv.vault.CreateToken(ctx, alloc, task) 1523 if err != nil { 1524 return err 1525 } 1526 1527 results[task] = secret 1528 case <-ctx.Done(): 1529 return nil 1530 } 1531 } 1532 }) 1533 } 1534 1535 // Send the input 1536 go func() { 1537 defer close(input) 1538 for _, task := range args.Tasks { 1539 select { 1540 case <-ctx.Done(): 1541 return 1542 case input <- task: 1543 } 1544 } 1545 1546 }() 1547 1548 // Wait for everything to complete or for an error 1549 createErr := g.Wait() 1550 1551 // Retrieve the results 1552 accessors := make([]*structs.VaultAccessor, 0, len(results)) 1553 tokens := make(map[string]string, len(results)) 1554 for task, secret := range results { 1555 w := secret.WrapInfo 1556 tokens[task] = w.Token 1557 accessor := &structs.VaultAccessor{ 1558 Accessor: w.WrappedAccessor, 1559 Task: task, 1560 NodeID: alloc.NodeID, 1561 AllocID: alloc.ID, 1562 CreationTTL: w.TTL, 1563 } 1564 1565 accessors = append(accessors, accessor) 1566 } 1567 1568 // If there was an error revoke the created tokens 1569 if createErr != nil { 1570 n.logger.Error("Vault token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr) 1571 1572 if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil { 1573 n.logger.Error("Vault token revocation for alloc failed", "alloc_id", alloc.ID, "error", revokeErr) 1574 } 1575 1576 if rerr, ok := createErr.(*structs.RecoverableError); ok { 1577 reply.Error = rerr 1578 } else { 1579 reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError) 1580 } 1581 1582 return nil 1583 } 1584 1585 // Commit to Raft before returning any of the tokens 1586 req := structs.VaultAccessorsRequest{Accessors: accessors} 1587 _, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req) 1588 if err != nil { 1589 n.logger.Error("registering Vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err) 1590 1591 // Determine if we can recover from the error 1592 retry := false 1593 switch err { 1594 case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout: 1595 retry = true 1596 } 1597 1598 setErr(err, retry) 1599 return nil 1600 } 1601 1602 reply.Index = index 1603 reply.Tasks = tokens 1604 n.srv.setQueryMeta(&reply.QueryMeta) 1605 return nil 1606 } 1607 1608 func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error { 1609 if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done { 1610 return err 1611 } 1612 defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now()) 1613 1614 if len(args.NodeEvents) == 0 { 1615 return fmt.Errorf("no node events given") 1616 } 1617 for nodeID, events := range args.NodeEvents { 1618 if len(events) == 0 { 1619 return fmt.Errorf("no node events given for node %q", nodeID) 1620 } 1621 } 1622 1623 _, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args) 1624 if err != nil { 1625 n.logger.Error("upserting node events failed", "error", err) 1626 return err 1627 } 1628 1629 reply.Index = index 1630 return nil 1631 }