// github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/node_endpoint.go (about)

package nomad

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"reflect"
	"strings"
	"sync"
	"time"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	vapi "github.com/hashicorp/vault/api"
	"golang.org/x/sync/errgroup"

	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/state/paginator"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16

	// NodeDrainEvent* are the messages attached to node events when a node's
	// drain strategy is set, disabled, or updated.
	NodeDrainEventDrainSet      = "Node drain strategy set"
	NodeDrainEventDrainDisabled = "Node drain disabled"
	NodeDrainEventDrainUpdated  = "Node drain strategy updated"

	// NodeEligibilityEventEligible is used when the node's eligibility is
	// marked eligible
	NodeEligibilityEventEligible = "Node marked as eligible for scheduling"

	// NodeEligibilityEventIneligible is used when the node's eligibility is
	// marked ineligible
	NodeEligibilityEventIneligible = "Node marked as ineligible for scheduling"

	// NodeHeartbeatEventReregistered is the message used when the node becomes
	// reregistered by the heartbeat.
	NodeHeartbeatEventReregistered = "Node reregistered by heartbeat"
)

// Node endpoint is used for client interactions
type Node struct {
	srv    *Server
	logger hclog.Logger

	// ctx provides context regarding the underlying connection
	ctx *RPCContext

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// evals holds pending rescheduling eval updates triggered by failed allocations
	evals []*structs.Evaluation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *structs.BatchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// NewNodeEndpoint returns a Node endpoint bound to srv and ctx. Its logger is
// srv.logger named "client", and the pending updates and evals lists start
// out empty (non-nil).
func NewNodeEndpoint(srv *Server, ctx *RPCContext) *Node {
	return &Node{
		srv:     srv,
		ctx:     ctx,
		logger:  srv.logger.Named("client"),
		updates: []*structs.Allocation{},
		evals:   []*structs.Evaluation{},
	}
}

// Register is used to upsert a client that is available for scheduling
//
// If n.srv.forward reports the request as done, its error is returned as-is.
// Otherwise the request is validated (node, ID, datacenter, name, attributes
// and secret ID are all required), missing status and scheduling eligibility
// are defaulted, and the node is committed via Raft. Evaluations are created
// when shouldCreateNodeEval says so, the heartbeat timer is reset for nodes
// that are not in a terminal status, and reply is filled with server
// information read from a snapshot taken after the Raft apply.
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	// Capture the forwarded flag before calling forward.
	// NOTE(review): presumably forward may mark args as forwarded — confirm.
	isForwarded := args.IsForwarded()
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
			n.ctx.NodeID = args.Node.ID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}
	if args.Node.SecretID == "" {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Default to eligible for scheduling if unset
	if args.Node.SchedulingEligibility == "" {
		args.Node.SchedulingEligibility = structs.NodeSchedulingEligible
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to computed node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with. A stored node with an
	// empty SecretID accepts any incoming SecretID.
	if originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match. Not registering node.")
		}
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.Node.ID
		n.srv.addNodeConn(n.ctx)
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.logger.Error("register failed", "error", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	if shouldCreateNodeEval(originalNode, args.Node) {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node, index)
		if err != nil {
			n.logger.Error("eval creation failed", "error", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.logger.Error("heartbeat reset failed", "error", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index

	// Take a fresh snapshot after the Raft apply; the one above predates it.
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	// constructNodeServerInfoResponse reads n.srv.localPeers, so hold the
	// peer lock for reading until this method returns.
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(args.Node.ID, snap, reply); err != nil {
		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
		return err
	}

	return nil
}

// shouldCreateNodeEval returns true if the node update may result into
// allocation updates, so the node should be re-evaluating.
223 // 224 // Such cases might be: 225 // * node health/drain status changes that may result into alloc rescheduling 226 // * node drivers or attributes changing that may cause system job placement changes 227 func shouldCreateNodeEval(original, updated *structs.Node) bool { 228 if structs.ShouldDrainNode(updated.Status) { 229 return true 230 } 231 232 if original == nil { 233 return nodeStatusTransitionRequiresEval(updated.Status, structs.NodeStatusInit) 234 } 235 236 if nodeStatusTransitionRequiresEval(updated.Status, original.Status) { 237 return true 238 } 239 240 // check fields used by the feasibility checks in ../scheduler/feasible.go, 241 // whether through a Constraint explicitly added by user or an implicit constraint 242 // added through a driver/volume check. 243 // 244 // Node Resources (e.g. CPU/Memory) are handled differently, using blocked evals, 245 // and not relevant in this check. 246 return !(original.ID == updated.ID && 247 original.Datacenter == updated.Datacenter && 248 original.Name == updated.Name && 249 original.NodeClass == updated.NodeClass && 250 reflect.DeepEqual(original.Attributes, updated.Attributes) && 251 reflect.DeepEqual(original.Meta, updated.Meta) && 252 reflect.DeepEqual(original.Drivers, updated.Drivers) && 253 reflect.DeepEqual(original.HostVolumes, updated.HostVolumes) && 254 equalDevices(original, updated)) 255 } 256 257 func equalDevices(n1, n2 *structs.Node) bool { 258 // ignore super old nodes, mostly to avoid nil dereferencing 259 if n1.NodeResources == nil || n2.NodeResources == nil { 260 return n1.NodeResources == n2.NodeResources 261 } 262 263 // treat nil and empty value as equal 264 if len(n1.NodeResources.Devices) == 0 { 265 return len(n1.NodeResources.Devices) == len(n2.NodeResources.Devices) 266 } 267 268 return reflect.DeepEqual(n1.NodeResources.Devices, n2.NodeResources.Devices) 269 } 270 271 // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading. 
272 func (n *Node) constructNodeServerInfoResponse(nodeID string, snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error { 273 reply.LeaderRPCAddr = string(n.srv.raft.Leader()) 274 275 // Reply with config information required for future RPC requests 276 reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers)) 277 for _, v := range n.srv.localPeers { 278 reply.Servers = append(reply.Servers, 279 &structs.NodeServerInfo{ 280 RPCAdvertiseAddr: v.RPCAddr.String(), 281 Datacenter: v.Datacenter, 282 }) 283 } 284 285 // Add ClientStatus information to heartbeat response. 286 node, _ := snap.NodeByID(nil, nodeID) 287 reply.SchedulingEligibility = node.SchedulingEligibility 288 289 // TODO(sean@): Use an indexed node count instead 290 // 291 // Snapshot is used only to iterate over all nodes to create a node 292 // count to send back to Nomad Clients in their heartbeat so Clients 293 // can estimate the size of the cluster. 294 ws := memdb.NewWatchSet() 295 iter, err := snap.Nodes(ws) 296 if err == nil { 297 for { 298 raw := iter.Next() 299 if raw == nil { 300 break 301 } 302 reply.NumNodes++ 303 } 304 } 305 306 reply.Features = n.srv.EnterpriseState.Features() 307 308 return nil 309 } 310 311 // Deregister is used to remove a client from the cluster. If a client should 312 // just be made unavailable for scheduling, a status update is preferred. 
//
// The single node ID is repacked into a NodeBatchDeregisterRequest so the
// shared deregister helper can be used, while the Raft apply still submits
// the original request with NodeDeregisterRequestType.
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}

	// deregister takes a batch
	repack := &structs.NodeBatchDeregisterRequest{
		NodeIDs:      []string{args.NodeID},
		WriteRequest: args.WriteRequest,
	}

	return n.deregister(repack, reply, func() (interface{}, uint64, error) {
		return n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	})
}

// BatchDeregister is used to remove client nodes from the cluster.
// It rejects an empty args.NodeIDs and otherwise delegates to deregister,
// applying the request via Raft with NodeBatchDeregisterRequestType.
func (n *Node) BatchDeregister(args *structs.NodeBatchDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.BatchDeregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "batch_deregister"}, time.Now())

	if len(args.NodeIDs) == 0 {
		return fmt.Errorf("missing node IDs for client deregistration")
	}

	return n.deregister(args, reply, func() (interface{}, uint64, error) {
		return n.srv.raftApply(structs.NodeBatchDeregisterRequestType, args)
	})
}

// deregister takes a raftMessage closure, to support both Deregister and BatchDeregister
//
// It requires node-write permission for args.AuthToken when an ACL object is
// resolved, verifies that every ID in args.NodeIDs exists in a state
// snapshot, and then invokes raftApplyFn. Afterwards, for each node, it
// clears the heartbeat timer, creates node evaluations, and revokes the Vault
// and SI token accessors found for that node in the snapshot taken before the
// Raft apply. reply collects the eval IDs of all nodes, the eval create index
// of the first node, and the Raft index.
func (n *Node) deregister(args *structs.NodeBatchDeregisterRequest,
	reply *structs.NodeUpdateResponse,
	raftApplyFn func() (interface{}, uint64, error),
) error {
	// Check request permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	// Resolve every node up front so that nothing is applied when any of the
	// requested IDs is unknown.
	nodes := make([]*structs.Node, 0, len(args.NodeIDs))
	for _, nodeID := range args.NodeIDs {
		node, err := snap.NodeByID(nil, nodeID)
		if err != nil {
			return err
		}
		if node == nil {
			return fmt.Errorf("node not found")
		}
		nodes = append(nodes, node)
	}

	// Commit this update via Raft
	_, index, err := raftApplyFn()
	if err != nil {
		n.logger.Error("raft message failed", "error", err)
		return err
	}

	for _, node := range nodes {
		nodeID := node.ID

		// Clear the heartbeat timer if any
		n.srv.clearHeartbeatTimer(nodeID)

		// Create the evaluations for this node
		evalIDs, evalIndex, err := n.createNodeEvals(node, index)
		if err != nil {
			n.logger.Error("eval creation failed", "error", err)
			return err
		}

		// Determine if there are any Vault accessors on the node
		if accessors, err := snap.VaultAccessorsByNode(nil, nodeID); err != nil {
			n.logger.Error("looking up vault accessors for node failed", "node_id", nodeID, "error", err)
			return err
		} else if l := len(accessors); l > 0 {
			n.logger.Debug("revoking vault accessors on node due to deregister", "num_accessors", l, "node_id", nodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.logger.Error("revoking vault accessors for node failed", "node_id", nodeID, "error", err)
				return err
			}
		}

		// Determine if there are any SI token accessors on the node
		if accessors, err := snap.SITokenAccessorsByNode(nil, nodeID); err != nil {
			n.logger.Error("looking up si accessors for node failed", "node_id", nodeID, "error", err)
			return err
		} else if l := len(accessors); l > 0 {
			n.logger.Debug("revoking si accessors on node due to deregister", "num_accessors", l, "node_id", nodeID)
			// Unlike with the Vault integration, there's no error returned here, since
			// bootstrapping the Consul client is elsewhere. Errors in revocation trigger
			// background retry attempts rather than inline error handling.
			_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, true)
		}

		reply.EvalIDs = append(reply.EvalIDs, evalIDs...)
		// Set the reply eval create index just the first time
		if reply.EvalCreateIndex == 0 {
			reply.EvalCreateIndex = evalIndex
		}
	}

	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
//
// The status is only committed via Raft when it differs from the stored one.
// Evaluations are created when the new status requires draining or the
// transition requires an eval. When the new status is down, Vault and SI
// token accessors for the node are revoked and its service registrations are
// deleted via Raft; for every other status the heartbeat timer is reset.
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	// Capture the forwarded flag before calling forward.
	// NOTE(review): presumably forward may mark args as forwarded — confirm.
	isForwarded := args.IsForwarded()
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
			n.ctx.NodeID = args.NodeID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.NodeID
		n.srv.addNodeConn(n.ctx)
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	args.UpdatedAt = time.Now().Unix()

	// Commit this update via Raft. index stays zero when the status is
	// unchanged, because no Raft apply happens in that case.
	var index uint64
	if node.Status != args.Status {
		// Attach an event if we are updating the node status to ready when it
		// is down via a heartbeat
		if node.Status == structs.NodeStatusDown && args.NodeEvent == nil {
			args.NodeEvent = structs.NewNodeEvent().
				SetSubsystem(structs.NodeEventSubsystemCluster).
				SetMessage(NodeHeartbeatEventReregistered)
		}

		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.logger.Error("status update failed", "error", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	if structs.ShouldDrainNode(args.Status) ||
		nodeStatusTransitionRequiresEval(args.Status, node.Status) {
		evalIDs, evalIndex, err := n.createNodeEvals(node, index)
		if err != nil {
			n.logger.Error("eval creation failed", "error", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node to cleanup
		if accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID); err != nil {
			n.logger.Error("looking up vault accessors for node failed", "node_id", args.NodeID, "error", err)
			return err
		} else if l := len(accessors); l > 0 {
			n.logger.Debug("revoking vault accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.logger.Error("revoking vault accessors for node failed", "node_id", args.NodeID, "error", err)
				return err
			}
		}

		// Determine if there are any SI token accessors on the node to cleanup
		if accessors, err := n.srv.State().SITokenAccessorsByNode(ws, args.NodeID); err != nil {
			n.logger.Error("looking up SI accessors for node failed", "node_id", args.NodeID, "error", err)
			return err
		} else if l := len(accessors); l > 0 {
			n.logger.Debug("revoking SI accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID)
			// The result is deliberately discarded, as in deregister.
			_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, true)
		}

		// Identify the service registrations currently placed on the downed
		// node.
		serviceRegistrations, err := n.srv.State().GetServiceRegistrationsByNodeID(ws, args.NodeID)
		if err != nil {
			n.logger.Error("looking up service registrations for node failed",
				"node_id", args.NodeID, "error", err)
			return err
		}

		// If the node has service registrations assigned to it, delete these
		// via Raft. This overwrites index, so reply.Index below reflects the
		// deletion rather than the status update.
		if l := len(serviceRegistrations); l > 0 {
			n.logger.Debug("deleting service registrations on node due to down state",
				"num_service_registrations", l, "node_id", args.NodeID)

			deleteRegReq := structs.ServiceRegistrationDeleteByNodeIDRequest{NodeID: args.NodeID}

			_, index, err = n.srv.raftApply(structs.ServiceRegistrationDeleteByNodeIDRequestType, &deleteRegReq)
			if err != nil {
				n.logger.Error("failed to delete service registrations for node",
					"node_id", args.NodeID, "error", err)
				return err
			}
		}

	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.logger.Error("heartbeat reset failed", "error", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index

	// constructNodeServerInfoResponse reads n.srv.localPeers, so hold the
	// peer lock for reading until this method returns. Note that snap was
	// taken before any Raft apply above.
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil {
		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
		return err
	}

	return nil
}

// nodeStatusTransitionRequiresEval is a helper that takes a node's new and old
// status and returns whether the transition requires an evaluation: init to
// ready, down to ready, or any transition into or out of disconnected.
592 func nodeStatusTransitionRequiresEval(newStatus, oldStatus string) bool { 593 initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady 594 terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady 595 disconnectedToOther := oldStatus == structs.NodeStatusDisconnected && newStatus != structs.NodeStatusDisconnected 596 otherToDisconnected := oldStatus != structs.NodeStatusDisconnected && newStatus == structs.NodeStatusDisconnected 597 return initToReady || terminalToReady || disconnectedToOther || otherToDisconnected 598 } 599 600 // UpdateDrain is used to update the drain mode of a client node 601 func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest, 602 reply *structs.NodeDrainUpdateResponse) error { 603 if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done { 604 return err 605 } 606 defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now()) 607 608 // Check node write permissions 609 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 610 return err 611 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 612 return structs.ErrPermissionDenied 613 } 614 615 // Verify the arguments 616 if args.NodeID == "" { 617 return fmt.Errorf("missing node ID for drain update") 618 } 619 if args.NodeEvent != nil { 620 return fmt.Errorf("node event must not be set") 621 } 622 623 // Look for the node 624 snap, err := n.srv.fsm.State().Snapshot() 625 if err != nil { 626 return err 627 } 628 node, err := snap.NodeByID(nil, args.NodeID) 629 if err != nil { 630 return err 631 } 632 if node == nil { 633 return fmt.Errorf("node not found") 634 } 635 636 now := time.Now().UTC() 637 638 // Update the timestamp of when the node status was updated 639 args.UpdatedAt = now.Unix() 640 641 // Setup drain strategy 642 if args.DrainStrategy != nil { 643 // Mark start time for the drain 644 if node.DrainStrategy == nil { 645 
args.DrainStrategy.StartedAt = now 646 } else { 647 args.DrainStrategy.StartedAt = node.DrainStrategy.StartedAt 648 } 649 650 // Mark the deadline time 651 if args.DrainStrategy.Deadline.Nanoseconds() > 0 { 652 args.DrainStrategy.ForceDeadline = now.Add(args.DrainStrategy.Deadline) 653 } 654 } 655 656 // Construct the node event 657 args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemDrain) 658 if node.DrainStrategy == nil && args.DrainStrategy != nil { 659 args.NodeEvent.SetMessage(NodeDrainEventDrainSet) 660 } else if node.DrainStrategy != nil && args.DrainStrategy != nil { 661 args.NodeEvent.SetMessage(NodeDrainEventDrainUpdated) 662 } else if node.DrainStrategy != nil && args.DrainStrategy == nil { 663 args.NodeEvent.SetMessage(NodeDrainEventDrainDisabled) 664 } else { 665 args.NodeEvent = nil 666 } 667 668 // Commit this update via Raft 669 _, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args) 670 if err != nil { 671 n.logger.Error("drain update failed", "error", err) 672 return err 673 } 674 reply.NodeModifyIndex = index 675 676 // If the node is transitioning to be eligible, create Node evaluations 677 // because there may be a System job registered that should be evaluated. 
678 if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil { 679 n.logger.Info("node transitioning to eligible state", "node_id", node.ID) 680 evalIDs, evalIndex, err := n.createNodeEvals(node, index) 681 if err != nil { 682 n.logger.Error("eval creation failed", "error", err) 683 return err 684 } 685 reply.EvalIDs = evalIDs 686 reply.EvalCreateIndex = evalIndex 687 } 688 689 // Set the reply index 690 reply.Index = index 691 return nil 692 } 693 694 // UpdateEligibility is used to update the scheduling eligibility of a node 695 func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest, 696 reply *structs.NodeEligibilityUpdateResponse) error { 697 if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done { 698 return err 699 } 700 defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now()) 701 702 // Check node write permissions 703 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 704 return err 705 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 706 return structs.ErrPermissionDenied 707 } 708 709 // Verify the arguments 710 if args.NodeID == "" { 711 return fmt.Errorf("missing node ID for setting scheduling eligibility") 712 } 713 if args.NodeEvent != nil { 714 return fmt.Errorf("node event must not be set") 715 } 716 717 // Check that only allowed types are set 718 switch args.Eligibility { 719 case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible: 720 default: 721 return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility) 722 } 723 724 // Look for the node 725 snap, err := n.srv.fsm.State().Snapshot() 726 if err != nil { 727 return err 728 } 729 node, err := snap.NodeByID(nil, args.NodeID) 730 if err != nil { 731 return err 732 } 733 if node == nil { 734 return fmt.Errorf("node not found") 735 } 736 737 if node.DrainStrategy != nil && args.Eligibility == 
structs.NodeSchedulingEligible { 738 return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining") 739 } 740 741 switch args.Eligibility { 742 case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible: 743 default: 744 return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility) 745 } 746 747 // Update the timestamp of when the node status was updated 748 args.UpdatedAt = time.Now().Unix() 749 750 // Construct the node event 751 args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster) 752 if node.SchedulingEligibility == args.Eligibility { 753 return nil // Nothing to do 754 } else if args.Eligibility == structs.NodeSchedulingEligible { 755 n.logger.Info("node transitioning to eligible state", "node_id", node.ID) 756 args.NodeEvent.SetMessage(NodeEligibilityEventEligible) 757 } else { 758 n.logger.Info("node transitioning to ineligible state", "node_id", node.ID) 759 args.NodeEvent.SetMessage(NodeEligibilityEventIneligible) 760 } 761 762 // Commit this update via Raft 763 outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args) 764 if err != nil { 765 n.logger.Error("eligibility update failed", "error", err) 766 return err 767 } 768 if outErr != nil { 769 if err, ok := outErr.(error); ok && err != nil { 770 n.logger.Error("eligibility update failed", "error", err) 771 return err 772 } 773 } 774 775 // If the node is transitioning to be eligible, create Node evaluations 776 // because there may be a System job registered that should be evaluated. 
777 if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible { 778 evalIDs, evalIndex, err := n.createNodeEvals(node, index) 779 if err != nil { 780 n.logger.Error("eval creation failed", "error", err) 781 return err 782 } 783 reply.EvalIDs = evalIDs 784 reply.EvalCreateIndex = evalIndex 785 } 786 787 // Set the reply index 788 reply.Index = index 789 return nil 790 } 791 792 // Evaluate is used to force a re-evaluation of the node 793 func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error { 794 if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done { 795 return err 796 } 797 defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now()) 798 799 // Check node write permissions 800 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 801 return err 802 } else if aclObj != nil && !aclObj.AllowNodeWrite() { 803 return structs.ErrPermissionDenied 804 } 805 806 // Verify the arguments 807 if args.NodeID == "" { 808 return fmt.Errorf("missing node ID for evaluation") 809 } 810 811 // Look for the node 812 snap, err := n.srv.fsm.State().Snapshot() 813 if err != nil { 814 return err 815 } 816 ws := memdb.NewWatchSet() 817 node, err := snap.NodeByID(ws, args.NodeID) 818 if err != nil { 819 return err 820 } 821 if node == nil { 822 return fmt.Errorf("node not found") 823 } 824 825 // Create the evaluation 826 evalIDs, evalIndex, err := n.createNodeEvals(node, node.ModifyIndex) 827 if err != nil { 828 n.logger.Error("eval creation failed", "error", err) 829 return err 830 } 831 reply.EvalIDs = evalIDs 832 reply.EvalCreateIndex = evalIndex 833 834 // Set the reply index 835 reply.Index = evalIndex 836 837 n.srv.peerLock.RLock() 838 defer n.srv.peerLock.RUnlock() 839 if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil { 840 n.logger.Error("failed to populate NodeUpdateResponse", "error", err) 
841 return err 842 } 843 return nil 844 } 845 846 // GetNode is used to request information about a specific node 847 func (n *Node) GetNode(args *structs.NodeSpecificRequest, 848 reply *structs.SingleNodeResponse) error { 849 if done, err := n.srv.forward("Node.GetNode", args, args, reply); done { 850 return err 851 } 852 defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now()) 853 854 // Check node read permissions 855 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 856 // If ResolveToken had an unexpected error return that 857 if err != structs.ErrTokenNotFound { 858 return err 859 } 860 861 // Attempt to lookup AuthToken as a Node.SecretID since nodes 862 // call this endpoint and don't have an ACL token. 863 node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken) 864 if stateErr != nil { 865 // Return the original ResolveToken error with this err 866 var merr multierror.Error 867 merr.Errors = append(merr.Errors, err, stateErr) 868 return merr.ErrorOrNil() 869 } 870 871 // Not a node or a valid ACL token 872 if node == nil { 873 return structs.ErrTokenNotFound 874 } 875 } else if aclObj != nil && !aclObj.AllowNodeRead() { 876 return structs.ErrPermissionDenied 877 } 878 879 // Setup the blocking query 880 opts := blockingOptions{ 881 queryOpts: &args.QueryOptions, 882 queryMeta: &reply.QueryMeta, 883 run: func(ws memdb.WatchSet, state *state.StateStore) error { 884 // Verify the arguments 885 if args.NodeID == "" { 886 return fmt.Errorf("missing node ID") 887 } 888 889 // Look for the node 890 out, err := state.NodeByID(ws, args.NodeID) 891 if err != nil { 892 return err 893 } 894 895 // Setup the output 896 if out != nil { 897 out = out.Sanitize() 898 reply.Node = out 899 reply.Index = out.ModifyIndex 900 } else { 901 // Use the last index that affected the nodes table 902 index, err := state.Index("nodes") 903 if err != nil { 904 return err 905 } 906 reply.Node = nil 907 reply.Index = index 908 } 909 
910 // Set the query response 911 n.srv.setQueryMeta(&reply.QueryMeta) 912 return nil 913 }} 914 return n.srv.blockingRPC(&opts) 915 } 916 917 // GetAllocs is used to request allocations for a specific node 918 func (n *Node) GetAllocs(args *structs.NodeSpecificRequest, 919 reply *structs.NodeAllocsResponse) error { 920 if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done { 921 return err 922 } 923 defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now()) 924 925 // Check node read and namespace job read permissions 926 aclObj, err := n.srv.ResolveToken(args.AuthToken) 927 if err != nil { 928 return err 929 } 930 if aclObj != nil && !aclObj.AllowNodeRead() { 931 return structs.ErrPermissionDenied 932 } 933 934 // cache namespace perms 935 readableNamespaces := map[string]bool{} 936 937 // readNS is a caching namespace read-job helper 938 readNS := func(ns string) bool { 939 if aclObj == nil { 940 // ACLs are disabled; everything is readable 941 return true 942 } 943 944 if readable, ok := readableNamespaces[ns]; ok { 945 // cache hit 946 return readable 947 } 948 949 // cache miss 950 readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob) 951 readableNamespaces[ns] = readable 952 return readable 953 } 954 955 // Verify the arguments 956 if args.NodeID == "" { 957 return fmt.Errorf("missing node ID") 958 } 959 960 // Setup the blocking query 961 opts := blockingOptions{ 962 queryOpts: &args.QueryOptions, 963 queryMeta: &reply.QueryMeta, 964 run: func(ws memdb.WatchSet, state *state.StateStore) error { 965 // Look for the node 966 allocs, err := state.AllocsByNode(ws, args.NodeID) 967 if err != nil { 968 return err 969 } 970 971 // Setup the output 972 if n := len(allocs); n != 0 { 973 reply.Allocs = make([]*structs.Allocation, 0, n) 974 for _, alloc := range allocs { 975 if readNS(alloc.Namespace) { 976 reply.Allocs = append(reply.Allocs, alloc) 977 } 978 979 // Get the max of all allocs since 980 // subsequent 
requests need to start 981 // from the latest index 982 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 983 } 984 } else { 985 reply.Allocs = nil 986 987 // Use the last index that affected the nodes table 988 index, err := state.Index("allocs") 989 if err != nil { 990 return err 991 } 992 993 // Must provide non-zero index to prevent blocking 994 // Index 1 is impossible anyways (due to Raft internals) 995 if index == 0 { 996 reply.Index = 1 997 } else { 998 reply.Index = index 999 } 1000 } 1001 return nil 1002 }} 1003 return n.srv.blockingRPC(&opts) 1004 } 1005 1006 // GetClientAllocs is used to request a lightweight list of alloc modify indexes 1007 // per allocation. 1008 func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest, 1009 reply *structs.NodeClientAllocsResponse) error { 1010 isForwarded := args.IsForwarded() 1011 if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done { 1012 // We have a valid node connection since there is no error from the 1013 // forwarded server, so add the mapping to cache the 1014 // connection and allow the server to send RPCs to the client. 1015 if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded { 1016 n.ctx.NodeID = args.NodeID 1017 n.srv.addNodeConn(n.ctx) 1018 } 1019 1020 return err 1021 } 1022 defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now()) 1023 1024 // Verify the arguments 1025 if args.NodeID == "" { 1026 return fmt.Errorf("missing node ID") 1027 } 1028 1029 // numOldAllocs is used to detect if there is a garbage collection event 1030 // that effects the node. When an allocation is garbage collected, that does 1031 // not change the modify index changes and thus the query won't unblock, 1032 // even though the set of allocations on the node has changed. 
1033 var numOldAllocs int 1034 1035 // Setup the blocking query 1036 opts := blockingOptions{ 1037 queryOpts: &args.QueryOptions, 1038 queryMeta: &reply.QueryMeta, 1039 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1040 // Look for the node 1041 node, err := state.NodeByID(ws, args.NodeID) 1042 if err != nil { 1043 return err 1044 } 1045 1046 var allocs []*structs.Allocation 1047 if node != nil { 1048 if args.SecretID == "" { 1049 return fmt.Errorf("missing node secret ID for client status update") 1050 } else if args.SecretID != node.SecretID { 1051 return fmt.Errorf("node secret ID does not match") 1052 } 1053 1054 // We have a valid node connection, so add the mapping to cache the 1055 // connection and allow the server to send RPCs to the client. We only cache 1056 // the connection if it is not being forwarded from another server. 1057 if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() { 1058 n.ctx.NodeID = args.NodeID 1059 n.srv.addNodeConn(n.ctx) 1060 } 1061 1062 var err error 1063 allocs, err = state.AllocsByNode(ws, args.NodeID) 1064 if err != nil { 1065 return err 1066 } 1067 } 1068 1069 reply.Allocs = make(map[string]uint64) 1070 reply.MigrateTokens = make(map[string]string) 1071 1072 // preferTableIndex is used to determine whether we should build the 1073 // response index based on the full table indexes versus the modify 1074 // indexes of the allocations on the specific node. This is 1075 // preferred in the case that the node doesn't yet have allocations 1076 // or when we detect a GC that effects the node. 1077 preferTableIndex := true 1078 1079 // Setup the output 1080 if numAllocs := len(allocs); numAllocs != 0 { 1081 preferTableIndex = false 1082 1083 for _, alloc := range allocs { 1084 reply.Allocs[alloc.ID] = alloc.AllocModifyIndex 1085 1086 // If the allocation is going to do a migration, create a 1087 // migration token so that the client can authenticate with 1088 // the node hosting the previous allocation. 
1089 if alloc.ShouldMigrate() { 1090 prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation) 1091 if err != nil { 1092 return err 1093 } 1094 1095 if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID { 1096 allocNode, err := state.NodeByID(ws, prevAllocation.NodeID) 1097 if err != nil { 1098 return err 1099 } 1100 if allocNode == nil { 1101 // Node must have been GC'd so skip the token 1102 continue 1103 } 1104 1105 token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID) 1106 if err != nil { 1107 return err 1108 } 1109 reply.MigrateTokens[alloc.ID] = token 1110 } 1111 } 1112 1113 reply.Index = maxUint64(reply.Index, alloc.ModifyIndex) 1114 } 1115 1116 // Determine if we have less allocations than before. This 1117 // indicates there was a garbage collection 1118 if numAllocs < numOldAllocs { 1119 preferTableIndex = true 1120 } 1121 1122 // Store the new number of allocations 1123 numOldAllocs = numAllocs 1124 } 1125 1126 if preferTableIndex { 1127 // Use the last index that affected the nodes table 1128 index, err := state.Index("allocs") 1129 if err != nil { 1130 return err 1131 } 1132 1133 // Must provide non-zero index to prevent blocking 1134 // Index 1 is impossible anyways (due to Raft internals) 1135 if index == 0 { 1136 reply.Index = 1 1137 } else { 1138 reply.Index = index 1139 } 1140 } 1141 return nil 1142 }} 1143 return n.srv.blockingRPC(&opts) 1144 } 1145 1146 // UpdateAlloc is used to update the client status of an allocation. It should 1147 // only be called by clients. 1148 // 1149 // Clients must first register and heartbeat successfully before they are able 1150 // to call this method. 1151 func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error { 1152 // Ensure the connection was initiated by another client if TLS is used. 
1153 err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient) 1154 if err != nil { 1155 return err 1156 } 1157 1158 if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done { 1159 return err 1160 } 1161 defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now()) 1162 1163 // Ensure at least a single alloc 1164 if len(args.Alloc) == 0 { 1165 return fmt.Errorf("must update at least one allocation") 1166 } 1167 1168 // Ensure the node is allowed to update allocs. 1169 // The node needs to successfully heartbeat before updating its allocs. 1170 nodeID := args.Alloc[0].NodeID 1171 if nodeID == "" { 1172 return fmt.Errorf("missing node ID") 1173 } 1174 1175 node, err := n.srv.State().NodeByID(nil, nodeID) 1176 if err != nil { 1177 return fmt.Errorf("failed to retrieve node %s: %v", nodeID, err) 1178 } 1179 if node == nil { 1180 return fmt.Errorf("node %s not found", nodeID) 1181 } 1182 if node.Status != structs.NodeStatusReady { 1183 return fmt.Errorf("node %s is %s, not %s", nodeID, node.Status, structs.NodeStatusReady) 1184 } 1185 1186 // Ensure that evals aren't set from client RPCs 1187 // We create them here before the raft update 1188 if len(args.Evals) != 0 { 1189 return fmt.Errorf("evals field must not be set") 1190 } 1191 1192 // Update modified timestamp for client initiated allocation updates 1193 now := time.Now() 1194 var evals []*structs.Evaluation 1195 1196 for _, allocToUpdate := range args.Alloc { 1197 evalTriggerBy := "" 1198 allocToUpdate.ModifyTime = now.UTC().UnixNano() 1199 1200 alloc, _ := n.srv.State().AllocByID(nil, allocToUpdate.ID) 1201 if alloc == nil { 1202 continue 1203 } 1204 1205 if !allocToUpdate.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusUnknown { 1206 continue 1207 } 1208 1209 var job *structs.Job 1210 var jobType string 1211 var jobPriority int 1212 1213 job, err = n.srv.State().JobByID(nil, alloc.Namespace, alloc.JobID) 1214 if err != nil { 1215 
n.logger.Debug("UpdateAlloc unable to find job", "job", alloc.JobID, "error", err) 1216 continue 1217 } 1218 1219 // If the job is nil it means it has been de-registered. 1220 if job == nil { 1221 jobType = alloc.Job.Type 1222 jobPriority = alloc.Job.Priority 1223 evalTriggerBy = structs.EvalTriggerJobDeregister 1224 allocToUpdate.DesiredStatus = structs.AllocDesiredStatusStop 1225 n.logger.Debug("UpdateAlloc unable to find job - shutting down alloc", "job", alloc.JobID) 1226 } 1227 1228 var taskGroup *structs.TaskGroup 1229 if job != nil { 1230 jobType = job.Type 1231 jobPriority = job.Priority 1232 taskGroup = job.LookupTaskGroup(alloc.TaskGroup) 1233 } 1234 1235 // If we cannot find the task group for a failed alloc we cannot continue, unless it is an orphan. 1236 if evalTriggerBy != structs.EvalTriggerJobDeregister && 1237 allocToUpdate.ClientStatus == structs.AllocClientStatusFailed && 1238 alloc.FollowupEvalID == "" { 1239 1240 if taskGroup == nil { 1241 n.logger.Debug("UpdateAlloc unable to find task group for job", "job", alloc.JobID, "alloc", alloc.ID, "task_group", alloc.TaskGroup) 1242 continue 1243 } 1244 1245 // Set trigger by failed if not an orphan. 1246 if alloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) { 1247 evalTriggerBy = structs.EvalTriggerRetryFailedAlloc 1248 } 1249 } 1250 1251 var eval *structs.Evaluation 1252 // If unknown, and not an orphan, set the trigger by. 1253 if evalTriggerBy != structs.EvalTriggerJobDeregister && 1254 alloc.ClientStatus == structs.AllocClientStatusUnknown { 1255 evalTriggerBy = structs.EvalTriggerReconnect 1256 } 1257 1258 // If we weren't able to determine one of our expected eval triggers, 1259 // continue and don't create an eval. 
1260 if evalTriggerBy == "" { 1261 continue 1262 } 1263 1264 eval = &structs.Evaluation{ 1265 ID: uuid.Generate(), 1266 Namespace: alloc.Namespace, 1267 TriggeredBy: evalTriggerBy, 1268 JobID: alloc.JobID, 1269 Type: jobType, 1270 Priority: jobPriority, 1271 Status: structs.EvalStatusPending, 1272 CreateTime: now.UTC().UnixNano(), 1273 ModifyTime: now.UTC().UnixNano(), 1274 } 1275 evals = append(evals, eval) 1276 } 1277 1278 // Add this to the batch 1279 n.updatesLock.Lock() 1280 n.updates = append(n.updates, args.Alloc...) 1281 n.evals = append(n.evals, evals...) 1282 1283 // Start a new batch if none 1284 future := n.updateFuture 1285 if future == nil { 1286 future = structs.NewBatchFuture() 1287 n.updateFuture = future 1288 n.updateTimer = time.AfterFunc(batchUpdateInterval, func() { 1289 // Get the pending updates 1290 n.updatesLock.Lock() 1291 updates := n.updates 1292 evals := n.evals 1293 future := n.updateFuture 1294 1295 // Assume future update patterns will be similar to 1296 // current batch and set cap appropriately to avoid 1297 // slice resizing. 
1298 n.updates = make([]*structs.Allocation, 0, len(updates)) 1299 n.evals = make([]*structs.Evaluation, 0, len(evals)) 1300 1301 n.updateFuture = nil 1302 n.updateTimer = nil 1303 n.updatesLock.Unlock() 1304 1305 // Perform the batch update 1306 n.batchUpdate(future, updates, evals) 1307 }) 1308 } 1309 n.updatesLock.Unlock() 1310 1311 // Wait for the future 1312 if err := future.Wait(); err != nil { 1313 return err 1314 } 1315 1316 // Setup the response 1317 reply.Index = future.Index() 1318 return nil 1319 } 1320 1321 // batchUpdate is used to update all the allocations 1322 func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) { 1323 var mErr multierror.Error 1324 // Group pending evals by jobID to prevent creating unnecessary evals 1325 evalsByJobId := make(map[structs.NamespacedID]struct{}) 1326 var trimmedEvals []*structs.Evaluation 1327 for _, eval := range evals { 1328 namespacedID := structs.NamespacedID{ 1329 ID: eval.JobID, 1330 Namespace: eval.Namespace, 1331 } 1332 _, exists := evalsByJobId[namespacedID] 1333 if !exists { 1334 now := time.Now().UTC().UnixNano() 1335 eval.CreateTime = now 1336 eval.ModifyTime = now 1337 trimmedEvals = append(trimmedEvals, eval) 1338 evalsByJobId[namespacedID] = struct{}{} 1339 } 1340 } 1341 1342 if len(trimmedEvals) > 0 { 1343 n.logger.Debug("adding evaluations for rescheduling failed allocations", "num_evals", len(trimmedEvals)) 1344 } 1345 // Prepare the batch update 1346 batch := &structs.AllocUpdateRequest{ 1347 Alloc: updates, 1348 Evals: trimmedEvals, 1349 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 1350 } 1351 1352 // Commit this update via Raft 1353 _, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch) 1354 if err != nil { 1355 n.logger.Error("alloc update failed", "error", err) 1356 mErr.Errors = append(mErr.Errors, err) 1357 } 1358 1359 // For each allocation we are updating, check if we should revoke any 
1360 // - Vault token accessors 1361 // - Service Identity token accessors 1362 var ( 1363 revokeVault []*structs.VaultAccessor 1364 revokeSI []*structs.SITokenAccessor 1365 ) 1366 1367 for _, alloc := range updates { 1368 // Skip any allocation that isn't dead on the client 1369 if !alloc.Terminated() { 1370 continue 1371 } 1372 1373 ws := memdb.NewWatchSet() 1374 1375 // Determine if there are any orphaned Vault accessors for the allocation 1376 if accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID); err != nil { 1377 n.logger.Error("looking up vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err) 1378 mErr.Errors = append(mErr.Errors, err) 1379 } else { 1380 revokeVault = append(revokeVault, accessors...) 1381 } 1382 1383 // Determine if there are any orphaned SI accessors for the allocation 1384 if accessors, err := n.srv.State().SITokenAccessorsByAlloc(ws, alloc.ID); err != nil { 1385 n.logger.Error("looking up si accessors for alloc failed", "alloc_id", alloc.ID, "error", err) 1386 mErr.Errors = append(mErr.Errors, err) 1387 } else { 1388 revokeSI = append(revokeSI, accessors...) 
1389 } 1390 } 1391 1392 // Revoke any orphaned Vault token accessors 1393 if l := len(revokeVault); l > 0 { 1394 n.logger.Debug("revoking vault accessors due to terminal allocations", "num_accessors", l) 1395 if err := n.srv.vault.RevokeTokens(context.Background(), revokeVault, true); err != nil { 1396 n.logger.Error("batched vault accessor revocation failed", "error", err) 1397 mErr.Errors = append(mErr.Errors, err) 1398 } 1399 } 1400 1401 // Revoke any orphaned SI token accessors 1402 if l := len(revokeSI); l > 0 { 1403 n.logger.Debug("revoking si accessors due to terminal allocations", "num_accessors", l) 1404 _ = n.srv.consulACLs.RevokeTokens(context.Background(), revokeSI, true) 1405 } 1406 1407 // Respond to the future 1408 future.Respond(index, mErr.ErrorOrNil()) 1409 } 1410 1411 // List is used to list the available nodes 1412 func (n *Node) List(args *structs.NodeListRequest, 1413 reply *structs.NodeListResponse) error { 1414 if done, err := n.srv.forward("Node.List", args, args, reply); done { 1415 return err 1416 } 1417 defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now()) 1418 1419 // Check node read permissions 1420 if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil { 1421 return err 1422 } else if aclObj != nil && !aclObj.AllowNodeRead() { 1423 return structs.ErrPermissionDenied 1424 } 1425 1426 // Set up the blocking query. 1427 opts := blockingOptions{ 1428 queryOpts: &args.QueryOptions, 1429 queryMeta: &reply.QueryMeta, 1430 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1431 1432 var err error 1433 var iter memdb.ResultIterator 1434 if prefix := args.QueryOptions.Prefix; prefix != "" { 1435 iter, err = state.NodesByIDPrefix(ws, prefix) 1436 } else { 1437 iter, err = state.Nodes(ws) 1438 } 1439 if err != nil { 1440 return err 1441 } 1442 1443 // Generate the tokenizer to use for pagination using the populated 1444 // paginatorOpts object. 
The ID of a node must be unique within the 1445 // region, therefore we only need WithID on the paginator options. 1446 tokenizer := paginator.NewStructsTokenizer(iter, paginator.StructsTokenizerOptions{WithID: true}) 1447 1448 var nodes []*structs.NodeListStub 1449 1450 // Build the paginator. This includes the function that is 1451 // responsible for appending a node to the nodes array. 1452 paginatorImpl, err := paginator.NewPaginator(iter, tokenizer, nil, args.QueryOptions, 1453 func(raw interface{}) error { 1454 nodes = append(nodes, raw.(*structs.Node).Stub(args.Fields)) 1455 return nil 1456 }) 1457 if err != nil { 1458 return structs.NewErrRPCCodedf( 1459 http.StatusBadRequest, "failed to create result paginator: %v", err) 1460 } 1461 1462 // Calling page populates our output nodes array as well as returns 1463 // the next token. 1464 nextToken, err := paginatorImpl.Page() 1465 if err != nil { 1466 return structs.NewErrRPCCodedf( 1467 http.StatusBadRequest, "failed to read result page: %v", err) 1468 } 1469 1470 // Populate the reply. 1471 reply.Nodes = nodes 1472 reply.NextToken = nextToken 1473 1474 // Use the last index that affected the jobs table 1475 index, err := state.Index("nodes") 1476 if err != nil { 1477 return err 1478 } 1479 reply.Index = index 1480 1481 // Set the query response 1482 n.srv.setQueryMeta(&reply.QueryMeta) 1483 return nil 1484 }} 1485 return n.srv.blockingRPC(&opts) 1486 } 1487 1488 // createNodeEvals is used to create evaluations for each alloc on a node. 1489 // Each Eval is scoped to a job, so we need to potentially trigger many evals. 
1490 func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string, uint64, error) { 1491 nodeID := node.ID 1492 1493 // Snapshot the state 1494 snap, err := n.srv.fsm.State().Snapshot() 1495 if err != nil { 1496 return nil, 0, fmt.Errorf("failed to snapshot state: %v", err) 1497 } 1498 1499 // Find all the allocations for this node 1500 allocs, err := snap.AllocsByNode(nil, nodeID) 1501 if err != nil { 1502 return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err) 1503 } 1504 1505 sysJobsIter, err := snap.JobsByScheduler(nil, "system") 1506 if err != nil { 1507 return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err) 1508 } 1509 1510 var sysJobs []*structs.Job 1511 for jobI := sysJobsIter.Next(); jobI != nil; jobI = sysJobsIter.Next() { 1512 job := jobI.(*structs.Job) 1513 // Avoid creating evals for jobs that don't run in this 1514 // datacenter. We could perform an entire feasibility check 1515 // here, but datacenter is a good optimization to start with as 1516 // datacenter cardinality tends to be low so the check 1517 // shouldn't add much work. 
1518 for _, dc := range job.Datacenters { 1519 if dc == node.Datacenter { 1520 sysJobs = append(sysJobs, job) 1521 break 1522 } 1523 } 1524 } 1525 1526 // Fast-path if nothing to do 1527 if len(allocs) == 0 && len(sysJobs) == 0 { 1528 return nil, 0, nil 1529 } 1530 1531 // Create an eval for each JobID affected 1532 var evals []*structs.Evaluation 1533 var evalIDs []string 1534 jobIDs := map[structs.NamespacedID]struct{}{} 1535 now := time.Now().UTC().UnixNano() 1536 1537 for _, alloc := range allocs { 1538 // Deduplicate on JobID 1539 if _, ok := jobIDs[alloc.JobNamespacedID()]; ok { 1540 continue 1541 } 1542 jobIDs[alloc.JobNamespacedID()] = struct{}{} 1543 1544 // Create a new eval 1545 eval := &structs.Evaluation{ 1546 ID: uuid.Generate(), 1547 Namespace: alloc.Namespace, 1548 Priority: alloc.Job.Priority, 1549 Type: alloc.Job.Type, 1550 TriggeredBy: structs.EvalTriggerNodeUpdate, 1551 JobID: alloc.JobID, 1552 NodeID: nodeID, 1553 NodeModifyIndex: nodeIndex, 1554 Status: structs.EvalStatusPending, 1555 CreateTime: now, 1556 ModifyTime: now, 1557 } 1558 1559 evals = append(evals, eval) 1560 evalIDs = append(evalIDs, eval.ID) 1561 } 1562 1563 // Create an evaluation for each system job. 1564 for _, job := range sysJobs { 1565 // Still dedup on JobID as the node may already have the system job. 
1566 if _, ok := jobIDs[job.NamespacedID()]; ok { 1567 continue 1568 } 1569 jobIDs[job.NamespacedID()] = struct{}{} 1570 1571 // Create a new eval 1572 eval := &structs.Evaluation{ 1573 ID: uuid.Generate(), 1574 Namespace: job.Namespace, 1575 Priority: job.Priority, 1576 Type: job.Type, 1577 TriggeredBy: structs.EvalTriggerNodeUpdate, 1578 JobID: job.ID, 1579 NodeID: nodeID, 1580 NodeModifyIndex: nodeIndex, 1581 Status: structs.EvalStatusPending, 1582 CreateTime: now, 1583 ModifyTime: now, 1584 } 1585 evals = append(evals, eval) 1586 evalIDs = append(evalIDs, eval.ID) 1587 } 1588 1589 // Create the Raft transaction 1590 update := &structs.EvalUpdateRequest{ 1591 Evals: evals, 1592 WriteRequest: structs.WriteRequest{Region: n.srv.config.Region}, 1593 } 1594 1595 // Commit this evaluation via Raft 1596 // XXX: There is a risk of partial failure where the node update succeeds 1597 // but that the EvalUpdate does not. 1598 _, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update) 1599 if err != nil { 1600 return nil, 0, err 1601 } 1602 return evalIDs, evalIndex, nil 1603 } 1604 1605 // DeriveVaultToken is used by the clients to request wrapped Vault tokens for 1606 // tasks 1607 func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest, reply *structs.DeriveVaultTokenResponse) error { 1608 setError := func(e error, recoverable bool) { 1609 if e != nil { 1610 if re, ok := e.(*structs.RecoverableError); ok { 1611 reply.Error = re // No need to wrap if error is already a RecoverableError 1612 } else { 1613 reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError) 1614 } 1615 n.logger.Error("DeriveVaultToken failed", "recoverable", recoverable, "error", e) 1616 } 1617 } 1618 1619 if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done { 1620 setError(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader) 1621 return nil 1622 } 1623 defer metrics.MeasureSince([]string{"nomad", 
"client", "derive_vault_token"}, time.Now()) 1624 1625 // Verify the arguments 1626 if args.NodeID == "" { 1627 setError(fmt.Errorf("missing node ID"), false) 1628 return nil 1629 } 1630 if args.SecretID == "" { 1631 setError(fmt.Errorf("missing node SecretID"), false) 1632 return nil 1633 } 1634 if args.AllocID == "" { 1635 setError(fmt.Errorf("missing allocation ID"), false) 1636 return nil 1637 } 1638 if len(args.Tasks) == 0 { 1639 setError(fmt.Errorf("no tasks specified"), false) 1640 return nil 1641 } 1642 1643 // Verify the following: 1644 // * The Node exists and has the correct SecretID 1645 // * The Allocation exists on the specified Node 1646 // * The Allocation contains the given tasks and they each require Vault 1647 // tokens 1648 snap, err := n.srv.fsm.State().Snapshot() 1649 if err != nil { 1650 setError(err, false) 1651 return nil 1652 } 1653 ws := memdb.NewWatchSet() 1654 node, err := snap.NodeByID(ws, args.NodeID) 1655 if err != nil { 1656 setError(err, false) 1657 return nil 1658 } 1659 if node == nil { 1660 setError(fmt.Errorf("Node %q does not exist", args.NodeID), false) 1661 return nil 1662 } 1663 if node.SecretID != args.SecretID { 1664 setError(fmt.Errorf("SecretID mismatch"), false) 1665 return nil 1666 } 1667 1668 alloc, err := snap.AllocByID(ws, args.AllocID) 1669 if err != nil { 1670 setError(err, false) 1671 return nil 1672 } 1673 if alloc == nil { 1674 setError(fmt.Errorf("Allocation %q does not exist", args.AllocID), false) 1675 return nil 1676 } 1677 if alloc.NodeID != args.NodeID { 1678 setError(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false) 1679 return nil 1680 } 1681 if alloc.TerminalStatus() { 1682 setError(fmt.Errorf("Can't request Vault token for terminal allocation"), false) 1683 return nil 1684 } 1685 1686 // Check if alloc has Vault 1687 vaultBlocks := alloc.Job.Vault() 1688 if vaultBlocks == nil { 1689 setError(fmt.Errorf("Job does not require Vault token"), false) 1690 return nil 
1691 } 1692 tg, ok := vaultBlocks[alloc.TaskGroup] 1693 if !ok { 1694 setError(fmt.Errorf("Task group does not require Vault token"), false) 1695 return nil 1696 } 1697 1698 var unneeded []string 1699 for _, task := range args.Tasks { 1700 taskVault := tg[task] 1701 if taskVault == nil || len(taskVault.Policies) == 0 { 1702 unneeded = append(unneeded, task) 1703 } 1704 } 1705 1706 if len(unneeded) != 0 { 1707 e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s", 1708 strings.Join(unneeded, ", ")) 1709 setError(e, false) 1710 return nil 1711 } 1712 1713 // At this point the request is valid and we should contact Vault for 1714 // tokens. 1715 1716 // Create an error group where we will spin up a fixed set of goroutines to 1717 // handle deriving tokens but where if any fails the whole group is 1718 // canceled. 1719 g, ctx := errgroup.WithContext(context.Background()) 1720 1721 // Cap the handlers 1722 handlers := len(args.Tasks) 1723 if handlers > maxParallelRequestsPerDerive { 1724 handlers = maxParallelRequestsPerDerive 1725 } 1726 1727 // Create the Vault Tokens 1728 input := make(chan string, handlers) 1729 results := make(map[string]*vapi.Secret, len(args.Tasks)) 1730 for i := 0; i < handlers; i++ { 1731 g.Go(func() error { 1732 for { 1733 select { 1734 case task, ok := <-input: 1735 if !ok { 1736 return nil 1737 } 1738 1739 secret, err := n.srv.vault.CreateToken(ctx, alloc, task) 1740 if err != nil { 1741 return err 1742 } 1743 1744 results[task] = secret 1745 case <-ctx.Done(): 1746 return nil 1747 } 1748 } 1749 }) 1750 } 1751 1752 // Send the input 1753 go func() { 1754 defer close(input) 1755 for _, task := range args.Tasks { 1756 select { 1757 case <-ctx.Done(): 1758 return 1759 case input <- task: 1760 } 1761 } 1762 }() 1763 1764 // Wait for everything to complete or for an error 1765 createErr := g.Wait() 1766 1767 // Retrieve the results 1768 accessors := make([]*structs.VaultAccessor, 0, len(results)) 1769 tokens := 
make(map[string]string, len(results)) 1770 for task, secret := range results { 1771 w := secret.WrapInfo 1772 tokens[task] = w.Token 1773 accessor := &structs.VaultAccessor{ 1774 Accessor: w.WrappedAccessor, 1775 Task: task, 1776 NodeID: alloc.NodeID, 1777 AllocID: alloc.ID, 1778 CreationTTL: w.TTL, 1779 } 1780 1781 accessors = append(accessors, accessor) 1782 } 1783 1784 // If there was an error revoke the created tokens 1785 if createErr != nil { 1786 n.logger.Error("Vault token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr) 1787 1788 if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil { 1789 n.logger.Error("Vault token revocation for alloc failed", "alloc_id", alloc.ID, "error", revokeErr) 1790 } 1791 1792 if rerr, ok := createErr.(*structs.RecoverableError); ok { 1793 reply.Error = rerr 1794 } else { 1795 reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError) 1796 } 1797 1798 return nil 1799 } 1800 1801 // Commit to Raft before returning any of the tokens 1802 req := structs.VaultAccessorsRequest{Accessors: accessors} 1803 _, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req) 1804 if err != nil { 1805 n.logger.Error("registering Vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err) 1806 1807 // Determine if we can recover from the error 1808 retry := false 1809 switch err { 1810 case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout: 1811 retry = true 1812 } 1813 1814 setError(err, retry) 1815 return nil 1816 } 1817 1818 reply.Index = index 1819 reply.Tasks = tokens 1820 n.srv.setQueryMeta(&reply.QueryMeta) 1821 return nil 1822 } 1823 1824 type connectTask struct { 1825 TaskKind structs.TaskKind 1826 TaskName string 1827 } 1828 1829 func (n *Node) DeriveSIToken(args *structs.DeriveSITokenRequest, reply *structs.DeriveSITokenResponse) error { 1830 setError := func(e error, 
recoverable bool) { 1831 if e != nil { 1832 if re, ok := e.(*structs.RecoverableError); ok { 1833 reply.Error = re // No need to wrap if error is already a RecoverableError 1834 } else { 1835 reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError) 1836 } 1837 n.logger.Error("DeriveSIToken failed", "recoverable", recoverable, "error", e) 1838 } 1839 } 1840 1841 if done, err := n.srv.forward("Node.DeriveSIToken", args, args, reply); done { 1842 setError(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader) 1843 return nil 1844 } 1845 defer metrics.MeasureSince([]string{"nomad", "client", "derive_si_token"}, time.Now()) 1846 1847 // Verify the arguments 1848 if err := args.Validate(); err != nil { 1849 setError(err, false) 1850 return nil 1851 } 1852 1853 // Get the ClusterID 1854 clusterID, err := n.srv.ClusterID() 1855 if err != nil { 1856 setError(err, false) 1857 return nil 1858 } 1859 1860 // Verify the following: 1861 // * The Node exists and has the correct SecretID. 1862 // * The Allocation exists on the specified Node. 1863 // * The Allocation contains the given tasks, and each task requires a 1864 // SI token. 
1865 1866 snap, err := n.srv.fsm.State().Snapshot() 1867 if err != nil { 1868 setError(err, false) 1869 return nil 1870 } 1871 node, err := snap.NodeByID(nil, args.NodeID) 1872 if err != nil { 1873 setError(err, false) 1874 return nil 1875 } 1876 if node == nil { 1877 setError(fmt.Errorf("Node %q does not exist", args.NodeID), false) 1878 return nil 1879 } 1880 if node.SecretID != args.SecretID { 1881 setError(errors.New("SecretID mismatch"), false) 1882 return nil 1883 } 1884 1885 alloc, err := snap.AllocByID(nil, args.AllocID) 1886 if err != nil { 1887 setError(err, false) 1888 return nil 1889 } 1890 if alloc == nil { 1891 setError(fmt.Errorf("Allocation %q does not exist", args.AllocID), false) 1892 return nil 1893 } 1894 if alloc.NodeID != args.NodeID { 1895 setError(fmt.Errorf("Allocation %q not running on node %q", args.AllocID, args.NodeID), false) 1896 return nil 1897 } 1898 if alloc.TerminalStatus() { 1899 setError(errors.New("Cannot request SI token for terminal allocation"), false) 1900 return nil 1901 } 1902 1903 // make sure task group contains at least one connect enabled service 1904 tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) 1905 if tg == nil { 1906 setError(fmt.Errorf("Allocation %q does not contain TaskGroup %q", args.AllocID, alloc.TaskGroup), false) 1907 return nil 1908 } 1909 if !tg.UsesConnect() { 1910 setError(fmt.Errorf("TaskGroup %q does not use Connect", tg.Name), false) 1911 return nil 1912 } 1913 1914 // make sure each task in args.Tasks is a connect-enabled task 1915 notConnect, tasks := connectTasks(tg, args.Tasks) 1916 if len(notConnect) > 0 { 1917 setError(fmt.Errorf( 1918 "Requested Consul Service Identity tokens for tasks that are not Connect enabled: %v", 1919 strings.Join(notConnect, ", "), 1920 ), false) 1921 } 1922 1923 // At this point the request is valid and we should contact Consul for tokens. 1924 1925 // A lot of the following is copied from DeriveVaultToken which has been 1926 // working fine for years. 
1927 1928 // Create an error group where we will spin up a fixed set of goroutines to 1929 // handle deriving tokens but where if any fails the whole group is 1930 // canceled. 1931 g, ctx := errgroup.WithContext(context.Background()) 1932 1933 // Cap the worker threads 1934 numWorkers := len(args.Tasks) 1935 if numWorkers > maxParallelRequestsPerDerive { 1936 numWorkers = maxParallelRequestsPerDerive 1937 } 1938 1939 // would like to pull some of this out... 1940 1941 // Create the SI tokens from a slice of task name + connect service 1942 input := make(chan connectTask, numWorkers) 1943 results := make(map[string]*structs.SIToken, numWorkers) 1944 for i := 0; i < numWorkers; i++ { 1945 g.Go(func() error { 1946 for { 1947 select { 1948 case task, ok := <-input: 1949 if !ok { 1950 return nil 1951 } 1952 secret, err := n.srv.consulACLs.CreateToken(ctx, ServiceIdentityRequest{ 1953 ConsulNamespace: tg.Consul.GetNamespace(), 1954 TaskKind: task.TaskKind, 1955 TaskName: task.TaskName, 1956 ClusterID: clusterID, 1957 AllocID: alloc.ID, 1958 }) 1959 if err != nil { 1960 return err 1961 } 1962 results[task.TaskName] = secret 1963 case <-ctx.Done(): 1964 return nil 1965 } 1966 } 1967 }) 1968 } 1969 1970 // Send the input 1971 go func() { 1972 defer close(input) 1973 for _, connectTask := range tasks { 1974 select { 1975 case <-ctx.Done(): 1976 return 1977 case input <- connectTask: 1978 } 1979 } 1980 }() 1981 1982 // Wait for everything to complete or for an error 1983 createErr := g.Wait() 1984 1985 accessors := make([]*structs.SITokenAccessor, 0, len(results)) 1986 tokens := make(map[string]string, len(results)) 1987 for task, secret := range results { 1988 tokens[task] = secret.SecretID 1989 accessor := &structs.SITokenAccessor{ 1990 ConsulNamespace: tg.Consul.GetNamespace(), 1991 NodeID: alloc.NodeID, 1992 AllocID: alloc.ID, 1993 TaskName: task, 1994 AccessorID: secret.AccessorID, 1995 } 1996 accessors = append(accessors, accessor) 1997 } 1998 1999 // If there was an 
error, revoke all created tokens. These tokens have not 2000 // yet been committed to the persistent store. 2001 if createErr != nil { 2002 n.logger.Error("Consul Service Identity token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr) 2003 _ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, false) 2004 2005 if recoverable, ok := createErr.(*structs.RecoverableError); ok { 2006 reply.Error = recoverable 2007 } else { 2008 reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError) 2009 } 2010 2011 return nil 2012 } 2013 2014 // Commit the derived tokens to raft before returning them 2015 requested := structs.SITokenAccessorsRequest{Accessors: accessors} 2016 _, index, err := n.srv.raftApply(structs.ServiceIdentityAccessorRegisterRequestType, &requested) 2017 if err != nil { 2018 n.logger.Error("registering Service Identity token accessors for alloc failed", "alloc_id", alloc.ID, "error", err) 2019 2020 // Determine if we can recover from the error 2021 retry := false 2022 switch err { 2023 case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout: 2024 retry = true 2025 } 2026 setError(err, retry) 2027 return nil 2028 } 2029 2030 // We made it! Now we can set the reply. 
2031 reply.Index = index 2032 reply.Tokens = tokens 2033 n.srv.setQueryMeta(&reply.QueryMeta) 2034 return nil 2035 } 2036 2037 func connectTasks(tg *structs.TaskGroup, tasks []string) ([]string, []connectTask) { 2038 var notConnect []string 2039 var usesConnect []connectTask 2040 for _, task := range tasks { 2041 tgTask := tg.LookupTask(task) 2042 if !taskUsesConnect(tgTask) { 2043 notConnect = append(notConnect, task) 2044 } else { 2045 usesConnect = append(usesConnect, connectTask{ 2046 TaskName: task, 2047 TaskKind: tgTask.Kind, 2048 }) 2049 } 2050 } 2051 return notConnect, usesConnect 2052 } 2053 2054 func taskUsesConnect(task *structs.Task) bool { 2055 if task == nil { 2056 // not even in the task group 2057 return false 2058 } 2059 return task.UsesConnect() 2060 } 2061 2062 func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error { 2063 // Ensure the connection was initiated by another client if TLS is used. 2064 err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient) 2065 if err != nil { 2066 return err 2067 } 2068 2069 if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done { 2070 return err 2071 } 2072 defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now()) 2073 2074 if len(args.NodeEvents) == 0 { 2075 return fmt.Errorf("no node events given") 2076 } 2077 for nodeID, events := range args.NodeEvents { 2078 if len(events) == 0 { 2079 return fmt.Errorf("no node events given for node %q", nodeID) 2080 } 2081 } 2082 2083 _, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args) 2084 if err != nil { 2085 n.logger.Error("upserting node events failed", "error", err) 2086 return err 2087 } 2088 2089 reply.Index = index 2090 return nil 2091 }