gopkg.in/hashicorp/nomad.v0@v0.11.8/nomad/node_endpoint.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"golang.org/x/sync/errgroup"
    11  
    12  	metrics "github.com/armon/go-metrics"
    13  	log "github.com/hashicorp/go-hclog"
    14  	memdb "github.com/hashicorp/go-memdb"
    15  	multierror "github.com/hashicorp/go-multierror"
    16  	vapi "github.com/hashicorp/vault/api"
    17  
    18  	"github.com/hashicorp/nomad/acl"
    19  	"github.com/hashicorp/nomad/helper/uuid"
    20  	"github.com/hashicorp/nomad/nomad/state"
    21  	"github.com/hashicorp/nomad/nomad/structs"
    22  	"github.com/hashicorp/raft"
    23  	"github.com/pkg/errors"
    24  )
    25  
    26  const (
    27  	// batchUpdateInterval is how long we wait to batch updates
    28  	batchUpdateInterval = 50 * time.Millisecond
    29  
    30  	// maxParallelRequestsPerDerive  is the maximum number of parallel Vault
    31  	// create token requests that may be outstanding per derive request
    32  	maxParallelRequestsPerDerive = 16
    33  
    34  	// NodeDrainEvents are the various drain messages
    35  	NodeDrainEventDrainSet      = "Node drain strategy set"
    36  	NodeDrainEventDrainDisabled = "Node drain disabled"
    37  	NodeDrainEventDrainUpdated  = "Node drain stategy updated"
    38  
    39  	// NodeEligibilityEventEligible is used when the nodes eligiblity is marked
    40  	// eligible
    41  	NodeEligibilityEventEligible = "Node marked as eligible for scheduling"
    42  
    43  	// NodeEligibilityEventIneligible is used when the nodes eligiblity is marked
    44  	// ineligible
    45  	NodeEligibilityEventIneligible = "Node marked as ineligible for scheduling"
    46  
    47  	// NodeHeartbeatEventReregistered is the message used when the node becomes
    48  	// reregistered by the heartbeat.
    49  	NodeHeartbeatEventReregistered = "Node reregistered by heartbeat"
    50  )
    51  
    52  // Node endpoint is used for client interactions
    53  type Node struct {
    54  	srv    *Server
    55  	logger log.Logger
    56  
    57  	// ctx provides context regarding the underlying connection
    58  	ctx *RPCContext
    59  
    60  	// updates holds pending client status updates for allocations
    61  	updates []*structs.Allocation
    62  
    63  	// evals holds pending rescheduling eval updates triggered by failed allocations
    64  	evals []*structs.Evaluation
    65  
    66  	// updateFuture is used to wait for the pending batch update
    67  	// to complete. This may be nil if no batch is pending.
    68  	updateFuture *structs.BatchFuture
    69  
    70  	// updateTimer is the timer that will trigger the next batch
    71  	// update, and may be nil if there is no batch pending.
    72  	updateTimer *time.Timer
    73  
    74  	// updatesLock synchronizes access to the updates list,
    75  	// the future and the timer.
    76  	updatesLock sync.Mutex
    77  }
    78  
    79  // Register is used to upsert a client that is available for scheduling
    80  func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
    81  	isForwarded := args.IsForwarded()
    82  	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
    83  		// We have a valid node connection since there is no error from the
    84  		// forwarded server, so add the mapping to cache the
    85  		// connection and allow the server to send RPCs to the client.
    86  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
    87  			n.ctx.NodeID = args.Node.ID
    88  			n.srv.addNodeConn(n.ctx)
    89  		}
    90  
    91  		return err
    92  	}
    93  	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())
    94  
    95  	// Validate the arguments
    96  	if args.Node == nil {
    97  		return fmt.Errorf("missing node for client registration")
    98  	}
    99  	if args.Node.ID == "" {
   100  		return fmt.Errorf("missing node ID for client registration")
   101  	}
   102  	if args.Node.Datacenter == "" {
   103  		return fmt.Errorf("missing datacenter for client registration")
   104  	}
   105  	if args.Node.Name == "" {
   106  		return fmt.Errorf("missing node name for client registration")
   107  	}
   108  	if len(args.Node.Attributes) == 0 {
   109  		return fmt.Errorf("missing attributes for client registration")
   110  	}
   111  	if args.Node.SecretID == "" {
   112  		return fmt.Errorf("missing node secret ID for client registration")
   113  	}
   114  
   115  	// Default the status if none is given
   116  	if args.Node.Status == "" {
   117  		args.Node.Status = structs.NodeStatusInit
   118  	}
   119  	if !structs.ValidNodeStatus(args.Node.Status) {
   120  		return fmt.Errorf("invalid status for node")
   121  	}
   122  
   123  	// Default to eligible for scheduling if unset
   124  	if args.Node.SchedulingEligibility == "" {
   125  		args.Node.SchedulingEligibility = structs.NodeSchedulingEligible
   126  	}
   127  
   128  	// Set the timestamp when the node is registered
   129  	args.Node.StatusUpdatedAt = time.Now().Unix()
   130  
   131  	// Compute the node class
   132  	if err := args.Node.ComputeClass(); err != nil {
   133  		return fmt.Errorf("failed to computed node class: %v", err)
   134  	}
   135  
   136  	// Look for the node so we can detect a state transition
   137  	snap, err := n.srv.fsm.State().Snapshot()
   138  	if err != nil {
   139  		return err
   140  	}
   141  
   142  	ws := memdb.NewWatchSet()
   143  	originalNode, err := snap.NodeByID(ws, args.Node.ID)
   144  	if err != nil {
   145  		return err
   146  	}
   147  
   148  	// Check if the SecretID has been tampered with
   149  	if originalNode != nil {
   150  		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
   151  			return fmt.Errorf("node secret ID does not match. Not registering node.")
   152  		}
   153  	}
   154  
   155  	// We have a valid node connection, so add the mapping to cache the
   156  	// connection and allow the server to send RPCs to the client. We only cache
   157  	// the connection if it is not being forwarded from another server.
   158  	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   159  		n.ctx.NodeID = args.Node.ID
   160  		n.srv.addNodeConn(n.ctx)
   161  	}
   162  
   163  	// Commit this update via Raft
   164  	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
   165  	if err != nil {
   166  		n.logger.Error("register failed", "error", err)
   167  		return err
   168  	}
   169  	reply.NodeModifyIndex = index
   170  
   171  	// Check if we should trigger evaluations
   172  	originalStatus := structs.NodeStatusInit
   173  	if originalNode != nil {
   174  		originalStatus = originalNode.Status
   175  	}
   176  	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
   177  	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
   178  		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
   179  		if err != nil {
   180  			n.logger.Error("eval creation failed", "error", err)
   181  			return err
   182  		}
   183  		reply.EvalIDs = evalIDs
   184  		reply.EvalCreateIndex = evalIndex
   185  	}
   186  
   187  	// Check if we need to setup a heartbeat
   188  	if !args.Node.TerminalStatus() {
   189  		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
   190  		if err != nil {
   191  			n.logger.Error("heartbeat reset failed", "error", err)
   192  			return err
   193  		}
   194  		reply.HeartbeatTTL = ttl
   195  	}
   196  
   197  	// Set the reply index
   198  	reply.Index = index
   199  	snap, err = n.srv.fsm.State().Snapshot()
   200  	if err != nil {
   201  		return err
   202  	}
   203  
   204  	n.srv.peerLock.RLock()
   205  	defer n.srv.peerLock.RUnlock()
   206  	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
   207  		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
   208  		return err
   209  	}
   210  
   211  	return nil
   212  }
   213  
   214  // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading.
   215  func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
   216  	reply.LeaderRPCAddr = string(n.srv.raft.Leader())
   217  
   218  	// Reply with config information required for future RPC requests
   219  	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
   220  	for _, v := range n.srv.localPeers {
   221  		reply.Servers = append(reply.Servers,
   222  			&structs.NodeServerInfo{
   223  				RPCAdvertiseAddr: v.RPCAddr.String(),
   224  				RPCMajorVersion:  int32(v.MajorVersion),
   225  				RPCMinorVersion:  int32(v.MinorVersion),
   226  				Datacenter:       v.Datacenter,
   227  			})
   228  	}
   229  
   230  	// TODO(sean@): Use an indexed node count instead
   231  	//
   232  	// Snapshot is used only to iterate over all nodes to create a node
   233  	// count to send back to Nomad Clients in their heartbeat so Clients
   234  	// can estimate the size of the cluster.
   235  	ws := memdb.NewWatchSet()
   236  	iter, err := snap.Nodes(ws)
   237  	if err == nil {
   238  		for {
   239  			raw := iter.Next()
   240  			if raw == nil {
   241  				break
   242  			}
   243  			reply.NumNodes++
   244  		}
   245  	}
   246  
   247  	return nil
   248  }
   249  
   250  // Deregister is used to remove a client from the cluster. If a client should
   251  // just be made unavailable for scheduling, a status update is preferred.
   252  func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
   253  	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
   254  		return err
   255  	}
   256  	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())
   257  
   258  	if args.NodeID == "" {
   259  		return fmt.Errorf("missing node ID for client deregistration")
   260  	}
   261  
   262  	// deregister takes a batch
   263  	repack := &structs.NodeBatchDeregisterRequest{
   264  		NodeIDs:      []string{args.NodeID},
   265  		WriteRequest: args.WriteRequest,
   266  	}
   267  
   268  	return n.deregister(repack, reply, func() (interface{}, uint64, error) {
   269  		return n.srv.raftApply(structs.NodeDeregisterRequestType, args)
   270  	})
   271  }
   272  
   273  // BatchDeregister is used to remove client nodes from the cluster.
   274  func (n *Node) BatchDeregister(args *structs.NodeBatchDeregisterRequest, reply *structs.NodeUpdateResponse) error {
   275  	if done, err := n.srv.forward("Node.BatchDeregister", args, args, reply); done {
   276  		return err
   277  	}
   278  	defer metrics.MeasureSince([]string{"nomad", "client", "batch_deregister"}, time.Now())
   279  
   280  	if len(args.NodeIDs) == 0 {
   281  		return fmt.Errorf("missing node IDs for client deregistration")
   282  	}
   283  
   284  	return n.deregister(args, reply, func() (interface{}, uint64, error) {
   285  		return n.srv.raftApply(structs.NodeBatchDeregisterRequestType, args)
   286  	})
   287  }
   288  
   289  // deregister takes a raftMessage closure, to support both Deregister and BatchDeregister
   290  func (n *Node) deregister(args *structs.NodeBatchDeregisterRequest,
   291  	reply *structs.NodeUpdateResponse,
   292  	raftApplyFn func() (interface{}, uint64, error),
   293  ) error {
   294  	// Check request permissions
   295  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   296  		return err
   297  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   298  		return structs.ErrPermissionDenied
   299  	}
   300  
   301  	// Look for the node
   302  	snap, err := n.srv.fsm.State().Snapshot()
   303  	if err != nil {
   304  		return err
   305  	}
   306  
   307  	ws := memdb.NewWatchSet()
   308  	for _, nodeID := range args.NodeIDs {
   309  		node, err := snap.NodeByID(ws, nodeID)
   310  		if err != nil {
   311  			return err
   312  		}
   313  		if node == nil {
   314  			return fmt.Errorf("node not found")
   315  		}
   316  	}
   317  
   318  	// Commit this update via Raft
   319  	_, index, err := raftApplyFn()
   320  	if err != nil {
   321  		n.logger.Error("raft message failed", "error", err)
   322  		return err
   323  	}
   324  
   325  	for _, nodeID := range args.NodeIDs {
   326  		// Clear the heartbeat timer if any
   327  		n.srv.clearHeartbeatTimer(nodeID)
   328  
   329  		// Create the evaluations for this node
   330  		evalIDs, evalIndex, err := n.createNodeEvals(nodeID, index)
   331  		if err != nil {
   332  			n.logger.Error("eval creation failed", "error", err)
   333  			return err
   334  		}
   335  
   336  		// Determine if there are any Vault accessors on the node
   337  		if accessors, err := snap.VaultAccessorsByNode(ws, nodeID); err != nil {
   338  			n.logger.Error("looking up vault accessors for node failed", "node_id", nodeID, "error", err)
   339  			return err
   340  		} else if l := len(accessors); l > 0 {
   341  			n.logger.Debug("revoking vault accessors on node due to deregister", "num_accessors", l, "node_id", nodeID)
   342  			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
   343  				n.logger.Error("revoking vault accessors for node failed", "node_id", nodeID, "error", err)
   344  				return err
   345  			}
   346  		}
   347  
   348  		// Determine if there are any SI token accessors on the node
   349  		if accessors, err := snap.SITokenAccessorsByNode(ws, nodeID); err != nil {
   350  			n.logger.Error("looking up si accessors for node failed", "node_id", nodeID, "error", err)
   351  			return err
   352  		} else if l := len(accessors); l > 0 {
   353  			n.logger.Debug("revoking si accessors on node due to deregister", "num_accessors", l, "node_id", nodeID)
   354  			// Unlike with the Vault integration, there's no error returned here, since
   355  			// bootstrapping the Consul client is elsewhere. Errors in revocation trigger
   356  			// background retry attempts rather than inline error handling.
   357  			_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, true)
   358  		}
   359  
   360  		reply.EvalIDs = append(reply.EvalIDs, evalIDs...)
   361  		// Set the reply eval create index just the first time
   362  		if reply.EvalCreateIndex == 0 {
   363  			reply.EvalCreateIndex = evalIndex
   364  		}
   365  	}
   366  
   367  	reply.NodeModifyIndex = index
   368  	reply.Index = index
   369  	return nil
   370  }
   371  
   372  // UpdateStatus is used to update the status of a client node
   373  func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
   374  	isForwarded := args.IsForwarded()
   375  	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
   376  		// We have a valid node connection since there is no error from the
   377  		// forwarded server, so add the mapping to cache the
   378  		// connection and allow the server to send RPCs to the client.
   379  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
   380  			n.ctx.NodeID = args.NodeID
   381  			n.srv.addNodeConn(n.ctx)
   382  		}
   383  
   384  		return err
   385  	}
   386  	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())
   387  
   388  	// Verify the arguments
   389  	if args.NodeID == "" {
   390  		return fmt.Errorf("missing node ID for client status update")
   391  	}
   392  	if !structs.ValidNodeStatus(args.Status) {
   393  		return fmt.Errorf("invalid status for node")
   394  	}
   395  
   396  	// Look for the node
   397  	snap, err := n.srv.fsm.State().Snapshot()
   398  	if err != nil {
   399  		return err
   400  	}
   401  
   402  	ws := memdb.NewWatchSet()
   403  	node, err := snap.NodeByID(ws, args.NodeID)
   404  	if err != nil {
   405  		return err
   406  	}
   407  	if node == nil {
   408  		return fmt.Errorf("node not found")
   409  	}
   410  
   411  	// We have a valid node connection, so add the mapping to cache the
   412  	// connection and allow the server to send RPCs to the client. We only cache
   413  	// the connection if it is not being forwarded from another server.
   414  	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   415  		n.ctx.NodeID = args.NodeID
   416  		n.srv.addNodeConn(n.ctx)
   417  	}
   418  
   419  	// XXX: Could use the SecretID here but have to update the heartbeat system
   420  	// to track SecretIDs.
   421  
   422  	// Update the timestamp of when the node status was updated
   423  	args.UpdatedAt = time.Now().Unix()
   424  
   425  	// Commit this update via Raft
   426  	var index uint64
   427  	if node.Status != args.Status {
   428  		// Attach an event if we are updating the node status to ready when it
   429  		// is down via a heartbeat
   430  		if node.Status == structs.NodeStatusDown && args.NodeEvent == nil {
   431  			args.NodeEvent = structs.NewNodeEvent().
   432  				SetSubsystem(structs.NodeEventSubsystemCluster).
   433  				SetMessage(NodeHeartbeatEventReregistered)
   434  		}
   435  
   436  		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
   437  		if err != nil {
   438  			n.logger.Error("status update failed", "error", err)
   439  			return err
   440  		}
   441  		reply.NodeModifyIndex = index
   442  	}
   443  
   444  	// Check if we should trigger evaluations
   445  	transitionToReady := transitionedToReady(args.Status, node.Status)
   446  	if structs.ShouldDrainNode(args.Status) || transitionToReady {
   447  		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   448  		if err != nil {
   449  			n.logger.Error("eval creation failed", "error", err)
   450  			return err
   451  		}
   452  		reply.EvalIDs = evalIDs
   453  		reply.EvalCreateIndex = evalIndex
   454  	}
   455  
   456  	// Check if we need to setup a heartbeat
   457  	switch args.Status {
   458  	case structs.NodeStatusDown:
   459  		// Determine if there are any Vault accessors on the node to cleanup
   460  		if accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID); err != nil {
   461  			n.logger.Error("looking up vault accessors for node failed", "node_id", args.NodeID, "error", err)
   462  			return err
   463  		} else if l := len(accessors); l > 0 {
   464  			n.logger.Debug("revoking vault accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID)
   465  			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
   466  				n.logger.Error("revoking vault accessors for node failed", "node_id", args.NodeID, "error", err)
   467  				return err
   468  			}
   469  		}
   470  
   471  		// Determine if there are any SI token accessors on the node to cleanup
   472  		if accessors, err := n.srv.State().SITokenAccessorsByNode(ws, args.NodeID); err != nil {
   473  			n.logger.Error("looking up SI accessors for node failed", "node_id", args.NodeID, "error", err)
   474  			return err
   475  		} else if l := len(accessors); l > 0 {
   476  			n.logger.Debug("revoking SI accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID)
   477  			_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, true)
   478  		}
   479  	default:
   480  		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
   481  		if err != nil {
   482  			n.logger.Error("heartbeat reset failed", "error", err)
   483  			return err
   484  		}
   485  		reply.HeartbeatTTL = ttl
   486  	}
   487  
   488  	// Set the reply index and leader
   489  	reply.Index = index
   490  	n.srv.peerLock.RLock()
   491  	defer n.srv.peerLock.RUnlock()
   492  	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
   493  		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
   494  		return err
   495  	}
   496  
   497  	return nil
   498  }
   499  
   500  // transitionedToReady is a helper that takes a nodes new and old status and
   501  // returns whether it has transitioned to ready.
   502  func transitionedToReady(newStatus, oldStatus string) bool {
   503  	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
   504  	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
   505  	return initToReady || terminalToReady
   506  }
   507  
   508  // UpdateDrain is used to update the drain mode of a client node
   509  func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
   510  	reply *structs.NodeDrainUpdateResponse) error {
   511  	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
   512  		return err
   513  	}
   514  	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())
   515  
   516  	// Check node write permissions
   517  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   518  		return err
   519  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   520  		return structs.ErrPermissionDenied
   521  	}
   522  
   523  	// Verify the arguments
   524  	if args.NodeID == "" {
   525  		return fmt.Errorf("missing node ID for drain update")
   526  	}
   527  	if args.NodeEvent != nil {
   528  		return fmt.Errorf("node event must not be set")
   529  	}
   530  
   531  	// Look for the node
   532  	snap, err := n.srv.fsm.State().Snapshot()
   533  	if err != nil {
   534  		return err
   535  	}
   536  	node, err := snap.NodeByID(nil, args.NodeID)
   537  	if err != nil {
   538  		return err
   539  	}
   540  	if node == nil {
   541  		return fmt.Errorf("node not found")
   542  	}
   543  
   544  	now := time.Now().UTC()
   545  
   546  	// Update the timestamp of when the node status was updated
   547  	args.UpdatedAt = now.Unix()
   548  
   549  	// COMPAT: Remove in 0.9. Attempt to upgrade the request if it is of the old
   550  	// format.
   551  	if args.Drain && args.DrainStrategy == nil {
   552  		args.DrainStrategy = &structs.DrainStrategy{
   553  			DrainSpec: structs.DrainSpec{
   554  				Deadline: -1 * time.Second, // Force drain
   555  			},
   556  		}
   557  	}
   558  
   559  	// Setup drain strategy
   560  	if args.DrainStrategy != nil {
   561  		// Mark start time for the drain
   562  		if node.DrainStrategy == nil {
   563  			args.DrainStrategy.StartedAt = now
   564  		} else {
   565  			args.DrainStrategy.StartedAt = node.DrainStrategy.StartedAt
   566  		}
   567  
   568  		// Mark the deadline time
   569  		if args.DrainStrategy.Deadline.Nanoseconds() > 0 {
   570  			args.DrainStrategy.ForceDeadline = now.Add(args.DrainStrategy.Deadline)
   571  		}
   572  	}
   573  
   574  	// Construct the node event
   575  	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemDrain)
   576  	if node.DrainStrategy == nil && args.DrainStrategy != nil {
   577  		args.NodeEvent.SetMessage(NodeDrainEventDrainSet)
   578  	} else if node.DrainStrategy != nil && args.DrainStrategy != nil {
   579  		args.NodeEvent.SetMessage(NodeDrainEventDrainUpdated)
   580  	} else if node.DrainStrategy != nil && args.DrainStrategy == nil {
   581  		args.NodeEvent.SetMessage(NodeDrainEventDrainDisabled)
   582  	} else {
   583  		args.NodeEvent = nil
   584  	}
   585  
   586  	// Commit this update via Raft
   587  	_, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
   588  	if err != nil {
   589  		n.logger.Error("drain update failed", "error", err)
   590  		return err
   591  	}
   592  	reply.NodeModifyIndex = index
   593  
   594  	// If the node is transitioning to be eligible, create Node evaluations
   595  	// because there may be a System job registered that should be evaluated.
   596  	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil {
   597  		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   598  		if err != nil {
   599  			n.logger.Error("eval creation failed", "error", err)
   600  			return err
   601  		}
   602  		reply.EvalIDs = evalIDs
   603  		reply.EvalCreateIndex = evalIndex
   604  	}
   605  
   606  	// Set the reply index
   607  	reply.Index = index
   608  	return nil
   609  }
   610  
   611  // UpdateEligibility is used to update the scheduling eligibility of a node
   612  func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest,
   613  	reply *structs.NodeEligibilityUpdateResponse) error {
   614  	if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done {
   615  		return err
   616  	}
   617  	defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now())
   618  
   619  	// Check node write permissions
   620  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   621  		return err
   622  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   623  		return structs.ErrPermissionDenied
   624  	}
   625  
   626  	// Verify the arguments
   627  	if args.NodeID == "" {
   628  		return fmt.Errorf("missing node ID for setting scheduling eligibility")
   629  	}
   630  	if args.NodeEvent != nil {
   631  		return fmt.Errorf("node event must not be set")
   632  	}
   633  
   634  	// Check that only allowed types are set
   635  	switch args.Eligibility {
   636  	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
   637  	default:
   638  		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
   639  	}
   640  
   641  	// Look for the node
   642  	snap, err := n.srv.fsm.State().Snapshot()
   643  	if err != nil {
   644  		return err
   645  	}
   646  	node, err := snap.NodeByID(nil, args.NodeID)
   647  	if err != nil {
   648  		return err
   649  	}
   650  	if node == nil {
   651  		return fmt.Errorf("node not found")
   652  	}
   653  
   654  	if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible {
   655  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
   656  	}
   657  
   658  	switch args.Eligibility {
   659  	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
   660  	default:
   661  		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
   662  	}
   663  
   664  	// Update the timestamp of when the node status was updated
   665  	args.UpdatedAt = time.Now().Unix()
   666  
   667  	// Construct the node event
   668  	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster)
   669  	if node.SchedulingEligibility == args.Eligibility {
   670  		return nil // Nothing to do
   671  	} else if args.Eligibility == structs.NodeSchedulingEligible {
   672  		args.NodeEvent.SetMessage(NodeEligibilityEventEligible)
   673  	} else {
   674  		args.NodeEvent.SetMessage(NodeEligibilityEventIneligible)
   675  	}
   676  
   677  	// Commit this update via Raft
   678  	outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args)
   679  	if err != nil {
   680  		n.logger.Error("eligibility update failed", "error", err)
   681  		return err
   682  	}
   683  	if outErr != nil {
   684  		if err, ok := outErr.(error); ok && err != nil {
   685  			n.logger.Error("eligibility update failed", "error", err)
   686  			return err
   687  		}
   688  	}
   689  
   690  	// If the node is transitioning to be eligible, create Node evaluations
   691  	// because there may be a System job registered that should be evaluated.
   692  	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible {
   693  		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   694  		if err != nil {
   695  			n.logger.Error("eval creation failed", "error", err)
   696  			return err
   697  		}
   698  		reply.EvalIDs = evalIDs
   699  		reply.EvalCreateIndex = evalIndex
   700  	}
   701  
   702  	// Set the reply index
   703  	reply.Index = index
   704  	return nil
   705  }
   706  
   707  // Evaluate is used to force a re-evaluation of the node
   708  func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
   709  	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
   710  		return err
   711  	}
   712  	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())
   713  
   714  	// Check node write permissions
   715  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   716  		return err
   717  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   718  		return structs.ErrPermissionDenied
   719  	}
   720  
   721  	// Verify the arguments
   722  	if args.NodeID == "" {
   723  		return fmt.Errorf("missing node ID for evaluation")
   724  	}
   725  
   726  	// Look for the node
   727  	snap, err := n.srv.fsm.State().Snapshot()
   728  	if err != nil {
   729  		return err
   730  	}
   731  	ws := memdb.NewWatchSet()
   732  	node, err := snap.NodeByID(ws, args.NodeID)
   733  	if err != nil {
   734  		return err
   735  	}
   736  	if node == nil {
   737  		return fmt.Errorf("node not found")
   738  	}
   739  
   740  	// Create the evaluation
   741  	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
   742  	if err != nil {
   743  		n.logger.Error("eval creation failed", "error", err)
   744  		return err
   745  	}
   746  	reply.EvalIDs = evalIDs
   747  	reply.EvalCreateIndex = evalIndex
   748  
   749  	// Set the reply index
   750  	reply.Index = evalIndex
   751  
   752  	n.srv.peerLock.RLock()
   753  	defer n.srv.peerLock.RUnlock()
   754  	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
   755  		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
   756  		return err
   757  	}
   758  	return nil
   759  }
   760  
   761  // GetNode is used to request information about a specific node
   762  func (n *Node) GetNode(args *structs.NodeSpecificRequest,
   763  	reply *structs.SingleNodeResponse) error {
   764  	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
   765  		return err
   766  	}
   767  	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())
   768  
   769  	// Check node read permissions
   770  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   771  		// If ResolveToken had an unexpected error return that
   772  		if err != structs.ErrTokenNotFound {
   773  			return err
   774  		}
   775  
   776  		// Attempt to lookup AuthToken as a Node.SecretID since nodes
   777  		// call this endpoint and don't have an ACL token.
   778  		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
   779  		if stateErr != nil {
   780  			// Return the original ResolveToken error with this err
   781  			var merr multierror.Error
   782  			merr.Errors = append(merr.Errors, err, stateErr)
   783  			return merr.ErrorOrNil()
   784  		}
   785  
   786  		// Not a node or a valid ACL token
   787  		if node == nil {
   788  			return structs.ErrTokenNotFound
   789  		}
   790  	} else if aclObj != nil && !aclObj.AllowNodeRead() {
   791  		return structs.ErrPermissionDenied
   792  	}
   793  
   794  	// Setup the blocking query
   795  	opts := blockingOptions{
   796  		queryOpts: &args.QueryOptions,
   797  		queryMeta: &reply.QueryMeta,
   798  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   799  			// Verify the arguments
   800  			if args.NodeID == "" {
   801  				return fmt.Errorf("missing node ID")
   802  			}
   803  
   804  			// Look for the node
   805  			out, err := state.NodeByID(ws, args.NodeID)
   806  			if err != nil {
   807  				return err
   808  			}
   809  
   810  			// Setup the output
   811  			if out != nil {
   812  				// Clear the secret ID
   813  				reply.Node = out.Copy()
   814  				reply.Node.SecretID = ""
   815  				reply.Index = out.ModifyIndex
   816  			} else {
   817  				// Use the last index that affected the nodes table
   818  				index, err := state.Index("nodes")
   819  				if err != nil {
   820  					return err
   821  				}
   822  				reply.Node = nil
   823  				reply.Index = index
   824  			}
   825  
   826  			// Set the query response
   827  			n.srv.setQueryMeta(&reply.QueryMeta)
   828  			return nil
   829  		}}
   830  	return n.srv.blockingRPC(&opts)
   831  }
   832  
   833  // GetAllocs is used to request allocations for a specific node
   834  func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
   835  	reply *structs.NodeAllocsResponse) error {
   836  	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
   837  		return err
   838  	}
   839  	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())
   840  
   841  	// Check node read and namespace job read permissions
   842  	aclObj, err := n.srv.ResolveToken(args.AuthToken)
   843  	if err != nil {
   844  		return err
   845  	}
   846  	if aclObj != nil && !aclObj.AllowNodeRead() {
   847  		return structs.ErrPermissionDenied
   848  	}
   849  
   850  	// cache namespace perms
   851  	readableNamespaces := map[string]bool{}
   852  
   853  	// readNS is a caching namespace read-job helper
   854  	readNS := func(ns string) bool {
   855  		if aclObj == nil {
   856  			// ACLs are disabled; everything is readable
   857  			return true
   858  		}
   859  
   860  		if readable, ok := readableNamespaces[ns]; ok {
   861  			// cache hit
   862  			return readable
   863  		}
   864  
   865  		// cache miss
   866  		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
   867  		readableNamespaces[ns] = readable
   868  		return readable
   869  	}
   870  
   871  	// Verify the arguments
   872  	if args.NodeID == "" {
   873  		return fmt.Errorf("missing node ID")
   874  	}
   875  
   876  	// Setup the blocking query
   877  	opts := blockingOptions{
   878  		queryOpts: &args.QueryOptions,
   879  		queryMeta: &reply.QueryMeta,
   880  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   881  			// Look for the node
   882  			allocs, err := state.AllocsByNode(ws, args.NodeID)
   883  			if err != nil {
   884  				return err
   885  			}
   886  
   887  			// Setup the output
   888  			if n := len(allocs); n != 0 {
   889  				reply.Allocs = make([]*structs.Allocation, 0, n)
   890  				for _, alloc := range allocs {
   891  					if readNS(alloc.Namespace) {
   892  						reply.Allocs = append(reply.Allocs, alloc)
   893  					}
   894  
   895  					// Get the max of all allocs since
   896  					// subsequent requests need to start
   897  					// from the latest index
   898  					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
   899  				}
   900  			} else {
   901  				reply.Allocs = nil
   902  
   903  				// Use the last index that affected the nodes table
   904  				index, err := state.Index("allocs")
   905  				if err != nil {
   906  					return err
   907  				}
   908  
   909  				// Must provide non-zero index to prevent blocking
   910  				// Index 1 is impossible anyways (due to Raft internals)
   911  				if index == 0 {
   912  					reply.Index = 1
   913  				} else {
   914  					reply.Index = index
   915  				}
   916  			}
   917  			return nil
   918  		}}
   919  	return n.srv.blockingRPC(&opts)
   920  }
   921  
   922  // GetClientAllocs is used to request a lightweight list of alloc modify indexes
   923  // per allocation.
   924  func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
   925  	reply *structs.NodeClientAllocsResponse) error {
   926  	isForwarded := args.IsForwarded()
   927  	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
   928  		// We have a valid node connection since there is no error from the
   929  		// forwarded server, so add the mapping to cache the
   930  		// connection and allow the server to send RPCs to the client.
   931  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
   932  			n.ctx.NodeID = args.NodeID
   933  			n.srv.addNodeConn(n.ctx)
   934  		}
   935  
   936  		return err
   937  	}
   938  	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())
   939  
   940  	// Verify the arguments
   941  	if args.NodeID == "" {
   942  		return fmt.Errorf("missing node ID")
   943  	}
   944  
   945  	// numOldAllocs is used to detect if there is a garbage collection event
   946  	// that effects the node. When an allocation is garbage collected, that does
   947  	// not change the modify index changes and thus the query won't unblock,
   948  	// even though the set of allocations on the node has changed.
   949  	var numOldAllocs int
   950  
   951  	// Setup the blocking query
   952  	opts := blockingOptions{
   953  		queryOpts: &args.QueryOptions,
   954  		queryMeta: &reply.QueryMeta,
   955  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   956  			// Look for the node
   957  			node, err := state.NodeByID(ws, args.NodeID)
   958  			if err != nil {
   959  				return err
   960  			}
   961  
   962  			var allocs []*structs.Allocation
   963  			if node != nil {
   964  				if args.SecretID == "" {
   965  					return fmt.Errorf("missing node secret ID for client status update")
   966  				} else if args.SecretID != node.SecretID {
   967  					return fmt.Errorf("node secret ID does not match")
   968  				}
   969  
   970  				// We have a valid node connection, so add the mapping to cache the
   971  				// connection and allow the server to send RPCs to the client. We only cache
   972  				// the connection if it is not being forwarded from another server.
   973  				if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   974  					n.ctx.NodeID = args.NodeID
   975  					n.srv.addNodeConn(n.ctx)
   976  				}
   977  
   978  				var err error
   979  				allocs, err = state.AllocsByNode(ws, args.NodeID)
   980  				if err != nil {
   981  					return err
   982  				}
   983  			}
   984  
   985  			reply.Allocs = make(map[string]uint64)
   986  			reply.MigrateTokens = make(map[string]string)
   987  
   988  			// preferTableIndex is used to determine whether we should build the
   989  			// response index based on the full table indexes versus the modify
   990  			// indexes of the allocations on the specific node. This is
   991  			// preferred in the case that the node doesn't yet have allocations
   992  			// or when we detect a GC that effects the node.
   993  			preferTableIndex := true
   994  
   995  			// Setup the output
   996  			if numAllocs := len(allocs); numAllocs != 0 {
   997  				preferTableIndex = false
   998  
   999  				for _, alloc := range allocs {
  1000  					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex
  1001  
  1002  					// If the allocation is going to do a migration, create a
  1003  					// migration token so that the client can authenticate with
  1004  					// the node hosting the previous allocation.
  1005  					if alloc.ShouldMigrate() {
  1006  						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
  1007  						if err != nil {
  1008  							return err
  1009  						}
  1010  
  1011  						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
  1012  							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
  1013  							if err != nil {
  1014  								return err
  1015  							}
  1016  							if allocNode == nil {
  1017  								// Node must have been GC'd so skip the token
  1018  								continue
  1019  							}
  1020  
  1021  							token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
  1022  							if err != nil {
  1023  								return err
  1024  							}
  1025  							reply.MigrateTokens[alloc.ID] = token
  1026  						}
  1027  					}
  1028  
  1029  					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
  1030  				}
  1031  
  1032  				// Determine if we have less allocations than before. This
  1033  				// indicates there was a garbage collection
  1034  				if numAllocs < numOldAllocs {
  1035  					preferTableIndex = true
  1036  				}
  1037  
  1038  				// Store the new number of allocations
  1039  				numOldAllocs = numAllocs
  1040  			}
  1041  
  1042  			if preferTableIndex {
  1043  				// Use the last index that affected the nodes table
  1044  				index, err := state.Index("allocs")
  1045  				if err != nil {
  1046  					return err
  1047  				}
  1048  
  1049  				// Must provide non-zero index to prevent blocking
  1050  				// Index 1 is impossible anyways (due to Raft internals)
  1051  				if index == 0 {
  1052  					reply.Index = 1
  1053  				} else {
  1054  					reply.Index = index
  1055  				}
  1056  			}
  1057  			return nil
  1058  		}}
  1059  	return n.srv.blockingRPC(&opts)
  1060  }
  1061  
  1062  // UpdateAlloc is used to update the client status of an allocation
  1063  func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
  1064  	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
  1065  		return err
  1066  	}
  1067  	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())
  1068  
  1069  	// Ensure at least a single alloc
  1070  	if len(args.Alloc) == 0 {
  1071  		return fmt.Errorf("must update at least one allocation")
  1072  	}
  1073  
  1074  	// Ensure that evals aren't set from client RPCs
  1075  	// We create them here before the raft update
  1076  	if len(args.Evals) != 0 {
  1077  		return fmt.Errorf("evals field must not be set")
  1078  	}
  1079  
  1080  	// Update modified timestamp for client initiated allocation updates
  1081  	now := time.Now()
  1082  	var evals []*structs.Evaluation
  1083  
  1084  	// A set of de-duplicated volumes that need their volume claims released.
  1085  	// Later we'll apply this raft.
  1086  	volumesToGC := newCSIBatchRelease(n.srv, n.logger, 100)
  1087  
  1088  	for _, allocToUpdate := range args.Alloc {
  1089  		allocToUpdate.ModifyTime = now.UTC().UnixNano()
  1090  
  1091  		if !allocToUpdate.TerminalStatus() {
  1092  			continue
  1093  		}
  1094  
  1095  		alloc, _ := n.srv.State().AllocByID(nil, allocToUpdate.ID)
  1096  		if alloc == nil {
  1097  			continue
  1098  		}
  1099  
  1100  		// if the job has been purged, this will always return error
  1101  		job, err := n.srv.State().JobByID(nil, alloc.Namespace, alloc.JobID)
  1102  		if err != nil {
  1103  			n.logger.Debug("UpdateAlloc unable to find job", "job", alloc.JobID, "error", err)
  1104  			continue
  1105  		}
  1106  		if job == nil {
  1107  			n.logger.Debug("UpdateAlloc unable to find job", "job", alloc.JobID)
  1108  			continue
  1109  		}
  1110  
  1111  		taskGroup := job.LookupTaskGroup(alloc.TaskGroup)
  1112  		if taskGroup == nil {
  1113  			continue
  1114  		}
  1115  
  1116  		// If the terminal alloc has CSI volumes, add the volumes to the batch
  1117  		// of volumes we'll release the claims of.
  1118  		for _, vol := range taskGroup.Volumes {
  1119  			if vol.Type == structs.VolumeTypeCSI {
  1120  				volumesToGC.add(vol.Source, alloc.Namespace)
  1121  			}
  1122  		}
  1123  
  1124  		// Add an evaluation if this is a failed alloc that is eligible for rescheduling
  1125  		if allocToUpdate.ClientStatus == structs.AllocClientStatusFailed && alloc.FollowupEvalID == "" && alloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) {
  1126  			eval := &structs.Evaluation{
  1127  				ID:          uuid.Generate(),
  1128  				Namespace:   alloc.Namespace,
  1129  				TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
  1130  				JobID:       alloc.JobID,
  1131  				Type:        job.Type,
  1132  				Priority:    job.Priority,
  1133  				Status:      structs.EvalStatusPending,
  1134  				CreateTime:  now.UTC().UnixNano(),
  1135  				ModifyTime:  now.UTC().UnixNano(),
  1136  			}
  1137  			evals = append(evals, eval)
  1138  		}
  1139  	}
  1140  
  1141  	// Make a raft apply to release the CSI volume claims of terminal allocs.
  1142  	var result *multierror.Error
  1143  	err := volumesToGC.apply()
  1144  	if err != nil {
  1145  		result = multierror.Append(result, err)
  1146  	}
  1147  
  1148  	// Add this to the batch
  1149  	n.updatesLock.Lock()
  1150  	n.updates = append(n.updates, args.Alloc...)
  1151  	n.evals = append(n.evals, evals...)
  1152  
  1153  	// Start a new batch if none
  1154  	future := n.updateFuture
  1155  	if future == nil {
  1156  		future = structs.NewBatchFuture()
  1157  		n.updateFuture = future
  1158  		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
  1159  			// Get the pending updates
  1160  			n.updatesLock.Lock()
  1161  			updates := n.updates
  1162  			evals := n.evals
  1163  			future := n.updateFuture
  1164  			n.updates = nil
  1165  			n.evals = nil
  1166  			n.updateFuture = nil
  1167  			n.updateTimer = nil
  1168  			n.updatesLock.Unlock()
  1169  
  1170  			// Perform the batch update
  1171  			n.batchUpdate(future, updates, evals)
  1172  		})
  1173  	}
  1174  	n.updatesLock.Unlock()
  1175  
  1176  	// Wait for the future
  1177  	if err := future.Wait(); err != nil {
  1178  		result = multierror.Append(result, err)
  1179  		return result.ErrorOrNil()
  1180  	}
  1181  
  1182  	// Setup the response
  1183  	reply.Index = future.Index()
  1184  	return result.ErrorOrNil()
  1185  }
  1186  
  1187  // batchUpdate is used to update all the allocations
  1188  func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) {
  1189  	// Group pending evals by jobID to prevent creating unnecessary evals
  1190  	evalsByJobId := make(map[structs.NamespacedID]struct{})
  1191  	var trimmedEvals []*structs.Evaluation
  1192  	for _, eval := range evals {
  1193  		namespacedID := structs.NamespacedID{
  1194  			ID:        eval.JobID,
  1195  			Namespace: eval.Namespace,
  1196  		}
  1197  		_, exists := evalsByJobId[namespacedID]
  1198  		if !exists {
  1199  			now := time.Now().UTC().UnixNano()
  1200  			eval.CreateTime = now
  1201  			eval.ModifyTime = now
  1202  			trimmedEvals = append(trimmedEvals, eval)
  1203  			evalsByJobId[namespacedID] = struct{}{}
  1204  		}
  1205  	}
  1206  
  1207  	if len(trimmedEvals) > 0 {
  1208  		n.logger.Debug("adding evaluations for rescheduling failed allocations", "num_evals", len(trimmedEvals))
  1209  	}
  1210  	// Prepare the batch update
  1211  	batch := &structs.AllocUpdateRequest{
  1212  		Alloc:        updates,
  1213  		Evals:        trimmedEvals,
  1214  		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
  1215  	}
  1216  
  1217  	// Commit this update via Raft
  1218  	var mErr multierror.Error
  1219  	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
  1220  	if err != nil {
  1221  		n.logger.Error("alloc update failed", "error", err)
  1222  		mErr.Errors = append(mErr.Errors, err)
  1223  	}
  1224  
  1225  	// For each allocation we are updating, check if we should revoke any
  1226  	// - Vault token accessors
  1227  	// - Service Identity token accessors
  1228  	var (
  1229  		revokeVault []*structs.VaultAccessor
  1230  		revokeSI    []*structs.SITokenAccessor
  1231  	)
  1232  
  1233  	for _, alloc := range updates {
  1234  		// Skip any allocation that isn't dead on the client
  1235  		if !alloc.Terminated() {
  1236  			continue
  1237  		}
  1238  
  1239  		ws := memdb.NewWatchSet()
  1240  
  1241  		// Determine if there are any orphaned Vault accessors for the allocation
  1242  		if accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID); err != nil {
  1243  			n.logger.Error("looking up vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1244  			mErr.Errors = append(mErr.Errors, err)
  1245  		} else {
  1246  			revokeVault = append(revokeVault, accessors...)
  1247  		}
  1248  
  1249  		// Determine if there are any orphaned SI accessors for the allocation
  1250  		if accessors, err := n.srv.State().SITokenAccessorsByAlloc(ws, alloc.ID); err != nil {
  1251  			n.logger.Error("looking up si accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1252  			mErr.Errors = append(mErr.Errors, err)
  1253  		} else {
  1254  			revokeSI = append(revokeSI, accessors...)
  1255  		}
  1256  	}
  1257  
  1258  	// Revoke any orphaned Vault token accessors
  1259  	if l := len(revokeVault); l > 0 {
  1260  		n.logger.Debug("revoking vault accessors due to terminal allocations", "num_accessors", l)
  1261  		if err := n.srv.vault.RevokeTokens(context.Background(), revokeVault, true); err != nil {
  1262  			n.logger.Error("batched vault accessor revocation failed", "error", err)
  1263  			mErr.Errors = append(mErr.Errors, err)
  1264  		}
  1265  	}
  1266  
  1267  	// Revoke any orphaned SI token accessors
  1268  	if l := len(revokeSI); l > 0 {
  1269  		n.logger.Debug("revoking si accessors due to terminal allocations", "num_accessors", l)
  1270  		_ = n.srv.consulACLs.RevokeTokens(context.Background(), revokeSI, true)
  1271  	}
  1272  
  1273  	// Respond to the future
  1274  	future.Respond(index, mErr.ErrorOrNil())
  1275  }
  1276  
  1277  // List is used to list the available nodes
  1278  func (n *Node) List(args *structs.NodeListRequest,
  1279  	reply *structs.NodeListResponse) error {
  1280  	if done, err := n.srv.forward("Node.List", args, args, reply); done {
  1281  		return err
  1282  	}
  1283  	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())
  1284  
  1285  	// Check node read permissions
  1286  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
  1287  		return err
  1288  	} else if aclObj != nil && !aclObj.AllowNodeRead() {
  1289  		return structs.ErrPermissionDenied
  1290  	}
  1291  
  1292  	// Setup the blocking query
  1293  	opts := blockingOptions{
  1294  		queryOpts: &args.QueryOptions,
  1295  		queryMeta: &reply.QueryMeta,
  1296  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1297  			// Capture all the nodes
  1298  			var err error
  1299  			var iter memdb.ResultIterator
  1300  			if prefix := args.QueryOptions.Prefix; prefix != "" {
  1301  				iter, err = state.NodesByIDPrefix(ws, prefix)
  1302  			} else {
  1303  				iter, err = state.Nodes(ws)
  1304  			}
  1305  			if err != nil {
  1306  				return err
  1307  			}
  1308  
  1309  			var nodes []*structs.NodeListStub
  1310  			for {
  1311  				raw := iter.Next()
  1312  				if raw == nil {
  1313  					break
  1314  				}
  1315  				node := raw.(*structs.Node)
  1316  				nodes = append(nodes, node.Stub())
  1317  			}
  1318  			reply.Nodes = nodes
  1319  
  1320  			// Use the last index that affected the jobs table
  1321  			index, err := state.Index("nodes")
  1322  			if err != nil {
  1323  				return err
  1324  			}
  1325  			reply.Index = index
  1326  
  1327  			// Set the query response
  1328  			n.srv.setQueryMeta(&reply.QueryMeta)
  1329  			return nil
  1330  		}}
  1331  	return n.srv.blockingRPC(&opts)
  1332  }
  1333  
  1334  // createNodeEvals is used to create evaluations for each alloc on a node.
  1335  // Each Eval is scoped to a job, so we need to potentially trigger many evals.
  1336  func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
  1337  	// Snapshot the state
  1338  	snap, err := n.srv.fsm.State().Snapshot()
  1339  	if err != nil {
  1340  		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
  1341  	}
  1342  
  1343  	// Find all the allocations for this node
  1344  	ws := memdb.NewWatchSet()
  1345  	allocs, err := snap.AllocsByNode(ws, nodeID)
  1346  	if err != nil {
  1347  		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
  1348  	}
  1349  
  1350  	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
  1351  	if err != nil {
  1352  		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
  1353  	}
  1354  
  1355  	var sysJobs []*structs.Job
  1356  	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
  1357  		sysJobs = append(sysJobs, job.(*structs.Job))
  1358  	}
  1359  
  1360  	// Fast-path if nothing to do
  1361  	if len(allocs) == 0 && len(sysJobs) == 0 {
  1362  		return nil, 0, nil
  1363  	}
  1364  
  1365  	// Create an eval for each JobID affected
  1366  	var evals []*structs.Evaluation
  1367  	var evalIDs []string
  1368  	jobIDs := make(map[string]struct{})
  1369  	now := time.Now().UTC().UnixNano()
  1370  
  1371  	for _, alloc := range allocs {
  1372  		// Deduplicate on JobID
  1373  		if _, ok := jobIDs[alloc.JobID]; ok {
  1374  			continue
  1375  		}
  1376  		jobIDs[alloc.JobID] = struct{}{}
  1377  
  1378  		// Create a new eval
  1379  		eval := &structs.Evaluation{
  1380  			ID:              uuid.Generate(),
  1381  			Namespace:       alloc.Namespace,
  1382  			Priority:        alloc.Job.Priority,
  1383  			Type:            alloc.Job.Type,
  1384  			TriggeredBy:     structs.EvalTriggerNodeUpdate,
  1385  			JobID:           alloc.JobID,
  1386  			NodeID:          nodeID,
  1387  			NodeModifyIndex: nodeIndex,
  1388  			Status:          structs.EvalStatusPending,
  1389  			CreateTime:      now,
  1390  			ModifyTime:      now,
  1391  		}
  1392  		evals = append(evals, eval)
  1393  		evalIDs = append(evalIDs, eval.ID)
  1394  	}
  1395  
  1396  	// Create an evaluation for each system job.
  1397  	for _, job := range sysJobs {
  1398  		// Still dedup on JobID as the node may already have the system job.
  1399  		if _, ok := jobIDs[job.ID]; ok {
  1400  			continue
  1401  		}
  1402  		jobIDs[job.ID] = struct{}{}
  1403  
  1404  		// Create a new eval
  1405  		eval := &structs.Evaluation{
  1406  			ID:              uuid.Generate(),
  1407  			Namespace:       job.Namespace,
  1408  			Priority:        job.Priority,
  1409  			Type:            job.Type,
  1410  			TriggeredBy:     structs.EvalTriggerNodeUpdate,
  1411  			JobID:           job.ID,
  1412  			NodeID:          nodeID,
  1413  			NodeModifyIndex: nodeIndex,
  1414  			Status:          structs.EvalStatusPending,
  1415  			CreateTime:      now,
  1416  			ModifyTime:      now,
  1417  		}
  1418  		evals = append(evals, eval)
  1419  		evalIDs = append(evalIDs, eval.ID)
  1420  	}
  1421  
  1422  	// Create the Raft transaction
  1423  	update := &structs.EvalUpdateRequest{
  1424  		Evals:        evals,
  1425  		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
  1426  	}
  1427  
  1428  	// Commit this evaluation via Raft
  1429  	// XXX: There is a risk of partial failure where the node update succeeds
  1430  	// but that the EvalUpdate does not.
  1431  	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
  1432  	if err != nil {
  1433  		return nil, 0, err
  1434  	}
  1435  	return evalIDs, evalIndex, nil
  1436  }
  1437  
  1438  // DeriveVaultToken is used by the clients to request wrapped Vault tokens for
  1439  // tasks
  1440  func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest, reply *structs.DeriveVaultTokenResponse) error {
  1441  	setError := func(e error, recoverable bool) {
  1442  		if e != nil {
  1443  			if re, ok := e.(*structs.RecoverableError); ok {
  1444  				reply.Error = re // No need to wrap if error is already a RecoverableError
  1445  			} else {
  1446  				reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
  1447  			}
  1448  			n.logger.Error("DeriveVaultToken failed", "recoverable", recoverable, "error", e)
  1449  		}
  1450  	}
  1451  
  1452  	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
  1453  		setError(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
  1454  		return nil
  1455  	}
  1456  	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())
  1457  
  1458  	// Verify the arguments
  1459  	if args.NodeID == "" {
  1460  		setError(fmt.Errorf("missing node ID"), false)
  1461  		return nil
  1462  	}
  1463  	if args.SecretID == "" {
  1464  		setError(fmt.Errorf("missing node SecretID"), false)
  1465  		return nil
  1466  	}
  1467  	if args.AllocID == "" {
  1468  		setError(fmt.Errorf("missing allocation ID"), false)
  1469  		return nil
  1470  	}
  1471  	if len(args.Tasks) == 0 {
  1472  		setError(fmt.Errorf("no tasks specified"), false)
  1473  		return nil
  1474  	}
  1475  
  1476  	// Verify the following:
  1477  	// * The Node exists and has the correct SecretID
  1478  	// * The Allocation exists on the specified Node
  1479  	// * The Allocation contains the given tasks and they each require Vault
  1480  	//   tokens
  1481  	snap, err := n.srv.fsm.State().Snapshot()
  1482  	if err != nil {
  1483  		setError(err, false)
  1484  		return nil
  1485  	}
  1486  	ws := memdb.NewWatchSet()
  1487  	node, err := snap.NodeByID(ws, args.NodeID)
  1488  	if err != nil {
  1489  		setError(err, false)
  1490  		return nil
  1491  	}
  1492  	if node == nil {
  1493  		setError(fmt.Errorf("Node %q does not exist", args.NodeID), false)
  1494  		return nil
  1495  	}
  1496  	if node.SecretID != args.SecretID {
  1497  		setError(fmt.Errorf("SecretID mismatch"), false)
  1498  		return nil
  1499  	}
  1500  
  1501  	alloc, err := snap.AllocByID(ws, args.AllocID)
  1502  	if err != nil {
  1503  		setError(err, false)
  1504  		return nil
  1505  	}
  1506  	if alloc == nil {
  1507  		setError(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
  1508  		return nil
  1509  	}
  1510  	if alloc.NodeID != args.NodeID {
  1511  		setError(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
  1512  		return nil
  1513  	}
  1514  	if alloc.TerminalStatus() {
  1515  		setError(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
  1516  		return nil
  1517  	}
  1518  
  1519  	// Check the policies
  1520  	policies := alloc.Job.VaultPolicies()
  1521  	if policies == nil {
  1522  		setError(fmt.Errorf("Job doesn't require Vault policies"), false)
  1523  		return nil
  1524  	}
  1525  	tg, ok := policies[alloc.TaskGroup]
  1526  	if !ok {
  1527  		setError(fmt.Errorf("Task group does not require Vault policies"), false)
  1528  		return nil
  1529  	}
  1530  
  1531  	var unneeded []string
  1532  	for _, task := range args.Tasks {
  1533  		taskVault := tg[task]
  1534  		if taskVault == nil || len(taskVault.Policies) == 0 {
  1535  			unneeded = append(unneeded, task)
  1536  		}
  1537  	}
  1538  
  1539  	if len(unneeded) != 0 {
  1540  		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
  1541  			strings.Join(unneeded, ", "))
  1542  		setError(e, false)
  1543  		return nil
  1544  	}
  1545  
  1546  	// At this point the request is valid and we should contact Vault for
  1547  	// tokens.
  1548  
  1549  	// Create an error group where we will spin up a fixed set of goroutines to
  1550  	// handle deriving tokens but where if any fails the whole group is
  1551  	// canceled.
  1552  	g, ctx := errgroup.WithContext(context.Background())
  1553  
  1554  	// Cap the handlers
  1555  	handlers := len(args.Tasks)
  1556  	if handlers > maxParallelRequestsPerDerive {
  1557  		handlers = maxParallelRequestsPerDerive
  1558  	}
  1559  
  1560  	// Create the Vault Tokens
  1561  	input := make(chan string, handlers)
  1562  	results := make(map[string]*vapi.Secret, len(args.Tasks))
  1563  	for i := 0; i < handlers; i++ {
  1564  		g.Go(func() error {
  1565  			for {
  1566  				select {
  1567  				case task, ok := <-input:
  1568  					if !ok {
  1569  						return nil
  1570  					}
  1571  
  1572  					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
  1573  					if err != nil {
  1574  						return err
  1575  					}
  1576  
  1577  					results[task] = secret
  1578  				case <-ctx.Done():
  1579  					return nil
  1580  				}
  1581  			}
  1582  		})
  1583  	}
  1584  
  1585  	// Send the input
  1586  	go func() {
  1587  		defer close(input)
  1588  		for _, task := range args.Tasks {
  1589  			select {
  1590  			case <-ctx.Done():
  1591  				return
  1592  			case input <- task:
  1593  			}
  1594  		}
  1595  	}()
  1596  
  1597  	// Wait for everything to complete or for an error
  1598  	createErr := g.Wait()
  1599  
  1600  	// Retrieve the results
  1601  	accessors := make([]*structs.VaultAccessor, 0, len(results))
  1602  	tokens := make(map[string]string, len(results))
  1603  	for task, secret := range results {
  1604  		w := secret.WrapInfo
  1605  		tokens[task] = w.Token
  1606  		accessor := &structs.VaultAccessor{
  1607  			Accessor:    w.WrappedAccessor,
  1608  			Task:        task,
  1609  			NodeID:      alloc.NodeID,
  1610  			AllocID:     alloc.ID,
  1611  			CreationTTL: w.TTL,
  1612  		}
  1613  
  1614  		accessors = append(accessors, accessor)
  1615  	}
  1616  
  1617  	// If there was an error revoke the created tokens
  1618  	if createErr != nil {
  1619  		n.logger.Error("Vault token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr)
  1620  
  1621  		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
  1622  			n.logger.Error("Vault token revocation for alloc failed", "alloc_id", alloc.ID, "error", revokeErr)
  1623  		}
  1624  
  1625  		if rerr, ok := createErr.(*structs.RecoverableError); ok {
  1626  			reply.Error = rerr
  1627  		} else {
  1628  			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
  1629  		}
  1630  
  1631  		return nil
  1632  	}
  1633  
  1634  	// Commit to Raft before returning any of the tokens
  1635  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1636  	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
  1637  	if err != nil {
  1638  		n.logger.Error("registering Vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1639  
  1640  		// Determine if we can recover from the error
  1641  		retry := false
  1642  		switch err {
  1643  		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
  1644  			retry = true
  1645  		}
  1646  
  1647  		setError(err, retry)
  1648  		return nil
  1649  	}
  1650  
  1651  	reply.Index = index
  1652  	reply.Tasks = tokens
  1653  	n.srv.setQueryMeta(&reply.QueryMeta)
  1654  	return nil
  1655  }
  1656  
// connectTask pairs the kind of a task with the name of that task.
// NOTE(review): presumably used by DeriveSIToken to describe the Consul
// Connect tasks that need a Service Identity token — confirm against its
// body.
type connectTask struct {
	TaskKind structs.TaskKind // kind of the task
	TaskName string           // name of the task
}
  1661  
  1662  func (n *Node) DeriveSIToken(args *structs.DeriveSITokenRequest, reply *structs.DeriveSITokenResponse) error {
  1663  	setError := func(e error, recoverable bool) {
  1664  		if e != nil {
  1665  			if re, ok := e.(*structs.RecoverableError); ok {
  1666  				reply.Error = re // No need to wrap if error is already a RecoverableError
  1667  			} else {
  1668  				reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
  1669  			}
  1670  			n.logger.Error("DeriveSIToken failed", "recoverable", recoverable, "error", e)
  1671  		}
  1672  	}
  1673  
  1674  	if done, err := n.srv.forward("Node.DeriveSIToken", args, args, reply); done {
  1675  		setError(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
  1676  		return nil
  1677  	}
  1678  	defer metrics.MeasureSince([]string{"nomad", "client", "derive_si_token"}, time.Now())
  1679  
  1680  	// Verify the arguments
  1681  	if err := args.Validate(); err != nil {
  1682  		setError(err, false)
  1683  		return nil
  1684  	}
  1685  
  1686  	// Get the ClusterID
  1687  	clusterID, err := n.srv.ClusterID()
  1688  	if err != nil {
  1689  		setError(err, false)
  1690  		return nil
  1691  	}
  1692  
  1693  	// Verify the following:
  1694  	// * The Node exists and has the correct SecretID.
  1695  	// * The Allocation exists on the specified Node.
  1696  	// * The Allocation contains the given tasks, and each task requires a
  1697  	//   SI token.
  1698  
  1699  	snap, err := n.srv.fsm.State().Snapshot()
  1700  	if err != nil {
  1701  		setError(err, false)
  1702  		return nil
  1703  	}
  1704  	node, err := snap.NodeByID(nil, args.NodeID)
  1705  	if err != nil {
  1706  		setError(err, false)
  1707  		return nil
  1708  	}
  1709  	if node == nil {
  1710  		setError(errors.Errorf("Node %q does not exist", args.NodeID), false)
  1711  		return nil
  1712  	}
  1713  	if node.SecretID != args.SecretID {
  1714  		setError(errors.Errorf("SecretID mismatch"), false)
  1715  		return nil
  1716  	}
  1717  
  1718  	alloc, err := snap.AllocByID(nil, args.AllocID)
  1719  	if err != nil {
  1720  		setError(err, false)
  1721  		return nil
  1722  	}
  1723  	if alloc == nil {
  1724  		setError(errors.Errorf("Allocation %q does not exist", args.AllocID), false)
  1725  		return nil
  1726  	}
  1727  	if alloc.NodeID != args.NodeID {
  1728  		setError(errors.Errorf("Allocation %q not running on node %q", args.AllocID, args.NodeID), false)
  1729  		return nil
  1730  	}
  1731  	if alloc.TerminalStatus() {
  1732  		setError(errors.Errorf("Cannot request SI token for terminal allocation"), false)
  1733  		return nil
  1734  	}
  1735  
  1736  	// make sure task group contains at least one connect enabled service
  1737  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
  1738  	if tg == nil {
  1739  		setError(errors.Errorf("Allocation %q does not contain TaskGroup %q", args.AllocID, alloc.TaskGroup), false)
  1740  		return nil
  1741  	}
  1742  	if !tg.UsesConnect() {
  1743  		setError(errors.Errorf("TaskGroup %q does not use Connect", tg.Name), false)
  1744  		return nil
  1745  	}
  1746  
  1747  	// make sure each task in args.Tasks is a connect-enabled task
  1748  	notConnect, tasks := connectTasks(tg, args.Tasks)
  1749  	if len(notConnect) > 0 {
  1750  		setError(fmt.Errorf(
  1751  			"Requested Consul Service Identity tokens for tasks that are not Connect enabled: %v",
  1752  			strings.Join(notConnect, ", "),
  1753  		), false)
  1754  	}
  1755  
  1756  	// At this point the request is valid and we should contact Consul for tokens.
  1757  
  1758  	// A lot of the following is copied from DeriveVaultToken which has been
  1759  	// working fine for years.
  1760  
  1761  	// Create an error group where we will spin up a fixed set of goroutines to
  1762  	// handle deriving tokens but where if any fails the whole group is
  1763  	// canceled.
  1764  	g, ctx := errgroup.WithContext(context.Background())
  1765  
  1766  	// Cap the worker threads
  1767  	numWorkers := len(args.Tasks)
  1768  	if numWorkers > maxParallelRequestsPerDerive {
  1769  		numWorkers = maxParallelRequestsPerDerive
  1770  	}
  1771  
  1772  	// would like to pull some of this out...
  1773  
  1774  	// Create the SI tokens from a slice of task name + connect service
  1775  	input := make(chan connectTask, numWorkers)
  1776  	results := make(map[string]*structs.SIToken, numWorkers)
  1777  	for i := 0; i < numWorkers; i++ {
  1778  		g.Go(func() error {
  1779  			for {
  1780  				select {
  1781  				case task, ok := <-input:
  1782  					if !ok {
  1783  						return nil
  1784  					}
  1785  					secret, err := n.srv.consulACLs.CreateToken(ctx, ServiceIdentityRequest{
  1786  						TaskKind:  task.TaskKind,
  1787  						TaskName:  task.TaskName,
  1788  						ClusterID: clusterID,
  1789  						AllocID:   alloc.ID,
  1790  					})
  1791  					if err != nil {
  1792  						return err
  1793  					}
  1794  					results[task.TaskName] = secret
  1795  				case <-ctx.Done():
  1796  					return nil
  1797  				}
  1798  			}
  1799  		})
  1800  	}
  1801  
  1802  	// Send the input
  1803  	go func() {
  1804  		defer close(input)
  1805  		for _, connectTask := range tasks {
  1806  			select {
  1807  			case <-ctx.Done():
  1808  				return
  1809  			case input <- connectTask:
  1810  			}
  1811  		}
  1812  	}()
  1813  
  1814  	// Wait for everything to complete or for an error
  1815  	createErr := g.Wait()
  1816  
  1817  	accessors := make([]*structs.SITokenAccessor, 0, len(results))
  1818  	tokens := make(map[string]string, len(results))
  1819  	for task, secret := range results {
  1820  		tokens[task] = secret.SecretID
  1821  		accessor := &structs.SITokenAccessor{
  1822  			NodeID:     alloc.NodeID,
  1823  			AllocID:    alloc.ID,
  1824  			TaskName:   task,
  1825  			AccessorID: secret.AccessorID,
  1826  		}
  1827  		accessors = append(accessors, accessor)
  1828  	}
  1829  
  1830  	// If there was an error, revoke all created tokens. These tokens have not
  1831  	// yet been committed to the persistent store.
  1832  	if createErr != nil {
  1833  		n.logger.Error("Consul Service Identity token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr)
  1834  		_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, false)
  1835  
  1836  		if recoverable, ok := createErr.(*structs.RecoverableError); ok {
  1837  			reply.Error = recoverable
  1838  		} else {
  1839  			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
  1840  		}
  1841  
  1842  		return nil
  1843  	}
  1844  
  1845  	// Commit the derived tokens to raft before returning them
  1846  	requested := structs.SITokenAccessorsRequest{Accessors: accessors}
  1847  	_, index, err := n.srv.raftApply(structs.ServiceIdentityAccessorRegisterRequestType, &requested)
  1848  	if err != nil {
  1849  		n.logger.Error("registering Service Identity token accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1850  
  1851  		// Determine if we can recover from the error
  1852  		retry := false
  1853  		switch err {
  1854  		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
  1855  			retry = true
  1856  		}
  1857  		setError(err, retry)
  1858  		return nil
  1859  	}
  1860  
  1861  	// We made it! Now we can set the reply.
  1862  	reply.Index = index
  1863  	reply.Tokens = tokens
  1864  	n.srv.setQueryMeta(&reply.QueryMeta)
  1865  	return nil
  1866  }
  1867  
  1868  func connectTasks(tg *structs.TaskGroup, tasks []string) ([]string, []connectTask) {
  1869  	var notConnect []string
  1870  	var usesConnect []connectTask
  1871  	for _, task := range tasks {
  1872  		tgTask := tg.LookupTask(task)
  1873  		if !taskUsesConnect(tgTask) {
  1874  			notConnect = append(notConnect, task)
  1875  		} else {
  1876  			usesConnect = append(usesConnect, connectTask{
  1877  				TaskName: task,
  1878  				TaskKind: tgTask.Kind,
  1879  			})
  1880  		}
  1881  	}
  1882  	return notConnect, usesConnect
  1883  }
  1884  
  1885  func taskUsesConnect(task *structs.Task) bool {
  1886  	if task == nil {
  1887  		// not even in the task group
  1888  		return false
  1889  	}
  1890  
  1891  	return task.Kind.IsConnectProxy() || task.Kind.IsConnectNative()
  1892  }
  1893  
  1894  func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
  1895  	if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
  1896  		return err
  1897  	}
  1898  	defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())
  1899  
  1900  	if len(args.NodeEvents) == 0 {
  1901  		return fmt.Errorf("no node events given")
  1902  	}
  1903  	for nodeID, events := range args.NodeEvents {
  1904  		if len(events) == 0 {
  1905  			return fmt.Errorf("no node events given for node %q", nodeID)
  1906  		}
  1907  	}
  1908  
  1909  	_, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args)
  1910  	if err != nil {
  1911  		n.logger.Error("upserting node events failed", "error", err)
  1912  		return err
  1913  	}
  1914  
  1915  	reply.Index = index
  1916  	return nil
  1917  }