github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/node_endpoint.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net/http"
     8  	"reflect"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/armon/go-metrics"
    14  	"github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/go-memdb"
    16  	"github.com/hashicorp/go-multierror"
    17  	vapi "github.com/hashicorp/vault/api"
    18  	"golang.org/x/sync/errgroup"
    19  
    20  	"github.com/hashicorp/nomad/acl"
    21  	"github.com/hashicorp/nomad/helper/uuid"
    22  	"github.com/hashicorp/nomad/nomad/state"
    23  	"github.com/hashicorp/nomad/nomad/state/paginator"
    24  	"github.com/hashicorp/nomad/nomad/structs"
    25  	"github.com/hashicorp/raft"
    26  )
    27  
const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16

	// NodeDrainEvent* are the messages attached to node events emitted when a
	// node's drain strategy is set, disabled or updated
	NodeDrainEventDrainSet      = "Node drain strategy set"
	NodeDrainEventDrainDisabled = "Node drain disabled"
	NodeDrainEventDrainUpdated  = "Node drain strategy updated"

	// NodeEligibilityEventEligible is used when the node's eligibility is
	// marked eligible
	NodeEligibilityEventEligible = "Node marked as eligible for scheduling"

	// NodeEligibilityEventIneligible is used when the node's eligibility is
	// marked ineligible
	NodeEligibilityEventIneligible = "Node marked as ineligible for scheduling"

	// NodeHeartbeatEventReregistered is the message used when the node becomes
	// reregistered by the heartbeat.
	NodeHeartbeatEventReregistered = "Node reregistered by heartbeat"
)
    53  
// Node is the RPC endpoint used for client (node) interactions such as
// registration, deregistration, status, drain and eligibility updates.
type Node struct {
	// srv is the server this endpoint operates on
	srv *Server

	// logger is the endpoint's logger; NewNodeEndpoint derives it from the
	// server logger
	logger hclog.Logger

	// ctx provides context regarding the underlying connection
	ctx *RPCContext

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// evals holds pending rescheduling eval updates triggered by failed allocations
	evals []*structs.Evaluation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *structs.BatchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}
    80  
    81  func NewNodeEndpoint(srv *Server, ctx *RPCContext) *Node {
    82  	return &Node{
    83  		srv:     srv,
    84  		ctx:     ctx,
    85  		logger:  srv.logger.Named("client"),
    86  		updates: []*structs.Allocation{},
    87  		evals:   []*structs.Evaluation{},
    88  	}
    89  }
    90  
    91  // Register is used to upsert a client that is available for scheduling
    92  func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
    93  	isForwarded := args.IsForwarded()
    94  	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
    95  		// We have a valid node connection since there is no error from the
    96  		// forwarded server, so add the mapping to cache the
    97  		// connection and allow the server to send RPCs to the client.
    98  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
    99  			n.ctx.NodeID = args.Node.ID
   100  			n.srv.addNodeConn(n.ctx)
   101  		}
   102  
   103  		return err
   104  	}
   105  	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())
   106  
   107  	// Validate the arguments
   108  	if args.Node == nil {
   109  		return fmt.Errorf("missing node for client registration")
   110  	}
   111  	if args.Node.ID == "" {
   112  		return fmt.Errorf("missing node ID for client registration")
   113  	}
   114  	if args.Node.Datacenter == "" {
   115  		return fmt.Errorf("missing datacenter for client registration")
   116  	}
   117  	if args.Node.Name == "" {
   118  		return fmt.Errorf("missing node name for client registration")
   119  	}
   120  	if len(args.Node.Attributes) == 0 {
   121  		return fmt.Errorf("missing attributes for client registration")
   122  	}
   123  	if args.Node.SecretID == "" {
   124  		return fmt.Errorf("missing node secret ID for client registration")
   125  	}
   126  
   127  	// Default the status if none is given
   128  	if args.Node.Status == "" {
   129  		args.Node.Status = structs.NodeStatusInit
   130  	}
   131  	if !structs.ValidNodeStatus(args.Node.Status) {
   132  		return fmt.Errorf("invalid status for node")
   133  	}
   134  
   135  	// Default to eligible for scheduling if unset
   136  	if args.Node.SchedulingEligibility == "" {
   137  		args.Node.SchedulingEligibility = structs.NodeSchedulingEligible
   138  	}
   139  
   140  	// Set the timestamp when the node is registered
   141  	args.Node.StatusUpdatedAt = time.Now().Unix()
   142  
   143  	// Compute the node class
   144  	if err := args.Node.ComputeClass(); err != nil {
   145  		return fmt.Errorf("failed to computed node class: %v", err)
   146  	}
   147  
   148  	// Look for the node so we can detect a state transition
   149  	snap, err := n.srv.fsm.State().Snapshot()
   150  	if err != nil {
   151  		return err
   152  	}
   153  
   154  	ws := memdb.NewWatchSet()
   155  	originalNode, err := snap.NodeByID(ws, args.Node.ID)
   156  	if err != nil {
   157  		return err
   158  	}
   159  
   160  	// Check if the SecretID has been tampered with
   161  	if originalNode != nil {
   162  		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
   163  			return fmt.Errorf("node secret ID does not match. Not registering node.")
   164  		}
   165  	}
   166  
   167  	// We have a valid node connection, so add the mapping to cache the
   168  	// connection and allow the server to send RPCs to the client. We only cache
   169  	// the connection if it is not being forwarded from another server.
   170  	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   171  		n.ctx.NodeID = args.Node.ID
   172  		n.srv.addNodeConn(n.ctx)
   173  	}
   174  
   175  	// Commit this update via Raft
   176  	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
   177  	if err != nil {
   178  		n.logger.Error("register failed", "error", err)
   179  		return err
   180  	}
   181  	reply.NodeModifyIndex = index
   182  
   183  	// Check if we should trigger evaluations
   184  	if shouldCreateNodeEval(originalNode, args.Node) {
   185  		evalIDs, evalIndex, err := n.createNodeEvals(args.Node, index)
   186  		if err != nil {
   187  			n.logger.Error("eval creation failed", "error", err)
   188  			return err
   189  		}
   190  		reply.EvalIDs = evalIDs
   191  		reply.EvalCreateIndex = evalIndex
   192  	}
   193  
   194  	// Check if we need to setup a heartbeat
   195  	if !args.Node.TerminalStatus() {
   196  		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
   197  		if err != nil {
   198  			n.logger.Error("heartbeat reset failed", "error", err)
   199  			return err
   200  		}
   201  		reply.HeartbeatTTL = ttl
   202  	}
   203  
   204  	// Set the reply index
   205  	reply.Index = index
   206  	snap, err = n.srv.fsm.State().Snapshot()
   207  	if err != nil {
   208  		return err
   209  	}
   210  
   211  	n.srv.peerLock.RLock()
   212  	defer n.srv.peerLock.RUnlock()
   213  	if err := n.constructNodeServerInfoResponse(args.Node.ID, snap, reply); err != nil {
   214  		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
   215  		return err
   216  	}
   217  
   218  	return nil
   219  }
   220  
   221  // shouldCreateNodeEval returns true if the node update may result into
   222  // allocation updates, so the node should be re-evaluating.
   223  //
   224  // Such cases might be:
   225  // * node health/drain status changes that may result into alloc rescheduling
   226  // * node drivers or attributes changing that may cause system job placement changes
   227  func shouldCreateNodeEval(original, updated *structs.Node) bool {
   228  	if structs.ShouldDrainNode(updated.Status) {
   229  		return true
   230  	}
   231  
   232  	if original == nil {
   233  		return nodeStatusTransitionRequiresEval(updated.Status, structs.NodeStatusInit)
   234  	}
   235  
   236  	if nodeStatusTransitionRequiresEval(updated.Status, original.Status) {
   237  		return true
   238  	}
   239  
   240  	// check fields used by the feasibility checks in ../scheduler/feasible.go,
   241  	// whether through a Constraint explicitly added by user or an implicit constraint
   242  	// added through a driver/volume check.
   243  	//
   244  	// Node Resources (e.g. CPU/Memory) are handled differently, using blocked evals,
   245  	// and not relevant in this check.
   246  	return !(original.ID == updated.ID &&
   247  		original.Datacenter == updated.Datacenter &&
   248  		original.Name == updated.Name &&
   249  		original.NodeClass == updated.NodeClass &&
   250  		reflect.DeepEqual(original.Attributes, updated.Attributes) &&
   251  		reflect.DeepEqual(original.Meta, updated.Meta) &&
   252  		reflect.DeepEqual(original.Drivers, updated.Drivers) &&
   253  		reflect.DeepEqual(original.HostVolumes, updated.HostVolumes) &&
   254  		equalDevices(original, updated))
   255  }
   256  
   257  func equalDevices(n1, n2 *structs.Node) bool {
   258  	// ignore super old nodes, mostly to avoid nil dereferencing
   259  	if n1.NodeResources == nil || n2.NodeResources == nil {
   260  		return n1.NodeResources == n2.NodeResources
   261  	}
   262  
   263  	// treat nil and empty value as equal
   264  	if len(n1.NodeResources.Devices) == 0 {
   265  		return len(n1.NodeResources.Devices) == len(n2.NodeResources.Devices)
   266  	}
   267  
   268  	return reflect.DeepEqual(n1.NodeResources.Devices, n2.NodeResources.Devices)
   269  }
   270  
   271  // updateNodeUpdateResponse assumes the n.srv.peerLock is held for reading.
   272  func (n *Node) constructNodeServerInfoResponse(nodeID string, snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
   273  	reply.LeaderRPCAddr = string(n.srv.raft.Leader())
   274  
   275  	// Reply with config information required for future RPC requests
   276  	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
   277  	for _, v := range n.srv.localPeers {
   278  		reply.Servers = append(reply.Servers,
   279  			&structs.NodeServerInfo{
   280  				RPCAdvertiseAddr: v.RPCAddr.String(),
   281  				Datacenter:       v.Datacenter,
   282  			})
   283  	}
   284  
   285  	// Add ClientStatus information to heartbeat response.
   286  	node, _ := snap.NodeByID(nil, nodeID)
   287  	reply.SchedulingEligibility = node.SchedulingEligibility
   288  
   289  	// TODO(sean@): Use an indexed node count instead
   290  	//
   291  	// Snapshot is used only to iterate over all nodes to create a node
   292  	// count to send back to Nomad Clients in their heartbeat so Clients
   293  	// can estimate the size of the cluster.
   294  	ws := memdb.NewWatchSet()
   295  	iter, err := snap.Nodes(ws)
   296  	if err == nil {
   297  		for {
   298  			raw := iter.Next()
   299  			if raw == nil {
   300  				break
   301  			}
   302  			reply.NumNodes++
   303  		}
   304  	}
   305  
   306  	reply.Features = n.srv.EnterpriseState.Features()
   307  
   308  	return nil
   309  }
   310  
   311  // Deregister is used to remove a client from the cluster. If a client should
   312  // just be made unavailable for scheduling, a status update is preferred.
   313  func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
   314  	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
   315  		return err
   316  	}
   317  	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())
   318  
   319  	if args.NodeID == "" {
   320  		return fmt.Errorf("missing node ID for client deregistration")
   321  	}
   322  
   323  	// deregister takes a batch
   324  	repack := &structs.NodeBatchDeregisterRequest{
   325  		NodeIDs:      []string{args.NodeID},
   326  		WriteRequest: args.WriteRequest,
   327  	}
   328  
   329  	return n.deregister(repack, reply, func() (interface{}, uint64, error) {
   330  		return n.srv.raftApply(structs.NodeDeregisterRequestType, args)
   331  	})
   332  }
   333  
   334  // BatchDeregister is used to remove client nodes from the cluster.
   335  func (n *Node) BatchDeregister(args *structs.NodeBatchDeregisterRequest, reply *structs.NodeUpdateResponse) error {
   336  	if done, err := n.srv.forward("Node.BatchDeregister", args, args, reply); done {
   337  		return err
   338  	}
   339  	defer metrics.MeasureSince([]string{"nomad", "client", "batch_deregister"}, time.Now())
   340  
   341  	if len(args.NodeIDs) == 0 {
   342  		return fmt.Errorf("missing node IDs for client deregistration")
   343  	}
   344  
   345  	return n.deregister(args, reply, func() (interface{}, uint64, error) {
   346  		return n.srv.raftApply(structs.NodeBatchDeregisterRequestType, args)
   347  	})
   348  }
   349  
   350  // deregister takes a raftMessage closure, to support both Deregister and BatchDeregister
   351  func (n *Node) deregister(args *structs.NodeBatchDeregisterRequest,
   352  	reply *structs.NodeUpdateResponse,
   353  	raftApplyFn func() (interface{}, uint64, error),
   354  ) error {
   355  	// Check request permissions
   356  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   357  		return err
   358  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   359  		return structs.ErrPermissionDenied
   360  	}
   361  
   362  	// Look for the node
   363  	snap, err := n.srv.fsm.State().Snapshot()
   364  	if err != nil {
   365  		return err
   366  	}
   367  
   368  	nodes := make([]*structs.Node, 0, len(args.NodeIDs))
   369  	for _, nodeID := range args.NodeIDs {
   370  		node, err := snap.NodeByID(nil, nodeID)
   371  		if err != nil {
   372  			return err
   373  		}
   374  		if node == nil {
   375  			return fmt.Errorf("node not found")
   376  		}
   377  		nodes = append(nodes, node)
   378  	}
   379  
   380  	// Commit this update via Raft
   381  	_, index, err := raftApplyFn()
   382  	if err != nil {
   383  		n.logger.Error("raft message failed", "error", err)
   384  		return err
   385  	}
   386  
   387  	for _, node := range nodes {
   388  		nodeID := node.ID
   389  
   390  		// Clear the heartbeat timer if any
   391  		n.srv.clearHeartbeatTimer(nodeID)
   392  
   393  		// Create the evaluations for this node
   394  		evalIDs, evalIndex, err := n.createNodeEvals(node, index)
   395  		if err != nil {
   396  			n.logger.Error("eval creation failed", "error", err)
   397  			return err
   398  		}
   399  
   400  		// Determine if there are any Vault accessors on the node
   401  		if accessors, err := snap.VaultAccessorsByNode(nil, nodeID); err != nil {
   402  			n.logger.Error("looking up vault accessors for node failed", "node_id", nodeID, "error", err)
   403  			return err
   404  		} else if l := len(accessors); l > 0 {
   405  			n.logger.Debug("revoking vault accessors on node due to deregister", "num_accessors", l, "node_id", nodeID)
   406  			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
   407  				n.logger.Error("revoking vault accessors for node failed", "node_id", nodeID, "error", err)
   408  				return err
   409  			}
   410  		}
   411  
   412  		// Determine if there are any SI token accessors on the node
   413  		if accessors, err := snap.SITokenAccessorsByNode(nil, nodeID); err != nil {
   414  			n.logger.Error("looking up si accessors for node failed", "node_id", nodeID, "error", err)
   415  			return err
   416  		} else if l := len(accessors); l > 0 {
   417  			n.logger.Debug("revoking si accessors on node due to deregister", "num_accessors", l, "node_id", nodeID)
   418  			// Unlike with the Vault integration, there's no error returned here, since
   419  			// bootstrapping the Consul client is elsewhere. Errors in revocation trigger
   420  			// background retry attempts rather than inline error handling.
   421  			_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, true)
   422  		}
   423  
   424  		reply.EvalIDs = append(reply.EvalIDs, evalIDs...)
   425  		// Set the reply eval create index just the first time
   426  		if reply.EvalCreateIndex == 0 {
   427  			reply.EvalCreateIndex = evalIndex
   428  		}
   429  	}
   430  
   431  	reply.NodeModifyIndex = index
   432  	reply.Index = index
   433  	return nil
   434  }
   435  
// UpdateStatus is used to update the status of a client node.
//
// The status change is committed via Raft only when it differs from the
// node's current status. Node evaluations are created when the new status
// requires draining or the transition requires an evaluation. For a node going
// down, its Vault and SI token accessors are revoked and its service
// registrations are deleted; for every other status the heartbeat timer is
// reset and the TTL returned in the reply.
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	// Capture the forwarded flag before calling forward, so that the check
	// below reflects how the request arrived at this server.
	isForwarded := args.IsForwarded()
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
			n.ctx.NodeID = args.NodeID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.NodeID
		n.srv.addNodeConn(n.ctx)
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	args.UpdatedAt = time.Now().Unix()

	// Commit this update via Raft, but only if the status actually changes.
	// When it does not, index stays zero.
	var index uint64
	if node.Status != args.Status {
		// Attach an event if we are updating the node status to ready when it
		// is down via a heartbeat
		if node.Status == structs.NodeStatusDown && args.NodeEvent == nil {
			args.NodeEvent = structs.NewNodeEvent().
				SetSubsystem(structs.NodeEventSubsystemCluster).
				SetMessage(NodeHeartbeatEventReregistered)
		}

		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.logger.Error("status update failed", "error", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations. Note that node is the copy read
	// from the snapshot taken before the status update was applied.
	if structs.ShouldDrainNode(args.Status) ||
		nodeStatusTransitionRequiresEval(args.Status, node.Status) {
		evalIDs, evalIndex, err := n.createNodeEvals(node, index)
		if err != nil {
			n.logger.Error("eval creation failed", "error", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// A down node gets its tokens and service registrations cleaned up; any
	// other status (re)sets the heartbeat timer.
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node to cleanup
		if accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID); err != nil {
			n.logger.Error("looking up vault accessors for node failed", "node_id", args.NodeID, "error", err)
			return err
		} else if l := len(accessors); l > 0 {
			n.logger.Debug("revoking vault accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.logger.Error("revoking vault accessors for node failed", "node_id", args.NodeID, "error", err)
				return err
			}
		}

		// Determine if there are any SI token accessors on the node to cleanup
		if accessors, err := n.srv.State().SITokenAccessorsByNode(ws, args.NodeID); err != nil {
			n.logger.Error("looking up SI accessors for node failed", "node_id", args.NodeID, "error", err)
			return err
		} else if l := len(accessors); l > 0 {
			n.logger.Debug("revoking SI accessors on node due to down state", "num_accessors", l, "node_id", args.NodeID)
			// The result of the SI token revocation is deliberately discarded
			_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, true)
		}

		// Identify the service registrations currently placed on the downed
		// node.
		serviceRegistrations, err := n.srv.State().GetServiceRegistrationsByNodeID(ws, args.NodeID)
		if err != nil {
			n.logger.Error("looking up service registrations for node failed",
				"node_id", args.NodeID, "error", err)
			return err
		}

		// If the node has service registrations assigned to it, delete these
		// via Raft. This overwrites index, so the reply index below reflects
		// this later write.
		if l := len(serviceRegistrations); l > 0 {
			n.logger.Debug("deleting service registrations on node due to down state",
				"num_service_registrations", l, "node_id", args.NodeID)

			deleteRegReq := structs.ServiceRegistrationDeleteByNodeIDRequest{NodeID: args.NodeID}

			_, index, err = n.srv.raftApply(structs.ServiceRegistrationDeleteByNodeIDRequestType, &deleteRegReq)
			if err != nil {
				n.logger.Error("failed to delete service registrations for node",
					"node_id", args.NodeID, "error", err)
				return err
			}
		}

	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.logger.Error("heartbeat reset failed", "error", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader. constructNodeServerInfoResponse requires
	// peerLock to be held for reading.
	reply.Index = index
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil {
		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
		return err
	}

	return nil
}
   589  
   590  // nodeStatusTransitionRequiresEval is a helper that takes a nodes new and old status and
   591  // returns whether it has transitioned to ready.
   592  func nodeStatusTransitionRequiresEval(newStatus, oldStatus string) bool {
   593  	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
   594  	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
   595  	disconnectedToOther := oldStatus == structs.NodeStatusDisconnected && newStatus != structs.NodeStatusDisconnected
   596  	otherToDisconnected := oldStatus != structs.NodeStatusDisconnected && newStatus == structs.NodeStatusDisconnected
   597  	return initToReady || terminalToReady || disconnectedToOther || otherToDisconnected
   598  }
   599  
   600  // UpdateDrain is used to update the drain mode of a client node
   601  func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
   602  	reply *structs.NodeDrainUpdateResponse) error {
   603  	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
   604  		return err
   605  	}
   606  	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())
   607  
   608  	// Check node write permissions
   609  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   610  		return err
   611  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   612  		return structs.ErrPermissionDenied
   613  	}
   614  
   615  	// Verify the arguments
   616  	if args.NodeID == "" {
   617  		return fmt.Errorf("missing node ID for drain update")
   618  	}
   619  	if args.NodeEvent != nil {
   620  		return fmt.Errorf("node event must not be set")
   621  	}
   622  
   623  	// Look for the node
   624  	snap, err := n.srv.fsm.State().Snapshot()
   625  	if err != nil {
   626  		return err
   627  	}
   628  	node, err := snap.NodeByID(nil, args.NodeID)
   629  	if err != nil {
   630  		return err
   631  	}
   632  	if node == nil {
   633  		return fmt.Errorf("node not found")
   634  	}
   635  
   636  	now := time.Now().UTC()
   637  
   638  	// Update the timestamp of when the node status was updated
   639  	args.UpdatedAt = now.Unix()
   640  
   641  	// Setup drain strategy
   642  	if args.DrainStrategy != nil {
   643  		// Mark start time for the drain
   644  		if node.DrainStrategy == nil {
   645  			args.DrainStrategy.StartedAt = now
   646  		} else {
   647  			args.DrainStrategy.StartedAt = node.DrainStrategy.StartedAt
   648  		}
   649  
   650  		// Mark the deadline time
   651  		if args.DrainStrategy.Deadline.Nanoseconds() > 0 {
   652  			args.DrainStrategy.ForceDeadline = now.Add(args.DrainStrategy.Deadline)
   653  		}
   654  	}
   655  
   656  	// Construct the node event
   657  	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemDrain)
   658  	if node.DrainStrategy == nil && args.DrainStrategy != nil {
   659  		args.NodeEvent.SetMessage(NodeDrainEventDrainSet)
   660  	} else if node.DrainStrategy != nil && args.DrainStrategy != nil {
   661  		args.NodeEvent.SetMessage(NodeDrainEventDrainUpdated)
   662  	} else if node.DrainStrategy != nil && args.DrainStrategy == nil {
   663  		args.NodeEvent.SetMessage(NodeDrainEventDrainDisabled)
   664  	} else {
   665  		args.NodeEvent = nil
   666  	}
   667  
   668  	// Commit this update via Raft
   669  	_, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
   670  	if err != nil {
   671  		n.logger.Error("drain update failed", "error", err)
   672  		return err
   673  	}
   674  	reply.NodeModifyIndex = index
   675  
   676  	// If the node is transitioning to be eligible, create Node evaluations
   677  	// because there may be a System job registered that should be evaluated.
   678  	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil {
   679  		n.logger.Info("node transitioning to eligible state", "node_id", node.ID)
   680  		evalIDs, evalIndex, err := n.createNodeEvals(node, index)
   681  		if err != nil {
   682  			n.logger.Error("eval creation failed", "error", err)
   683  			return err
   684  		}
   685  		reply.EvalIDs = evalIDs
   686  		reply.EvalCreateIndex = evalIndex
   687  	}
   688  
   689  	// Set the reply index
   690  	reply.Index = index
   691  	return nil
   692  }
   693  
   694  // UpdateEligibility is used to update the scheduling eligibility of a node
   695  func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest,
   696  	reply *structs.NodeEligibilityUpdateResponse) error {
   697  	if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done {
   698  		return err
   699  	}
   700  	defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now())
   701  
   702  	// Check node write permissions
   703  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   704  		return err
   705  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   706  		return structs.ErrPermissionDenied
   707  	}
   708  
   709  	// Verify the arguments
   710  	if args.NodeID == "" {
   711  		return fmt.Errorf("missing node ID for setting scheduling eligibility")
   712  	}
   713  	if args.NodeEvent != nil {
   714  		return fmt.Errorf("node event must not be set")
   715  	}
   716  
   717  	// Check that only allowed types are set
   718  	switch args.Eligibility {
   719  	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
   720  	default:
   721  		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
   722  	}
   723  
   724  	// Look for the node
   725  	snap, err := n.srv.fsm.State().Snapshot()
   726  	if err != nil {
   727  		return err
   728  	}
   729  	node, err := snap.NodeByID(nil, args.NodeID)
   730  	if err != nil {
   731  		return err
   732  	}
   733  	if node == nil {
   734  		return fmt.Errorf("node not found")
   735  	}
   736  
   737  	if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible {
   738  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
   739  	}
   740  
   741  	switch args.Eligibility {
   742  	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
   743  	default:
   744  		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
   745  	}
   746  
   747  	// Update the timestamp of when the node status was updated
   748  	args.UpdatedAt = time.Now().Unix()
   749  
   750  	// Construct the node event
   751  	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster)
   752  	if node.SchedulingEligibility == args.Eligibility {
   753  		return nil // Nothing to do
   754  	} else if args.Eligibility == structs.NodeSchedulingEligible {
   755  		n.logger.Info("node transitioning to eligible state", "node_id", node.ID)
   756  		args.NodeEvent.SetMessage(NodeEligibilityEventEligible)
   757  	} else {
   758  		n.logger.Info("node transitioning to ineligible state", "node_id", node.ID)
   759  		args.NodeEvent.SetMessage(NodeEligibilityEventIneligible)
   760  	}
   761  
   762  	// Commit this update via Raft
   763  	outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args)
   764  	if err != nil {
   765  		n.logger.Error("eligibility update failed", "error", err)
   766  		return err
   767  	}
   768  	if outErr != nil {
   769  		if err, ok := outErr.(error); ok && err != nil {
   770  			n.logger.Error("eligibility update failed", "error", err)
   771  			return err
   772  		}
   773  	}
   774  
   775  	// If the node is transitioning to be eligible, create Node evaluations
   776  	// because there may be a System job registered that should be evaluated.
   777  	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible {
   778  		evalIDs, evalIndex, err := n.createNodeEvals(node, index)
   779  		if err != nil {
   780  			n.logger.Error("eval creation failed", "error", err)
   781  			return err
   782  		}
   783  		reply.EvalIDs = evalIDs
   784  		reply.EvalCreateIndex = evalIndex
   785  	}
   786  
   787  	// Set the reply index
   788  	reply.Index = index
   789  	return nil
   790  }
   791  
   792  // Evaluate is used to force a re-evaluation of the node
   793  func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
   794  	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
   795  		return err
   796  	}
   797  	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())
   798  
   799  	// Check node write permissions
   800  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   801  		return err
   802  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   803  		return structs.ErrPermissionDenied
   804  	}
   805  
   806  	// Verify the arguments
   807  	if args.NodeID == "" {
   808  		return fmt.Errorf("missing node ID for evaluation")
   809  	}
   810  
   811  	// Look for the node
   812  	snap, err := n.srv.fsm.State().Snapshot()
   813  	if err != nil {
   814  		return err
   815  	}
   816  	ws := memdb.NewWatchSet()
   817  	node, err := snap.NodeByID(ws, args.NodeID)
   818  	if err != nil {
   819  		return err
   820  	}
   821  	if node == nil {
   822  		return fmt.Errorf("node not found")
   823  	}
   824  
   825  	// Create the evaluation
   826  	evalIDs, evalIndex, err := n.createNodeEvals(node, node.ModifyIndex)
   827  	if err != nil {
   828  		n.logger.Error("eval creation failed", "error", err)
   829  		return err
   830  	}
   831  	reply.EvalIDs = evalIDs
   832  	reply.EvalCreateIndex = evalIndex
   833  
   834  	// Set the reply index
   835  	reply.Index = evalIndex
   836  
   837  	n.srv.peerLock.RLock()
   838  	defer n.srv.peerLock.RUnlock()
   839  	if err := n.constructNodeServerInfoResponse(node.GetID(), snap, reply); err != nil {
   840  		n.logger.Error("failed to populate NodeUpdateResponse", "error", err)
   841  		return err
   842  	}
   843  	return nil
   844  }
   845  
   846  // GetNode is used to request information about a specific node
   847  func (n *Node) GetNode(args *structs.NodeSpecificRequest,
   848  	reply *structs.SingleNodeResponse) error {
   849  	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
   850  		return err
   851  	}
   852  	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())
   853  
   854  	// Check node read permissions
   855  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   856  		// If ResolveToken had an unexpected error return that
   857  		if err != structs.ErrTokenNotFound {
   858  			return err
   859  		}
   860  
   861  		// Attempt to lookup AuthToken as a Node.SecretID since nodes
   862  		// call this endpoint and don't have an ACL token.
   863  		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
   864  		if stateErr != nil {
   865  			// Return the original ResolveToken error with this err
   866  			var merr multierror.Error
   867  			merr.Errors = append(merr.Errors, err, stateErr)
   868  			return merr.ErrorOrNil()
   869  		}
   870  
   871  		// Not a node or a valid ACL token
   872  		if node == nil {
   873  			return structs.ErrTokenNotFound
   874  		}
   875  	} else if aclObj != nil && !aclObj.AllowNodeRead() {
   876  		return structs.ErrPermissionDenied
   877  	}
   878  
   879  	// Setup the blocking query
   880  	opts := blockingOptions{
   881  		queryOpts: &args.QueryOptions,
   882  		queryMeta: &reply.QueryMeta,
   883  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   884  			// Verify the arguments
   885  			if args.NodeID == "" {
   886  				return fmt.Errorf("missing node ID")
   887  			}
   888  
   889  			// Look for the node
   890  			out, err := state.NodeByID(ws, args.NodeID)
   891  			if err != nil {
   892  				return err
   893  			}
   894  
   895  			// Setup the output
   896  			if out != nil {
   897  				out = out.Sanitize()
   898  				reply.Node = out
   899  				reply.Index = out.ModifyIndex
   900  			} else {
   901  				// Use the last index that affected the nodes table
   902  				index, err := state.Index("nodes")
   903  				if err != nil {
   904  					return err
   905  				}
   906  				reply.Node = nil
   907  				reply.Index = index
   908  			}
   909  
   910  			// Set the query response
   911  			n.srv.setQueryMeta(&reply.QueryMeta)
   912  			return nil
   913  		}}
   914  	return n.srv.blockingRPC(&opts)
   915  }
   916  
   917  // GetAllocs is used to request allocations for a specific node
   918  func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
   919  	reply *structs.NodeAllocsResponse) error {
   920  	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
   921  		return err
   922  	}
   923  	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())
   924  
   925  	// Check node read and namespace job read permissions
   926  	aclObj, err := n.srv.ResolveToken(args.AuthToken)
   927  	if err != nil {
   928  		return err
   929  	}
   930  	if aclObj != nil && !aclObj.AllowNodeRead() {
   931  		return structs.ErrPermissionDenied
   932  	}
   933  
   934  	// cache namespace perms
   935  	readableNamespaces := map[string]bool{}
   936  
   937  	// readNS is a caching namespace read-job helper
   938  	readNS := func(ns string) bool {
   939  		if aclObj == nil {
   940  			// ACLs are disabled; everything is readable
   941  			return true
   942  		}
   943  
   944  		if readable, ok := readableNamespaces[ns]; ok {
   945  			// cache hit
   946  			return readable
   947  		}
   948  
   949  		// cache miss
   950  		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
   951  		readableNamespaces[ns] = readable
   952  		return readable
   953  	}
   954  
   955  	// Verify the arguments
   956  	if args.NodeID == "" {
   957  		return fmt.Errorf("missing node ID")
   958  	}
   959  
   960  	// Setup the blocking query
   961  	opts := blockingOptions{
   962  		queryOpts: &args.QueryOptions,
   963  		queryMeta: &reply.QueryMeta,
   964  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   965  			// Look for the node
   966  			allocs, err := state.AllocsByNode(ws, args.NodeID)
   967  			if err != nil {
   968  				return err
   969  			}
   970  
   971  			// Setup the output
   972  			if n := len(allocs); n != 0 {
   973  				reply.Allocs = make([]*structs.Allocation, 0, n)
   974  				for _, alloc := range allocs {
   975  					if readNS(alloc.Namespace) {
   976  						reply.Allocs = append(reply.Allocs, alloc)
   977  					}
   978  
   979  					// Get the max of all allocs since
   980  					// subsequent requests need to start
   981  					// from the latest index
   982  					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
   983  				}
   984  			} else {
   985  				reply.Allocs = nil
   986  
   987  				// Use the last index that affected the nodes table
   988  				index, err := state.Index("allocs")
   989  				if err != nil {
   990  					return err
   991  				}
   992  
   993  				// Must provide non-zero index to prevent blocking
   994  				// Index 1 is impossible anyways (due to Raft internals)
   995  				if index == 0 {
   996  					reply.Index = 1
   997  				} else {
   998  					reply.Index = index
   999  				}
  1000  			}
  1001  			return nil
  1002  		}}
  1003  	return n.srv.blockingRPC(&opts)
  1004  }
  1005  
  1006  // GetClientAllocs is used to request a lightweight list of alloc modify indexes
  1007  // per allocation.
  1008  func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
  1009  	reply *structs.NodeClientAllocsResponse) error {
  1010  	isForwarded := args.IsForwarded()
  1011  	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
  1012  		// We have a valid node connection since there is no error from the
  1013  		// forwarded server, so add the mapping to cache the
  1014  		// connection and allow the server to send RPCs to the client.
  1015  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" && !isForwarded {
  1016  			n.ctx.NodeID = args.NodeID
  1017  			n.srv.addNodeConn(n.ctx)
  1018  		}
  1019  
  1020  		return err
  1021  	}
  1022  	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())
  1023  
  1024  	// Verify the arguments
  1025  	if args.NodeID == "" {
  1026  		return fmt.Errorf("missing node ID")
  1027  	}
  1028  
  1029  	// numOldAllocs is used to detect if there is a garbage collection event
  1030  	// that effects the node. When an allocation is garbage collected, that does
  1031  	// not change the modify index changes and thus the query won't unblock,
  1032  	// even though the set of allocations on the node has changed.
  1033  	var numOldAllocs int
  1034  
  1035  	// Setup the blocking query
  1036  	opts := blockingOptions{
  1037  		queryOpts: &args.QueryOptions,
  1038  		queryMeta: &reply.QueryMeta,
  1039  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1040  			// Look for the node
  1041  			node, err := state.NodeByID(ws, args.NodeID)
  1042  			if err != nil {
  1043  				return err
  1044  			}
  1045  
  1046  			var allocs []*structs.Allocation
  1047  			if node != nil {
  1048  				if args.SecretID == "" {
  1049  					return fmt.Errorf("missing node secret ID for client status update")
  1050  				} else if args.SecretID != node.SecretID {
  1051  					return fmt.Errorf("node secret ID does not match")
  1052  				}
  1053  
  1054  				// We have a valid node connection, so add the mapping to cache the
  1055  				// connection and allow the server to send RPCs to the client. We only cache
  1056  				// the connection if it is not being forwarded from another server.
  1057  				if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
  1058  					n.ctx.NodeID = args.NodeID
  1059  					n.srv.addNodeConn(n.ctx)
  1060  				}
  1061  
  1062  				var err error
  1063  				allocs, err = state.AllocsByNode(ws, args.NodeID)
  1064  				if err != nil {
  1065  					return err
  1066  				}
  1067  			}
  1068  
  1069  			reply.Allocs = make(map[string]uint64)
  1070  			reply.MigrateTokens = make(map[string]string)
  1071  
  1072  			// preferTableIndex is used to determine whether we should build the
  1073  			// response index based on the full table indexes versus the modify
  1074  			// indexes of the allocations on the specific node. This is
  1075  			// preferred in the case that the node doesn't yet have allocations
  1076  			// or when we detect a GC that effects the node.
  1077  			preferTableIndex := true
  1078  
  1079  			// Setup the output
  1080  			if numAllocs := len(allocs); numAllocs != 0 {
  1081  				preferTableIndex = false
  1082  
  1083  				for _, alloc := range allocs {
  1084  					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex
  1085  
  1086  					// If the allocation is going to do a migration, create a
  1087  					// migration token so that the client can authenticate with
  1088  					// the node hosting the previous allocation.
  1089  					if alloc.ShouldMigrate() {
  1090  						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
  1091  						if err != nil {
  1092  							return err
  1093  						}
  1094  
  1095  						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
  1096  							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
  1097  							if err != nil {
  1098  								return err
  1099  							}
  1100  							if allocNode == nil {
  1101  								// Node must have been GC'd so skip the token
  1102  								continue
  1103  							}
  1104  
  1105  							token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
  1106  							if err != nil {
  1107  								return err
  1108  							}
  1109  							reply.MigrateTokens[alloc.ID] = token
  1110  						}
  1111  					}
  1112  
  1113  					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
  1114  				}
  1115  
  1116  				// Determine if we have less allocations than before. This
  1117  				// indicates there was a garbage collection
  1118  				if numAllocs < numOldAllocs {
  1119  					preferTableIndex = true
  1120  				}
  1121  
  1122  				// Store the new number of allocations
  1123  				numOldAllocs = numAllocs
  1124  			}
  1125  
  1126  			if preferTableIndex {
  1127  				// Use the last index that affected the nodes table
  1128  				index, err := state.Index("allocs")
  1129  				if err != nil {
  1130  					return err
  1131  				}
  1132  
  1133  				// Must provide non-zero index to prevent blocking
  1134  				// Index 1 is impossible anyways (due to Raft internals)
  1135  				if index == 0 {
  1136  					reply.Index = 1
  1137  				} else {
  1138  					reply.Index = index
  1139  				}
  1140  			}
  1141  			return nil
  1142  		}}
  1143  	return n.srv.blockingRPC(&opts)
  1144  }
  1145  
  1146  // UpdateAlloc is used to update the client status of an allocation. It should
  1147  // only be called by clients.
  1148  //
  1149  // Clients must first register and heartbeat successfully before they are able
  1150  // to call this method.
  1151  func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
  1152  	// Ensure the connection was initiated by another client if TLS is used.
  1153  	err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient)
  1154  	if err != nil {
  1155  		return err
  1156  	}
  1157  
  1158  	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
  1159  		return err
  1160  	}
  1161  	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())
  1162  
  1163  	// Ensure at least a single alloc
  1164  	if len(args.Alloc) == 0 {
  1165  		return fmt.Errorf("must update at least one allocation")
  1166  	}
  1167  
  1168  	// Ensure the node is allowed to update allocs.
  1169  	// The node needs to successfully heartbeat before updating its allocs.
  1170  	nodeID := args.Alloc[0].NodeID
  1171  	if nodeID == "" {
  1172  		return fmt.Errorf("missing node ID")
  1173  	}
  1174  
  1175  	node, err := n.srv.State().NodeByID(nil, nodeID)
  1176  	if err != nil {
  1177  		return fmt.Errorf("failed to retrieve node %s: %v", nodeID, err)
  1178  	}
  1179  	if node == nil {
  1180  		return fmt.Errorf("node %s not found", nodeID)
  1181  	}
  1182  	if node.Status != structs.NodeStatusReady {
  1183  		return fmt.Errorf("node %s is %s, not %s", nodeID, node.Status, structs.NodeStatusReady)
  1184  	}
  1185  
  1186  	// Ensure that evals aren't set from client RPCs
  1187  	// We create them here before the raft update
  1188  	if len(args.Evals) != 0 {
  1189  		return fmt.Errorf("evals field must not be set")
  1190  	}
  1191  
  1192  	// Update modified timestamp for client initiated allocation updates
  1193  	now := time.Now()
  1194  	var evals []*structs.Evaluation
  1195  
  1196  	for _, allocToUpdate := range args.Alloc {
  1197  		evalTriggerBy := ""
  1198  		allocToUpdate.ModifyTime = now.UTC().UnixNano()
  1199  
  1200  		alloc, _ := n.srv.State().AllocByID(nil, allocToUpdate.ID)
  1201  		if alloc == nil {
  1202  			continue
  1203  		}
  1204  
  1205  		if !allocToUpdate.TerminalStatus() && alloc.ClientStatus != structs.AllocClientStatusUnknown {
  1206  			continue
  1207  		}
  1208  
  1209  		var job *structs.Job
  1210  		var jobType string
  1211  		var jobPriority int
  1212  
  1213  		job, err = n.srv.State().JobByID(nil, alloc.Namespace, alloc.JobID)
  1214  		if err != nil {
  1215  			n.logger.Debug("UpdateAlloc unable to find job", "job", alloc.JobID, "error", err)
  1216  			continue
  1217  		}
  1218  
  1219  		// If the job is nil it means it has been de-registered.
  1220  		if job == nil {
  1221  			jobType = alloc.Job.Type
  1222  			jobPriority = alloc.Job.Priority
  1223  			evalTriggerBy = structs.EvalTriggerJobDeregister
  1224  			allocToUpdate.DesiredStatus = structs.AllocDesiredStatusStop
  1225  			n.logger.Debug("UpdateAlloc unable to find job - shutting down alloc", "job", alloc.JobID)
  1226  		}
  1227  
  1228  		var taskGroup *structs.TaskGroup
  1229  		if job != nil {
  1230  			jobType = job.Type
  1231  			jobPriority = job.Priority
  1232  			taskGroup = job.LookupTaskGroup(alloc.TaskGroup)
  1233  		}
  1234  
  1235  		// If we cannot find the task group for a failed alloc we cannot continue, unless it is an orphan.
  1236  		if evalTriggerBy != structs.EvalTriggerJobDeregister &&
  1237  			allocToUpdate.ClientStatus == structs.AllocClientStatusFailed &&
  1238  			alloc.FollowupEvalID == "" {
  1239  
  1240  			if taskGroup == nil {
  1241  				n.logger.Debug("UpdateAlloc unable to find task group for job", "job", alloc.JobID, "alloc", alloc.ID, "task_group", alloc.TaskGroup)
  1242  				continue
  1243  			}
  1244  
  1245  			// Set trigger by failed if not an orphan.
  1246  			if alloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) {
  1247  				evalTriggerBy = structs.EvalTriggerRetryFailedAlloc
  1248  			}
  1249  		}
  1250  
  1251  		var eval *structs.Evaluation
  1252  		// If unknown, and not an orphan, set the trigger by.
  1253  		if evalTriggerBy != structs.EvalTriggerJobDeregister &&
  1254  			alloc.ClientStatus == structs.AllocClientStatusUnknown {
  1255  			evalTriggerBy = structs.EvalTriggerReconnect
  1256  		}
  1257  
  1258  		// If we weren't able to determine one of our expected eval triggers,
  1259  		// continue and don't create an eval.
  1260  		if evalTriggerBy == "" {
  1261  			continue
  1262  		}
  1263  
  1264  		eval = &structs.Evaluation{
  1265  			ID:          uuid.Generate(),
  1266  			Namespace:   alloc.Namespace,
  1267  			TriggeredBy: evalTriggerBy,
  1268  			JobID:       alloc.JobID,
  1269  			Type:        jobType,
  1270  			Priority:    jobPriority,
  1271  			Status:      structs.EvalStatusPending,
  1272  			CreateTime:  now.UTC().UnixNano(),
  1273  			ModifyTime:  now.UTC().UnixNano(),
  1274  		}
  1275  		evals = append(evals, eval)
  1276  	}
  1277  
  1278  	// Add this to the batch
  1279  	n.updatesLock.Lock()
  1280  	n.updates = append(n.updates, args.Alloc...)
  1281  	n.evals = append(n.evals, evals...)
  1282  
  1283  	// Start a new batch if none
  1284  	future := n.updateFuture
  1285  	if future == nil {
  1286  		future = structs.NewBatchFuture()
  1287  		n.updateFuture = future
  1288  		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
  1289  			// Get the pending updates
  1290  			n.updatesLock.Lock()
  1291  			updates := n.updates
  1292  			evals := n.evals
  1293  			future := n.updateFuture
  1294  
  1295  			// Assume future update patterns will be similar to
  1296  			// current batch and set cap appropriately to avoid
  1297  			// slice resizing.
  1298  			n.updates = make([]*structs.Allocation, 0, len(updates))
  1299  			n.evals = make([]*structs.Evaluation, 0, len(evals))
  1300  
  1301  			n.updateFuture = nil
  1302  			n.updateTimer = nil
  1303  			n.updatesLock.Unlock()
  1304  
  1305  			// Perform the batch update
  1306  			n.batchUpdate(future, updates, evals)
  1307  		})
  1308  	}
  1309  	n.updatesLock.Unlock()
  1310  
  1311  	// Wait for the future
  1312  	if err := future.Wait(); err != nil {
  1313  		return err
  1314  	}
  1315  
  1316  	// Setup the response
  1317  	reply.Index = future.Index()
  1318  	return nil
  1319  }
  1320  
  1321  // batchUpdate is used to update all the allocations
  1322  func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) {
  1323  	var mErr multierror.Error
  1324  	// Group pending evals by jobID to prevent creating unnecessary evals
  1325  	evalsByJobId := make(map[structs.NamespacedID]struct{})
  1326  	var trimmedEvals []*structs.Evaluation
  1327  	for _, eval := range evals {
  1328  		namespacedID := structs.NamespacedID{
  1329  			ID:        eval.JobID,
  1330  			Namespace: eval.Namespace,
  1331  		}
  1332  		_, exists := evalsByJobId[namespacedID]
  1333  		if !exists {
  1334  			now := time.Now().UTC().UnixNano()
  1335  			eval.CreateTime = now
  1336  			eval.ModifyTime = now
  1337  			trimmedEvals = append(trimmedEvals, eval)
  1338  			evalsByJobId[namespacedID] = struct{}{}
  1339  		}
  1340  	}
  1341  
  1342  	if len(trimmedEvals) > 0 {
  1343  		n.logger.Debug("adding evaluations for rescheduling failed allocations", "num_evals", len(trimmedEvals))
  1344  	}
  1345  	// Prepare the batch update
  1346  	batch := &structs.AllocUpdateRequest{
  1347  		Alloc:        updates,
  1348  		Evals:        trimmedEvals,
  1349  		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
  1350  	}
  1351  
  1352  	// Commit this update via Raft
  1353  	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
  1354  	if err != nil {
  1355  		n.logger.Error("alloc update failed", "error", err)
  1356  		mErr.Errors = append(mErr.Errors, err)
  1357  	}
  1358  
  1359  	// For each allocation we are updating, check if we should revoke any
  1360  	// - Vault token accessors
  1361  	// - Service Identity token accessors
  1362  	var (
  1363  		revokeVault []*structs.VaultAccessor
  1364  		revokeSI    []*structs.SITokenAccessor
  1365  	)
  1366  
  1367  	for _, alloc := range updates {
  1368  		// Skip any allocation that isn't dead on the client
  1369  		if !alloc.Terminated() {
  1370  			continue
  1371  		}
  1372  
  1373  		ws := memdb.NewWatchSet()
  1374  
  1375  		// Determine if there are any orphaned Vault accessors for the allocation
  1376  		if accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID); err != nil {
  1377  			n.logger.Error("looking up vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1378  			mErr.Errors = append(mErr.Errors, err)
  1379  		} else {
  1380  			revokeVault = append(revokeVault, accessors...)
  1381  		}
  1382  
  1383  		// Determine if there are any orphaned SI accessors for the allocation
  1384  		if accessors, err := n.srv.State().SITokenAccessorsByAlloc(ws, alloc.ID); err != nil {
  1385  			n.logger.Error("looking up si accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1386  			mErr.Errors = append(mErr.Errors, err)
  1387  		} else {
  1388  			revokeSI = append(revokeSI, accessors...)
  1389  		}
  1390  	}
  1391  
  1392  	// Revoke any orphaned Vault token accessors
  1393  	if l := len(revokeVault); l > 0 {
  1394  		n.logger.Debug("revoking vault accessors due to terminal allocations", "num_accessors", l)
  1395  		if err := n.srv.vault.RevokeTokens(context.Background(), revokeVault, true); err != nil {
  1396  			n.logger.Error("batched vault accessor revocation failed", "error", err)
  1397  			mErr.Errors = append(mErr.Errors, err)
  1398  		}
  1399  	}
  1400  
  1401  	// Revoke any orphaned SI token accessors
  1402  	if l := len(revokeSI); l > 0 {
  1403  		n.logger.Debug("revoking si accessors due to terminal allocations", "num_accessors", l)
  1404  		_ = n.srv.consulACLs.RevokeTokens(context.Background(), revokeSI, true)
  1405  	}
  1406  
  1407  	// Respond to the future
  1408  	future.Respond(index, mErr.ErrorOrNil())
  1409  }
  1410  
  1411  // List is used to list the available nodes
  1412  func (n *Node) List(args *structs.NodeListRequest,
  1413  	reply *structs.NodeListResponse) error {
  1414  	if done, err := n.srv.forward("Node.List", args, args, reply); done {
  1415  		return err
  1416  	}
  1417  	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())
  1418  
  1419  	// Check node read permissions
  1420  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
  1421  		return err
  1422  	} else if aclObj != nil && !aclObj.AllowNodeRead() {
  1423  		return structs.ErrPermissionDenied
  1424  	}
  1425  
  1426  	// Set up the blocking query.
  1427  	opts := blockingOptions{
  1428  		queryOpts: &args.QueryOptions,
  1429  		queryMeta: &reply.QueryMeta,
  1430  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1431  
  1432  			var err error
  1433  			var iter memdb.ResultIterator
  1434  			if prefix := args.QueryOptions.Prefix; prefix != "" {
  1435  				iter, err = state.NodesByIDPrefix(ws, prefix)
  1436  			} else {
  1437  				iter, err = state.Nodes(ws)
  1438  			}
  1439  			if err != nil {
  1440  				return err
  1441  			}
  1442  
  1443  			// Generate the tokenizer to use for pagination using the populated
  1444  			// paginatorOpts object. The ID of a node must be unique within the
  1445  			// region, therefore we only need WithID on the paginator options.
  1446  			tokenizer := paginator.NewStructsTokenizer(iter, paginator.StructsTokenizerOptions{WithID: true})
  1447  
  1448  			var nodes []*structs.NodeListStub
  1449  
  1450  			// Build the paginator. This includes the function that is
  1451  			// responsible for appending a node to the nodes array.
  1452  			paginatorImpl, err := paginator.NewPaginator(iter, tokenizer, nil, args.QueryOptions,
  1453  				func(raw interface{}) error {
  1454  					nodes = append(nodes, raw.(*structs.Node).Stub(args.Fields))
  1455  					return nil
  1456  				})
  1457  			if err != nil {
  1458  				return structs.NewErrRPCCodedf(
  1459  					http.StatusBadRequest, "failed to create result paginator: %v", err)
  1460  			}
  1461  
  1462  			// Calling page populates our output nodes array as well as returns
  1463  			// the next token.
  1464  			nextToken, err := paginatorImpl.Page()
  1465  			if err != nil {
  1466  				return structs.NewErrRPCCodedf(
  1467  					http.StatusBadRequest, "failed to read result page: %v", err)
  1468  			}
  1469  
  1470  			// Populate the reply.
  1471  			reply.Nodes = nodes
  1472  			reply.NextToken = nextToken
  1473  
  1474  			// Use the last index that affected the jobs table
  1475  			index, err := state.Index("nodes")
  1476  			if err != nil {
  1477  				return err
  1478  			}
  1479  			reply.Index = index
  1480  
  1481  			// Set the query response
  1482  			n.srv.setQueryMeta(&reply.QueryMeta)
  1483  			return nil
  1484  		}}
  1485  	return n.srv.blockingRPC(&opts)
  1486  }
  1487  
  1488  // createNodeEvals is used to create evaluations for each alloc on a node.
  1489  // Each Eval is scoped to a job, so we need to potentially trigger many evals.
  1490  func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string, uint64, error) {
  1491  	nodeID := node.ID
  1492  
  1493  	// Snapshot the state
  1494  	snap, err := n.srv.fsm.State().Snapshot()
  1495  	if err != nil {
  1496  		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
  1497  	}
  1498  
  1499  	// Find all the allocations for this node
  1500  	allocs, err := snap.AllocsByNode(nil, nodeID)
  1501  	if err != nil {
  1502  		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
  1503  	}
  1504  
  1505  	sysJobsIter, err := snap.JobsByScheduler(nil, "system")
  1506  	if err != nil {
  1507  		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
  1508  	}
  1509  
  1510  	var sysJobs []*structs.Job
  1511  	for jobI := sysJobsIter.Next(); jobI != nil; jobI = sysJobsIter.Next() {
  1512  		job := jobI.(*structs.Job)
  1513  		// Avoid creating evals for jobs that don't run in this
  1514  		// datacenter. We could perform an entire feasibility check
  1515  		// here, but datacenter is a good optimization to start with as
  1516  		// datacenter cardinality tends to be low so the check
  1517  		// shouldn't add much work.
  1518  		for _, dc := range job.Datacenters {
  1519  			if dc == node.Datacenter {
  1520  				sysJobs = append(sysJobs, job)
  1521  				break
  1522  			}
  1523  		}
  1524  	}
  1525  
  1526  	// Fast-path if nothing to do
  1527  	if len(allocs) == 0 && len(sysJobs) == 0 {
  1528  		return nil, 0, nil
  1529  	}
  1530  
  1531  	// Create an eval for each JobID affected
  1532  	var evals []*structs.Evaluation
  1533  	var evalIDs []string
  1534  	jobIDs := map[structs.NamespacedID]struct{}{}
  1535  	now := time.Now().UTC().UnixNano()
  1536  
  1537  	for _, alloc := range allocs {
  1538  		// Deduplicate on JobID
  1539  		if _, ok := jobIDs[alloc.JobNamespacedID()]; ok {
  1540  			continue
  1541  		}
  1542  		jobIDs[alloc.JobNamespacedID()] = struct{}{}
  1543  
  1544  		// Create a new eval
  1545  		eval := &structs.Evaluation{
  1546  			ID:              uuid.Generate(),
  1547  			Namespace:       alloc.Namespace,
  1548  			Priority:        alloc.Job.Priority,
  1549  			Type:            alloc.Job.Type,
  1550  			TriggeredBy:     structs.EvalTriggerNodeUpdate,
  1551  			JobID:           alloc.JobID,
  1552  			NodeID:          nodeID,
  1553  			NodeModifyIndex: nodeIndex,
  1554  			Status:          structs.EvalStatusPending,
  1555  			CreateTime:      now,
  1556  			ModifyTime:      now,
  1557  		}
  1558  
  1559  		evals = append(evals, eval)
  1560  		evalIDs = append(evalIDs, eval.ID)
  1561  	}
  1562  
  1563  	// Create an evaluation for each system job.
  1564  	for _, job := range sysJobs {
  1565  		// Still dedup on JobID as the node may already have the system job.
  1566  		if _, ok := jobIDs[job.NamespacedID()]; ok {
  1567  			continue
  1568  		}
  1569  		jobIDs[job.NamespacedID()] = struct{}{}
  1570  
  1571  		// Create a new eval
  1572  		eval := &structs.Evaluation{
  1573  			ID:              uuid.Generate(),
  1574  			Namespace:       job.Namespace,
  1575  			Priority:        job.Priority,
  1576  			Type:            job.Type,
  1577  			TriggeredBy:     structs.EvalTriggerNodeUpdate,
  1578  			JobID:           job.ID,
  1579  			NodeID:          nodeID,
  1580  			NodeModifyIndex: nodeIndex,
  1581  			Status:          structs.EvalStatusPending,
  1582  			CreateTime:      now,
  1583  			ModifyTime:      now,
  1584  		}
  1585  		evals = append(evals, eval)
  1586  		evalIDs = append(evalIDs, eval.ID)
  1587  	}
  1588  
  1589  	// Create the Raft transaction
  1590  	update := &structs.EvalUpdateRequest{
  1591  		Evals:        evals,
  1592  		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
  1593  	}
  1594  
  1595  	// Commit this evaluation via Raft
  1596  	// XXX: There is a risk of partial failure where the node update succeeds
  1597  	// but that the EvalUpdate does not.
  1598  	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
  1599  	if err != nil {
  1600  		return nil, 0, err
  1601  	}
  1602  	return evalIDs, evalIndex, nil
  1603  }
  1604  
  1605  // DeriveVaultToken is used by the clients to request wrapped Vault tokens for
  1606  // tasks
  1607  func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest, reply *structs.DeriveVaultTokenResponse) error {
  1608  	setError := func(e error, recoverable bool) {
  1609  		if e != nil {
  1610  			if re, ok := e.(*structs.RecoverableError); ok {
  1611  				reply.Error = re // No need to wrap if error is already a RecoverableError
  1612  			} else {
  1613  				reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
  1614  			}
  1615  			n.logger.Error("DeriveVaultToken failed", "recoverable", recoverable, "error", e)
  1616  		}
  1617  	}
  1618  
  1619  	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
  1620  		setError(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
  1621  		return nil
  1622  	}
  1623  	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())
  1624  
  1625  	// Verify the arguments
  1626  	if args.NodeID == "" {
  1627  		setError(fmt.Errorf("missing node ID"), false)
  1628  		return nil
  1629  	}
  1630  	if args.SecretID == "" {
  1631  		setError(fmt.Errorf("missing node SecretID"), false)
  1632  		return nil
  1633  	}
  1634  	if args.AllocID == "" {
  1635  		setError(fmt.Errorf("missing allocation ID"), false)
  1636  		return nil
  1637  	}
  1638  	if len(args.Tasks) == 0 {
  1639  		setError(fmt.Errorf("no tasks specified"), false)
  1640  		return nil
  1641  	}
  1642  
  1643  	// Verify the following:
  1644  	// * The Node exists and has the correct SecretID
  1645  	// * The Allocation exists on the specified Node
  1646  	// * The Allocation contains the given tasks and they each require Vault
  1647  	//   tokens
  1648  	snap, err := n.srv.fsm.State().Snapshot()
  1649  	if err != nil {
  1650  		setError(err, false)
  1651  		return nil
  1652  	}
  1653  	ws := memdb.NewWatchSet()
  1654  	node, err := snap.NodeByID(ws, args.NodeID)
  1655  	if err != nil {
  1656  		setError(err, false)
  1657  		return nil
  1658  	}
  1659  	if node == nil {
  1660  		setError(fmt.Errorf("Node %q does not exist", args.NodeID), false)
  1661  		return nil
  1662  	}
  1663  	if node.SecretID != args.SecretID {
  1664  		setError(fmt.Errorf("SecretID mismatch"), false)
  1665  		return nil
  1666  	}
  1667  
  1668  	alloc, err := snap.AllocByID(ws, args.AllocID)
  1669  	if err != nil {
  1670  		setError(err, false)
  1671  		return nil
  1672  	}
  1673  	if alloc == nil {
  1674  		setError(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
  1675  		return nil
  1676  	}
  1677  	if alloc.NodeID != args.NodeID {
  1678  		setError(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
  1679  		return nil
  1680  	}
  1681  	if alloc.TerminalStatus() {
  1682  		setError(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
  1683  		return nil
  1684  	}
  1685  
  1686  	// Check if alloc has Vault
  1687  	vaultBlocks := alloc.Job.Vault()
  1688  	if vaultBlocks == nil {
  1689  		setError(fmt.Errorf("Job does not require Vault token"), false)
  1690  		return nil
  1691  	}
  1692  	tg, ok := vaultBlocks[alloc.TaskGroup]
  1693  	if !ok {
  1694  		setError(fmt.Errorf("Task group does not require Vault token"), false)
  1695  		return nil
  1696  	}
  1697  
  1698  	var unneeded []string
  1699  	for _, task := range args.Tasks {
  1700  		taskVault := tg[task]
  1701  		if taskVault == nil || len(taskVault.Policies) == 0 {
  1702  			unneeded = append(unneeded, task)
  1703  		}
  1704  	}
  1705  
  1706  	if len(unneeded) != 0 {
  1707  		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
  1708  			strings.Join(unneeded, ", "))
  1709  		setError(e, false)
  1710  		return nil
  1711  	}
  1712  
  1713  	// At this point the request is valid and we should contact Vault for
  1714  	// tokens.
  1715  
  1716  	// Create an error group where we will spin up a fixed set of goroutines to
  1717  	// handle deriving tokens but where if any fails the whole group is
  1718  	// canceled.
  1719  	g, ctx := errgroup.WithContext(context.Background())
  1720  
  1721  	// Cap the handlers
  1722  	handlers := len(args.Tasks)
  1723  	if handlers > maxParallelRequestsPerDerive {
  1724  		handlers = maxParallelRequestsPerDerive
  1725  	}
  1726  
  1727  	// Create the Vault Tokens
  1728  	input := make(chan string, handlers)
  1729  	results := make(map[string]*vapi.Secret, len(args.Tasks))
  1730  	for i := 0; i < handlers; i++ {
  1731  		g.Go(func() error {
  1732  			for {
  1733  				select {
  1734  				case task, ok := <-input:
  1735  					if !ok {
  1736  						return nil
  1737  					}
  1738  
  1739  					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
  1740  					if err != nil {
  1741  						return err
  1742  					}
  1743  
  1744  					results[task] = secret
  1745  				case <-ctx.Done():
  1746  					return nil
  1747  				}
  1748  			}
  1749  		})
  1750  	}
  1751  
  1752  	// Send the input
  1753  	go func() {
  1754  		defer close(input)
  1755  		for _, task := range args.Tasks {
  1756  			select {
  1757  			case <-ctx.Done():
  1758  				return
  1759  			case input <- task:
  1760  			}
  1761  		}
  1762  	}()
  1763  
  1764  	// Wait for everything to complete or for an error
  1765  	createErr := g.Wait()
  1766  
  1767  	// Retrieve the results
  1768  	accessors := make([]*structs.VaultAccessor, 0, len(results))
  1769  	tokens := make(map[string]string, len(results))
  1770  	for task, secret := range results {
  1771  		w := secret.WrapInfo
  1772  		tokens[task] = w.Token
  1773  		accessor := &structs.VaultAccessor{
  1774  			Accessor:    w.WrappedAccessor,
  1775  			Task:        task,
  1776  			NodeID:      alloc.NodeID,
  1777  			AllocID:     alloc.ID,
  1778  			CreationTTL: w.TTL,
  1779  		}
  1780  
  1781  		accessors = append(accessors, accessor)
  1782  	}
  1783  
  1784  	// If there was an error revoke the created tokens
  1785  	if createErr != nil {
  1786  		n.logger.Error("Vault token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr)
  1787  
  1788  		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
  1789  			n.logger.Error("Vault token revocation for alloc failed", "alloc_id", alloc.ID, "error", revokeErr)
  1790  		}
  1791  
  1792  		if rerr, ok := createErr.(*structs.RecoverableError); ok {
  1793  			reply.Error = rerr
  1794  		} else {
  1795  			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
  1796  		}
  1797  
  1798  		return nil
  1799  	}
  1800  
  1801  	// Commit to Raft before returning any of the tokens
  1802  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1803  	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
  1804  	if err != nil {
  1805  		n.logger.Error("registering Vault accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  1806  
  1807  		// Determine if we can recover from the error
  1808  		retry := false
  1809  		switch err {
  1810  		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
  1811  			retry = true
  1812  		}
  1813  
  1814  		setError(err, retry)
  1815  		return nil
  1816  	}
  1817  
  1818  	reply.Index = index
  1819  	reply.Tasks = tokens
  1820  	n.srv.setQueryMeta(&reply.QueryMeta)
  1821  	return nil
  1822  }
  1823  
// connectTask pairs the name of a Connect-enabled task with its kind. It is
// the unit of work sent to the workers that create SI tokens.
type connectTask struct {
	// TaskKind is the Kind of the task as found in its task group.
	TaskKind structs.TaskKind
	// TaskName is the name of the task as requested by the client.
	TaskName string
}
  1828  
  1829  func (n *Node) DeriveSIToken(args *structs.DeriveSITokenRequest, reply *structs.DeriveSITokenResponse) error {
  1830  	setError := func(e error, recoverable bool) {
  1831  		if e != nil {
  1832  			if re, ok := e.(*structs.RecoverableError); ok {
  1833  				reply.Error = re // No need to wrap if error is already a RecoverableError
  1834  			} else {
  1835  				reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
  1836  			}
  1837  			n.logger.Error("DeriveSIToken failed", "recoverable", recoverable, "error", e)
  1838  		}
  1839  	}
  1840  
  1841  	if done, err := n.srv.forward("Node.DeriveSIToken", args, args, reply); done {
  1842  		setError(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
  1843  		return nil
  1844  	}
  1845  	defer metrics.MeasureSince([]string{"nomad", "client", "derive_si_token"}, time.Now())
  1846  
  1847  	// Verify the arguments
  1848  	if err := args.Validate(); err != nil {
  1849  		setError(err, false)
  1850  		return nil
  1851  	}
  1852  
  1853  	// Get the ClusterID
  1854  	clusterID, err := n.srv.ClusterID()
  1855  	if err != nil {
  1856  		setError(err, false)
  1857  		return nil
  1858  	}
  1859  
  1860  	// Verify the following:
  1861  	// * The Node exists and has the correct SecretID.
  1862  	// * The Allocation exists on the specified Node.
  1863  	// * The Allocation contains the given tasks, and each task requires a
  1864  	//   SI token.
  1865  
  1866  	snap, err := n.srv.fsm.State().Snapshot()
  1867  	if err != nil {
  1868  		setError(err, false)
  1869  		return nil
  1870  	}
  1871  	node, err := snap.NodeByID(nil, args.NodeID)
  1872  	if err != nil {
  1873  		setError(err, false)
  1874  		return nil
  1875  	}
  1876  	if node == nil {
  1877  		setError(fmt.Errorf("Node %q does not exist", args.NodeID), false)
  1878  		return nil
  1879  	}
  1880  	if node.SecretID != args.SecretID {
  1881  		setError(errors.New("SecretID mismatch"), false)
  1882  		return nil
  1883  	}
  1884  
  1885  	alloc, err := snap.AllocByID(nil, args.AllocID)
  1886  	if err != nil {
  1887  		setError(err, false)
  1888  		return nil
  1889  	}
  1890  	if alloc == nil {
  1891  		setError(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
  1892  		return nil
  1893  	}
  1894  	if alloc.NodeID != args.NodeID {
  1895  		setError(fmt.Errorf("Allocation %q not running on node %q", args.AllocID, args.NodeID), false)
  1896  		return nil
  1897  	}
  1898  	if alloc.TerminalStatus() {
  1899  		setError(errors.New("Cannot request SI token for terminal allocation"), false)
  1900  		return nil
  1901  	}
  1902  
  1903  	// make sure task group contains at least one connect enabled service
  1904  	tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup)
  1905  	if tg == nil {
  1906  		setError(fmt.Errorf("Allocation %q does not contain TaskGroup %q", args.AllocID, alloc.TaskGroup), false)
  1907  		return nil
  1908  	}
  1909  	if !tg.UsesConnect() {
  1910  		setError(fmt.Errorf("TaskGroup %q does not use Connect", tg.Name), false)
  1911  		return nil
  1912  	}
  1913  
  1914  	// make sure each task in args.Tasks is a connect-enabled task
  1915  	notConnect, tasks := connectTasks(tg, args.Tasks)
  1916  	if len(notConnect) > 0 {
  1917  		setError(fmt.Errorf(
  1918  			"Requested Consul Service Identity tokens for tasks that are not Connect enabled: %v",
  1919  			strings.Join(notConnect, ", "),
  1920  		), false)
  1921  	}
  1922  
  1923  	// At this point the request is valid and we should contact Consul for tokens.
  1924  
  1925  	// A lot of the following is copied from DeriveVaultToken which has been
  1926  	// working fine for years.
  1927  
  1928  	// Create an error group where we will spin up a fixed set of goroutines to
  1929  	// handle deriving tokens but where if any fails the whole group is
  1930  	// canceled.
  1931  	g, ctx := errgroup.WithContext(context.Background())
  1932  
  1933  	// Cap the worker threads
  1934  	numWorkers := len(args.Tasks)
  1935  	if numWorkers > maxParallelRequestsPerDerive {
  1936  		numWorkers = maxParallelRequestsPerDerive
  1937  	}
  1938  
  1939  	// would like to pull some of this out...
  1940  
  1941  	// Create the SI tokens from a slice of task name + connect service
  1942  	input := make(chan connectTask, numWorkers)
  1943  	results := make(map[string]*structs.SIToken, numWorkers)
  1944  	for i := 0; i < numWorkers; i++ {
  1945  		g.Go(func() error {
  1946  			for {
  1947  				select {
  1948  				case task, ok := <-input:
  1949  					if !ok {
  1950  						return nil
  1951  					}
  1952  					secret, err := n.srv.consulACLs.CreateToken(ctx, ServiceIdentityRequest{
  1953  						ConsulNamespace: tg.Consul.GetNamespace(),
  1954  						TaskKind:        task.TaskKind,
  1955  						TaskName:        task.TaskName,
  1956  						ClusterID:       clusterID,
  1957  						AllocID:         alloc.ID,
  1958  					})
  1959  					if err != nil {
  1960  						return err
  1961  					}
  1962  					results[task.TaskName] = secret
  1963  				case <-ctx.Done():
  1964  					return nil
  1965  				}
  1966  			}
  1967  		})
  1968  	}
  1969  
  1970  	// Send the input
  1971  	go func() {
  1972  		defer close(input)
  1973  		for _, connectTask := range tasks {
  1974  			select {
  1975  			case <-ctx.Done():
  1976  				return
  1977  			case input <- connectTask:
  1978  			}
  1979  		}
  1980  	}()
  1981  
  1982  	// Wait for everything to complete or for an error
  1983  	createErr := g.Wait()
  1984  
  1985  	accessors := make([]*structs.SITokenAccessor, 0, len(results))
  1986  	tokens := make(map[string]string, len(results))
  1987  	for task, secret := range results {
  1988  		tokens[task] = secret.SecretID
  1989  		accessor := &structs.SITokenAccessor{
  1990  			ConsulNamespace: tg.Consul.GetNamespace(),
  1991  			NodeID:          alloc.NodeID,
  1992  			AllocID:         alloc.ID,
  1993  			TaskName:        task,
  1994  			AccessorID:      secret.AccessorID,
  1995  		}
  1996  		accessors = append(accessors, accessor)
  1997  	}
  1998  
  1999  	// If there was an error, revoke all created tokens. These tokens have not
  2000  	// yet been committed to the persistent store.
  2001  	if createErr != nil {
  2002  		n.logger.Error("Consul Service Identity token creation for alloc failed", "alloc_id", alloc.ID, "error", createErr)
  2003  		_ = n.srv.consulACLs.RevokeTokens(context.Background(), accessors, false)
  2004  
  2005  		if recoverable, ok := createErr.(*structs.RecoverableError); ok {
  2006  			reply.Error = recoverable
  2007  		} else {
  2008  			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
  2009  		}
  2010  
  2011  		return nil
  2012  	}
  2013  
  2014  	// Commit the derived tokens to raft before returning them
  2015  	requested := structs.SITokenAccessorsRequest{Accessors: accessors}
  2016  	_, index, err := n.srv.raftApply(structs.ServiceIdentityAccessorRegisterRequestType, &requested)
  2017  	if err != nil {
  2018  		n.logger.Error("registering Service Identity token accessors for alloc failed", "alloc_id", alloc.ID, "error", err)
  2019  
  2020  		// Determine if we can recover from the error
  2021  		retry := false
  2022  		switch err {
  2023  		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
  2024  			retry = true
  2025  		}
  2026  		setError(err, retry)
  2027  		return nil
  2028  	}
  2029  
  2030  	// We made it! Now we can set the reply.
  2031  	reply.Index = index
  2032  	reply.Tokens = tokens
  2033  	n.srv.setQueryMeta(&reply.QueryMeta)
  2034  	return nil
  2035  }
  2036  
  2037  func connectTasks(tg *structs.TaskGroup, tasks []string) ([]string, []connectTask) {
  2038  	var notConnect []string
  2039  	var usesConnect []connectTask
  2040  	for _, task := range tasks {
  2041  		tgTask := tg.LookupTask(task)
  2042  		if !taskUsesConnect(tgTask) {
  2043  			notConnect = append(notConnect, task)
  2044  		} else {
  2045  			usesConnect = append(usesConnect, connectTask{
  2046  				TaskName: task,
  2047  				TaskKind: tgTask.Kind,
  2048  			})
  2049  		}
  2050  	}
  2051  	return notConnect, usesConnect
  2052  }
  2053  
  2054  func taskUsesConnect(task *structs.Task) bool {
  2055  	if task == nil {
  2056  		// not even in the task group
  2057  		return false
  2058  	}
  2059  	return task.UsesConnect()
  2060  }
  2061  
  2062  func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
  2063  	// Ensure the connection was initiated by another client if TLS is used.
  2064  	err := validateTLSCertificateLevel(n.srv, n.ctx, tlsCertificateLevelClient)
  2065  	if err != nil {
  2066  		return err
  2067  	}
  2068  
  2069  	if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
  2070  		return err
  2071  	}
  2072  	defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())
  2073  
  2074  	if len(args.NodeEvents) == 0 {
  2075  		return fmt.Errorf("no node events given")
  2076  	}
  2077  	for nodeID, events := range args.NodeEvents {
  2078  		if len(events) == 0 {
  2079  			return fmt.Errorf("no node events given for node %q", nodeID)
  2080  		}
  2081  	}
  2082  
  2083  	_, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args)
  2084  	if err != nil {
  2085  		n.logger.Error("upserting node events failed", "error", err)
  2086  		return err
  2087  	}
  2088  
  2089  	reply.Index = index
  2090  	return nil
  2091  }