github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/node_endpoint.go

package nomad

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16

	// NodeDrainEvents are the various drain messages
	NodeDrainEventDrainSet      = "Node drain strategy set"
	NodeDrainEventDrainDisabled = "Node drain disabled"
	NodeDrainEventDrainUpdated  = "Node drain strategy updated"

	// NodeEligibilityEventEligible is used when the node's eligibility is marked
	// eligible
	NodeEligibilityEventEligible = "Node marked as eligible for scheduling"

	// NodeEligibilityEventIneligible is used when the node's eligibility is marked
	// ineligible
	NodeEligibilityEventIneligible = "Node marked as ineligible for scheduling"

	// NodeHeartbeatEventReregistered is the message used when the node becomes
	// reregistered by the heartbeat.
	NodeHeartbeatEventReregistered = "Node reregistered by heartbeat"
)

// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// ctx provides context regarding the underlying connection
	ctx *RPCContext

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// evals holds pending rescheduling eval updates triggered by failed allocations
	evals []*structs.Evaluation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *structs.BatchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
			n.ctx.NodeID = args.Node.ID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}
	if args.Node.SecretID == "" {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Default to eligible for scheduling if unset
	if args.Node.SchedulingEligibility == "" {
		args.Node.SchedulingEligibility = structs.NodeSchedulingEligible
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match. Not registering node.")
		}
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
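	// (If the request was forwarded, n.ctx refers to the connection from the
	// forwarding server rather than from the node itself, so it must not be
	// cached as a node connection.)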
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.Node.ID
		n.srv.addNodeConn(n.ctx)
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// constructNodeServerInfoResponse assumes the n.srv.peerLock is held for reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = string(n.srv.raft.Leader())

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for _, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: v.RPCAddr.String(),
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	ws := memdb.NewWatchSet()
	iter, err := snap.Nodes(ws)
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
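// Deregistration also clears the node's heartbeat timer, creates evaluations for
// its allocations, and revokes any Vault accessors still outstanding for the node.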
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Check node permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}
	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	accessors, err := snap.VaultAccessorsByNode(ws, args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if l := len(accessors); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Setup the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
			n.ctx.NodeID = args.NodeID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// We have a valid node connection, so add the mapping to cache the
	// connection and allow the server to send RPCs to the client. We only cache
	// the connection if it is not being forwarded from another server.
	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
		n.ctx.NodeID = args.NodeID
		n.srv.addNodeConn(n.ctx)
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		// Attach an event if we are updating the node status to ready when it
		// is down via a heartbeat
		if node.Status == structs.NodeStatusDown && args.NodeEvent == nil {
			args.NodeEvent = structs.NewNodeEvent().
				SetSubsystem(structs.NodeEventSubsystemCluster).
				SetMessage(NodeHeartbeatEventReregistered)
		}

		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if l := len(accessors); l != 0 {
			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
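
	// Populate the leader and peer information for the client;
	// constructNodeServerInfoResponse expects peerLock to be held for reading.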
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}

// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}
	if args.NodeEvent != nil {
		return fmt.Errorf("node event must not be set")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(nil, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// COMPAT: Remove in 0.9. Attempt to upgrade the request if it is of the old
	// format.
	if args.Drain && args.DrainStrategy == nil {
		args.DrainStrategy = &structs.DrainStrategy{
			DrainSpec: structs.DrainSpec{
				Deadline: -1 * time.Second, // Force drain
			},
		}
	}

	// Mark the deadline time
	if args.DrainStrategy != nil && args.DrainStrategy.Deadline.Nanoseconds() > 0 {
		args.DrainStrategy.ForceDeadline = time.Now().Add(args.DrainStrategy.Deadline)
	}

	// Construct the node event
	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemDrain)
	if node.DrainStrategy == nil && args.DrainStrategy != nil {
		args.NodeEvent.SetMessage(NodeDrainEventDrainSet)
	} else if node.DrainStrategy != nil && args.DrainStrategy != nil {
		args.NodeEvent.SetMessage(NodeDrainEventDrainUpdated)
	} else if node.DrainStrategy != nil && args.DrainStrategy == nil {
		args.NodeEvent.SetMessage(NodeDrainEventDrainDisabled)
	} else {
		args.NodeEvent = nil
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// If the node is transitioning to be eligible, create Node evaluations
	// because there may be a System job registered that should be evaluated.
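	// This only applies when an existing drain is being removed (DrainStrategy is
	// nil) with MarkEligible set on a node that is currently ineligible.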
	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Set the reply index
	reply.Index = index
	return nil
}

// UpdateEligibility is used to update the scheduling eligibility of a node
func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest,
	reply *structs.NodeEligibilityUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for setting scheduling eligibility")
	}
	if args.NodeEvent != nil {
		return fmt.Errorf("node event must not be set")
	}

	// Check that only allowed types are set
	switch args.Eligibility {
	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
	default:
		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(nil, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible {
		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
	}

	switch args.Eligibility {
	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
	default:
		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
	}

	// Construct the node event
	args.NodeEvent = structs.NewNodeEvent().SetSubsystem(structs.NodeEventSubsystemCluster)
	if node.SchedulingEligibility == args.Eligibility {
		return nil // Nothing to do
	} else if args.Eligibility == structs.NodeSchedulingEligible {
		args.NodeEvent.SetMessage(NodeEligibilityEventEligible)
	} else {
		args.NodeEvent.SetMessage(NodeEligibilityEventIneligible)
	}

	// Commit this update via Raft
	outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err)
		return err
	}
	if outErr != nil {
		if err, ok := outErr.(error); ok && err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err)
			return err
		}
	}

	// If the node is transitioning to be eligible, create Node evaluations
	// because there may be a System job registered that should be evaluated.
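	// (node still holds the pre-update eligibility from the snapshot read above, so
	// comparing it to args.Eligibility detects the ineligible-to-eligible transition.)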
	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		// If ResolveToken had an unexpected error return that
		if err != structs.ErrTokenNotFound {
			return err
		}

		// Attempt to lookup AuthToken as a Node.SecretID since nodes
		// call this endpoint and don't have an ACL token.
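		// A matching node secret grants the read access this endpoint needs;
		// otherwise we fall through to ErrTokenNotFound below.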
		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
		if stateErr != nil {
			// Return the original ResolveToken error with this err
			var merr multierror.Error
			merr.Errors = append(merr.Errors, err, stateErr)
			return merr.ErrorOrNil()
		}

		// Not a node or a valid ACL token
		if node == nil {
			return structs.ErrTokenNotFound
		}
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			out, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := state.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Check node read and namespace job read permissions
	aclObj, err := n.srv.ResolveToken(args.AuthToken)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// cache namespace perms
	readableNamespaces := map[string]bool{}

	// readNS is a caching namespace read-job helper
	readNS := func(ns string) bool {
		if aclObj == nil {
			// ACLs are disabled; everything is readable
			return true
		}

		if readable, ok := readableNamespaces[ns]; ok {
			// cache hit
			return readable
		}

		// cache miss
		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
		readableNamespaces[ns] = readable
		return readable
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			allocs, err := state.AllocsByNode(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if n := len(allocs); n != 0 {
				reply.Allocs = make([]*structs.Allocation, 0, n)
				for _, alloc := range allocs {
					if readNS(alloc.Namespace) {
						reply.Allocs = append(reply.Allocs, alloc)
					}

					// Get the max of all allocs since
					// subsequent requests need to start
					// from the latest index
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetClientAllocs is used to request a lightweight list of alloc modify indexes
// per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		// We have a valid node connection since there is no error from the
		// forwarded server, so add the mapping to cache the
		// connection and allow the server to send RPCs to the client.
		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
			n.ctx.NodeID = args.NodeID
			n.srv.addNodeConn(n.ctx)
		}

		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// numOldAllocs is used to detect if there is a garbage collection event
	// that affects the node. When an allocation is garbage collected, the
	// modify index does not change and thus the query won't unblock, even
	// though the set of allocations on the node has changed.
	var numOldAllocs int

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			node, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				if args.SecretID == "" {
					return fmt.Errorf("missing node secret ID for client status update")
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				// We have a valid node connection, so add the mapping to cache the
				// connection and allow the server to send RPCs to the client. We only cache
				// the connection if it is not being forwarded from another server.
				if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
					n.ctx.NodeID = args.NodeID
					n.srv.addNodeConn(n.ctx)
				}

				var err error
				allocs, err = state.AllocsByNode(ws, args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			reply.MigrateTokens = make(map[string]string)

			// preferTableIndex is used to determine whether we should build the
			// response index based on the full table indexes versus the modify
			// indexes of the allocations on the specific node. This is
			// preferred in the case that the node doesn't yet have allocations
			// or when we detect a GC that affects the node.
			preferTableIndex := true

			// Setup the output
			if numAllocs := len(allocs); numAllocs != 0 {
				preferTableIndex = false

				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex

					// If the allocation is going to do a migration, create a
					// migration token so that the client can authenticate with
					// the node hosting the previous allocation.
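					// The token is derived from the previous allocation's ID and the
					// secret ID of the node that ran it (structs.GenerateMigrateToken).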
					if alloc.ShouldMigrate() {
						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
						if err != nil {
							return err
						}

						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
							if err != nil {
								return err
							}
							if allocNode == nil {
								// Node must have been GC'd so skip the token
								continue
							}

							token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
							if err != nil {
								return err
							}
							reply.MigrateTokens[alloc.ID] = token
						}
					}

					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}

				// Determine if we have fewer allocations than before. This
				// indicates there was a garbage collection
				if numAllocs < numOldAllocs {
					preferTableIndex = true
				}

				// Store the new number of allocations
				numOldAllocs = numAllocs
			}

			if preferTableIndex {
				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Ensure that evals aren't set from client RPCs
	// We create them here before the raft update
	if len(args.Evals) != 0 {
		return fmt.Errorf("evals field must not be set")
	}

	// Update modified timestamp for client initiated allocation updates
	now := time.Now()
	var evals []*structs.Evaluation

	for _, alloc := range args.Alloc {
		alloc.ModifyTime = now.UTC().UnixNano()

		// Add an evaluation if this is a failed alloc that is eligible for rescheduling
		if alloc.ClientStatus == structs.AllocClientStatusFailed {
			// Only create evaluations if this is an existing alloc and it is
			// eligible as per its task group's ReschedulePolicy
			if existingAlloc, _ := n.srv.State().AllocByID(nil, alloc.ID); existingAlloc != nil {
				job, err := n.srv.State().JobByID(nil, existingAlloc.Namespace, existingAlloc.JobID)
				if err != nil {
					n.srv.logger.Printf("[ERR] nomad.client: UpdateAlloc unable to find job ID %q: %v", existingAlloc.JobID, err)
					continue
				}
				if job == nil {
					n.srv.logger.Printf("[DEBUG] nomad.client: UpdateAlloc unable to find job ID %q", existingAlloc.JobID)
					continue
				}
				taskGroup := job.LookupTaskGroup(existingAlloc.TaskGroup)
				if taskGroup != nil && existingAlloc.FollowupEvalID == "" && existingAlloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) {
					eval := &structs.Evaluation{
						ID:          uuid.Generate(),
						Namespace:   existingAlloc.Namespace,
						TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
						JobID:       existingAlloc.JobID,
						Type:        job.Type,
						Priority:    job.Priority,
						Status:      structs.EvalStatusPending,
					}
					evals = append(evals, eval)
				}
			}
		}
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)
	n.evals = append(n.evals, evals...)

	// Start a new batch if none
	future := n.updateFuture
	if future == nil {
		future = structs.NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			evals := n.evals
			future := n.updateFuture
			n.updates = nil
			n.evals = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates, evals)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Setup the response
	reply.Index = future.Index()
	return nil
}

// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) {
	// Group pending evals by jobID to prevent creating unnecessary evals
	evalsByJobId := make(map[structs.NamespacedID]struct{})
	var trimmedEvals []*structs.Evaluation
	for _, eval := range evals {
		namespacedID := structs.NamespacedID{
			ID:        eval.JobID,
			Namespace: eval.Namespace,
		}
		_, exists := evalsByJobId[namespacedID]
		if !exists {
			trimmedEvals = append(trimmedEvals, eval)
			evalsByJobId[namespacedID] = struct{}{}
		}
	}

	if len(trimmedEvals) > 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: Adding %v evaluations for rescheduling failed allocations", len(trimmedEvals))
	}
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		Evals:        trimmedEvals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating, check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		ws := memdb.NewWatchSet()
		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if l := len(revoke); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Capture all the nodes
			var err error
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = state.NodesByIDPrefix(ws, prefix)
			} else {
				iter, err = state.Nodes(ws)
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := state.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
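// System jobs are included as well so they can be evaluated against the node even
// if they have no allocation on it yet.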
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	ws := memdb.NewWatchSet()
	allocs, err := snap.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       alloc.Namespace,
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       job.Namespace,
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but that the EvalUpdate does not.
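	// If the eval write fails, the error is returned to the caller and any node
	// update that preceded it stays committed.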
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}

// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		if e == nil {
			return
		}
		re, ok := e.(*structs.RecoverableError)
		if ok {
			// No need to wrap if error is already a RecoverableError
			reply.Error = re
		} else {
			reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
		}

		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(ws, args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}
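
	// Refuse the entire request if any named task has no Vault policies configured.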
	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens; if any fails, the whole group is canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						return err
					}

					results[task] = secret
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}

	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
		}

		return nil
	}

	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}

func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
	if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())

	if len(args.NodeEvents) == 0 {
		return fmt.Errorf("no node events given")
	}
	for nodeID, events := range args.NodeEvents {
		if len(events) == 0 {
			return fmt.Errorf("no node events given for node %q", nodeID)
		}
	}

	_, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.node: upserting node events failed: %v", err)
		return err
	}

	reply.Index = index
	return nil
}