github.com/djenriquez/nomad-1@v0.8.1/nomad/node_endpoint.go

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"golang.org/x/sync/errgroup"
    11  
    12  	"github.com/armon/go-metrics"
    13  	"github.com/hashicorp/go-memdb"
    14  	"github.com/hashicorp/go-multierror"
    15  	"github.com/hashicorp/nomad/acl"
    16  	"github.com/hashicorp/nomad/helper/uuid"
    17  	"github.com/hashicorp/nomad/nomad/state"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  	"github.com/hashicorp/raft"
    20  	vapi "github.com/hashicorp/vault/api"
    21  )
    22  
    23  const (
    24  	// batchUpdateInterval is how long we wait to batch updates
    25  	batchUpdateInterval = 50 * time.Millisecond
    26  
    27  	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
    28  	// create token requests that may be outstanding per derive request
    29  	maxParallelRequestsPerDerive = 16
    30  )
    31  
    32  // Node endpoint is used for client interactions
    33  type Node struct {
    34  	srv *Server
    35  
    36  	// ctx provides context regarding the underlying connection
    37  	ctx *RPCContext
    38  
    39  	// updates holds pending client status updates for allocations
    40  	updates []*structs.Allocation
    41  
    42  	// evals holds pending rescheduling eval updates triggered by failed allocations
    43  	evals []*structs.Evaluation
    44  
    45  	// updateFuture is used to wait for the pending batch update
    46  	// to complete. This may be nil if no batch is pending.
    47  	updateFuture *structs.BatchFuture
    48  
    49  	// updateTimer is the timer that will trigger the next batch
    50  	// update, and may be nil if there is no batch pending.
    51  	updateTimer *time.Timer
    52  
    53  	// updatesLock synchronizes access to the updates list,
    54  	// the future and the timer.
    55  	updatesLock sync.Mutex
    56  }
    57  
    58  // Register is used to upsert a client that is available for scheduling
    59  func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
    60  	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
    61  		// We have a valid node connection since there is no error from the
    62  		// forwarded server, so add the mapping to cache the
    63  		// connection and allow the server to send RPCs to the client.
    64  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
    65  			n.ctx.NodeID = args.Node.ID
    66  			n.srv.addNodeConn(n.ctx)
    67  		}
    68  
    69  		return err
    70  	}
    71  	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())
    72  
    73  	// Validate the arguments
    74  	if args.Node == nil {
    75  		return fmt.Errorf("missing node for client registration")
    76  	}
    77  	if args.Node.ID == "" {
    78  		return fmt.Errorf("missing node ID for client registration")
    79  	}
    80  	if args.Node.Datacenter == "" {
    81  		return fmt.Errorf("missing datacenter for client registration")
    82  	}
    83  	if args.Node.Name == "" {
    84  		return fmt.Errorf("missing node name for client registration")
    85  	}
    86  	if len(args.Node.Attributes) == 0 {
    87  		return fmt.Errorf("missing attributes for client registration")
    88  	}
    89  	if args.Node.SecretID == "" {
    90  		return fmt.Errorf("missing node secret ID for client registration")
    91  	}
    92  
    93  	// Default the status if none is given
    94  	if args.Node.Status == "" {
    95  		args.Node.Status = structs.NodeStatusInit
    96  	}
    97  	if !structs.ValidNodeStatus(args.Node.Status) {
    98  		return fmt.Errorf("invalid status for node")
    99  	}
   100  
   101  	// Default to eligible for scheduling if unset
   102  	if args.Node.SchedulingEligibility == "" {
   103  		args.Node.SchedulingEligibility = structs.NodeSchedulingEligible
   104  	}
   105  
   106  	// Set the timestamp when the node is registered
   107  	args.Node.StatusUpdatedAt = time.Now().Unix()
   108  
   109  	// Compute the node class
   110  	if err := args.Node.ComputeClass(); err != nil {
   111  		return fmt.Errorf("failed to compute node class: %v", err)
   112  	}
   113  
   114  	// Look for the node so we can detect a state transition
   115  	snap, err := n.srv.fsm.State().Snapshot()
   116  	if err != nil {
   117  		return err
   118  	}
   119  
   120  	ws := memdb.NewWatchSet()
   121  	originalNode, err := snap.NodeByID(ws, args.Node.ID)
   122  	if err != nil {
   123  		return err
   124  	}
   125  
   126  	// Check if the SecretID has been tampered with
   127  	if originalNode != nil {
   128  		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
   129  			return fmt.Errorf("node secret ID does not match. Not registering node.")
   130  		}
   131  	}
   132  
   133  	// We have a valid node connection, so add the mapping to cache the
   134  	// connection and allow the server to send RPCs to the client. We only cache
   135  	// the connection if it is not being forwarded from another server.
   136  	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   137  		n.ctx.NodeID = args.Node.ID
   138  		n.srv.addNodeConn(n.ctx)
   139  	}
   140  
   141  	// Commit this update via Raft
   142  	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
   143  	if err != nil {
   144  		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
   145  		return err
   146  	}
   147  	reply.NodeModifyIndex = index
   148  
   149  	// Check if we should trigger evaluations
   150  	originalStatus := structs.NodeStatusInit
   151  	if originalNode != nil {
   152  		originalStatus = originalNode.Status
   153  	}
   154  	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
   155  	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
   156  		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
   157  		if err != nil {
   158  			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
   159  			return err
   160  		}
   161  		reply.EvalIDs = evalIDs
   162  		reply.EvalCreateIndex = evalIndex
   163  	}
   164  
   165  	// Check if we need to setup a heartbeat
   166  	if !args.Node.TerminalStatus() {
   167  		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
   168  		if err != nil {
   169  			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
   170  			return err
   171  		}
   172  		reply.HeartbeatTTL = ttl
   173  	}
   174  
   175  	// Set the reply index
   176  	reply.Index = index
   177  	snap, err = n.srv.fsm.State().Snapshot()
   178  	if err != nil {
   179  		return err
   180  	}
   181  
   182  	n.srv.peerLock.RLock()
   183  	defer n.srv.peerLock.RUnlock()
   184  	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
   185  		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
   186  		return err
   187  	}
   188  
   189  	return nil
   190  }
   191  
   192  // constructNodeServerInfoResponse assumes the n.srv.peerLock is held for reading.
   193  func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
   194  	reply.LeaderRPCAddr = string(n.srv.raft.Leader())
   195  
   196  	// Reply with config information required for future RPC requests
   197  	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
   198  	for _, v := range n.srv.localPeers {
   199  		reply.Servers = append(reply.Servers,
   200  			&structs.NodeServerInfo{
   201  				RPCAdvertiseAddr: v.RPCAddr.String(),
   202  				RPCMajorVersion:  int32(v.MajorVersion),
   203  				RPCMinorVersion:  int32(v.MinorVersion),
   204  				Datacenter:       v.Datacenter,
   205  			})
   206  	}
   207  
   208  	// TODO(sean@): Use an indexed node count instead
   209  	//
   210  	// Snapshot is used only to iterate over all nodes to create a node
   211  	// count to send back to Nomad Clients in their heartbeat so Clients
   212  	// can estimate the size of the cluster.
   213  	ws := memdb.NewWatchSet()
   214  	iter, err := snap.Nodes(ws)
   215  	if err == nil {
   216  		for {
   217  			raw := iter.Next()
   218  			if raw == nil {
   219  				break
   220  			}
   221  			reply.NumNodes++
   222  		}
   223  	}
   224  
   225  	return nil
   226  }
   227  
   228  // Deregister is used to remove a client from the cluster. If a client should
   229  // just be made unavailable for scheduling, a status update is preferred.
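        // Deregistering a node also clears its heartbeat timer, creates node
        // evaluations for the affected jobs, and revokes any outstanding Vault
        // accessors tied to the node.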
   230  func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
   231  	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
   232  		return err
   233  	}
   234  	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())
   235  
   236  	// Check node permissions
   237  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   238  		return err
   239  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   240  		return structs.ErrPermissionDenied
   241  	}
   242  
   243  	// Verify the arguments
   244  	if args.NodeID == "" {
   245  		return fmt.Errorf("missing node ID for client deregistration")
   246  	}
   247  	// Look for the node
   248  	snap, err := n.srv.fsm.State().Snapshot()
   249  	if err != nil {
   250  		return err
   251  	}
   252  
   253  	ws := memdb.NewWatchSet()
   254  	node, err := snap.NodeByID(ws, args.NodeID)
   255  	if err != nil {
   256  		return err
   257  	}
   258  	if node == nil {
   259  		return fmt.Errorf("node not found")
   260  	}
   261  
   262  	// Commit this update via Raft
   263  	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
   264  	if err != nil {
   265  		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
   266  		return err
   267  	}
   268  
   269  	// Clear the heartbeat timer if any
   270  	n.srv.clearHeartbeatTimer(args.NodeID)
   271  
   272  	// Create the evaluations for this node
   273  	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   274  	if err != nil {
   275  		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
   276  		return err
   277  	}
   278  
   279  	// Determine if there are any Vault accessors on the node
   280  	accessors, err := snap.VaultAccessorsByNode(ws, args.NodeID)
   281  	if err != nil {
   282  		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
   283  		return err
   284  	}
   285  
   286  	if l := len(accessors); l != 0 {
   287  		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
   288  		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
   289  			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
   290  			return err
   291  		}
   292  	}
   293  
   294  	// Setup the reply
   295  	reply.EvalIDs = evalIDs
   296  	reply.EvalCreateIndex = evalIndex
   297  	reply.NodeModifyIndex = index
   298  	reply.Index = index
   299  	return nil
   300  }
   301  
   302  // UpdateStatus is used to update the status of a client node
   303  func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
   304  	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
   305  		// We have a valid node connection since there is no error from the
   306  		// forwarded server, so add the mapping to cache the
   307  		// connection and allow the server to send RPCs to the client.
   308  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
   309  			n.ctx.NodeID = args.NodeID
   310  			n.srv.addNodeConn(n.ctx)
   311  		}
   312  
   313  		return err
   314  	}
   315  	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())
   316  
   317  	// Verify the arguments
   318  	if args.NodeID == "" {
   319  		return fmt.Errorf("missing node ID for client status update")
   320  	}
   321  	if !structs.ValidNodeStatus(args.Status) {
   322  		return fmt.Errorf("invalid status for node")
   323  	}
   324  
   325  	// Look for the node
   326  	snap, err := n.srv.fsm.State().Snapshot()
   327  	if err != nil {
   328  		return err
   329  	}
   330  
   331  	ws := memdb.NewWatchSet()
   332  	node, err := snap.NodeByID(ws, args.NodeID)
   333  	if err != nil {
   334  		return err
   335  	}
   336  	if node == nil {
   337  		return fmt.Errorf("node not found")
   338  	}
   339  
   340  	// We have a valid node connection, so add the mapping to cache the
   341  	// connection and allow the server to send RPCs to the client. We only cache
   342  	// the connection if it is not being forwarded from another server.
   343  	if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   344  		n.ctx.NodeID = args.NodeID
   345  		n.srv.addNodeConn(n.ctx)
   346  	}
   347  
   348  	// XXX: Could use the SecretID here but have to update the heartbeat system
   349  	// to track SecretIDs.
   350  
   351  	// Update the timestamp of when the node status was updated
   352  	node.StatusUpdatedAt = time.Now().Unix()
   353  
   354  	// Commit this update via Raft
   355  	var index uint64
   356  	if node.Status != args.Status {
   357  		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
   358  		if err != nil {
   359  			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
   360  			return err
   361  		}
   362  		reply.NodeModifyIndex = index
   363  	}
   364  
   365  	// Check if we should trigger evaluations
   366  	transitionToReady := transitionedToReady(args.Status, node.Status)
   367  	if structs.ShouldDrainNode(args.Status) || transitionToReady {
   368  		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   369  		if err != nil {
   370  			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
   371  			return err
   372  		}
   373  		reply.EvalIDs = evalIDs
   374  		reply.EvalCreateIndex = evalIndex
   375  	}
   376  
   377  	// Check if we need to setup a heartbeat
   378  	switch args.Status {
   379  	case structs.NodeStatusDown:
   380  		// Determine if there are any Vault accessors on the node
   381  		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
   382  		if err != nil {
   383  			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
   384  			return err
   385  		}
   386  
   387  		if l := len(accessors); l != 0 {
   388  			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
   389  			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
   390  				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
   391  				return err
   392  			}
   393  		}
   394  	default:
   395  		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
   396  		if err != nil {
   397  			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
   398  			return err
   399  		}
   400  		reply.HeartbeatTTL = ttl
   401  	}
   402  
   403  	// Set the reply index and leader
   404  	reply.Index = index
   405  	n.srv.peerLock.RLock()
   406  	defer n.srv.peerLock.RUnlock()
   407  	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
   408  		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
   409  		return err
   410  	}
   411  
   412  	return nil
   413  }
   414  
   415  // transitionedToReady is a helper that takes a node's new and old status and
   416  // returns whether it has transitioned to ready.
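        //
        // For example:
        //
        //	transitionedToReady(structs.NodeStatusReady, structs.NodeStatusInit) // true
        //	transitionedToReady(structs.NodeStatusReady, structs.NodeStatusDown) // true
        //	transitionedToReady(structs.NodeStatusReady, structs.NodeStatusReady) // false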
   417  func transitionedToReady(newStatus, oldStatus string) bool {
   418  	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
   419  	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
   420  	return initToReady || terminalToReady
   421  }
   422  
   423  // UpdateDrain is used to update the drain mode of a client node
   424  func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
   425  	reply *structs.NodeDrainUpdateResponse) error {
   426  	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
   427  		return err
   428  	}
   429  	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())
   430  
   431  	// Check node write permissions
   432  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   433  		return err
   434  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   435  		return structs.ErrPermissionDenied
   436  	}
   437  
   438  	// Verify the arguments
   439  	if args.NodeID == "" {
   440  		return fmt.Errorf("missing node ID for drain update")
   441  	}
   442  
   443  	// Look for the node
   444  	snap, err := n.srv.fsm.State().Snapshot()
   445  	if err != nil {
   446  		return err
   447  	}
   448  	node, err := snap.NodeByID(nil, args.NodeID)
   449  	if err != nil {
   450  		return err
   451  	}
   452  	if node == nil {
   453  		return fmt.Errorf("node not found")
   454  	}
   455  
   456  	// COMPAT: Remove in 0.9. Attempt to upgrade the request if it is of the old
   457  	// format.
   458  	if args.Drain && args.DrainStrategy == nil {
   459  		args.DrainStrategy = &structs.DrainStrategy{
   460  			DrainSpec: structs.DrainSpec{
   461  				Deadline: -1 * time.Second, // Force drain
   462  			},
   463  		}
   464  	}
   465  
   466  	// Mark the deadline time
   467  	if args.DrainStrategy != nil && args.DrainStrategy.Deadline.Nanoseconds() > 0 {
   468  		args.DrainStrategy.ForceDeadline = time.Now().Add(args.DrainStrategy.Deadline)
   469  	}
   470  
   471  	// Commit this update via Raft
   472  	_, index, err := n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
   473  	if err != nil {
   474  		n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
   475  		return err
   476  	}
   477  	reply.NodeModifyIndex = index
   478  
   479  	// If the node is transitioning to be eligible, create Node evaluations
   480  	// because there may be a System job registered that should be evaluated.
   481  	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.MarkEligible && args.DrainStrategy == nil {
   482  		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   483  		if err != nil {
   484  			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
   485  			return err
   486  		}
   487  		reply.EvalIDs = evalIDs
   488  		reply.EvalCreateIndex = evalIndex
   489  	}
   490  
   491  	// Set the reply index
   492  	reply.Index = index
   493  	return nil
   494  }
   495  
   496  // UpdateEligibility is used to update the scheduling eligibility of a node
   497  func (n *Node) UpdateEligibility(args *structs.NodeUpdateEligibilityRequest,
   498  	reply *structs.NodeEligibilityUpdateResponse) error {
   499  	if done, err := n.srv.forward("Node.UpdateEligibility", args, args, reply); done {
   500  		return err
   501  	}
   502  	defer metrics.MeasureSince([]string{"nomad", "client", "update_eligibility"}, time.Now())
   503  
   504  	// Check node write permissions
   505  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   506  		return err
   507  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   508  		return structs.ErrPermissionDenied
   509  	}
   510  
   511  	// Verify the arguments
   512  	if args.NodeID == "" {
   513  		return fmt.Errorf("missing node ID for setting scheduling eligibility")
   514  	}
   515  
   516  	// Check that only allowed types are set
   517  	switch args.Eligibility {
   518  	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
   519  	default:
   520  		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
   521  	}
   522  
   523  	// Look for the node
   524  	snap, err := n.srv.fsm.State().Snapshot()
   525  	if err != nil {
   526  		return err
   527  	}
   528  	node, err := snap.NodeByID(nil, args.NodeID)
   529  	if err != nil {
   530  		return err
   531  	}
   532  	if node == nil {
   533  		return fmt.Errorf("node not found")
   534  	}
   535  
   536  	if node.DrainStrategy != nil && args.Eligibility == structs.NodeSchedulingEligible {
   537  		return fmt.Errorf("can not set node's scheduling eligibility to eligible while it is draining")
   538  	}
   539  
   540  	switch args.Eligibility {
   541  	case structs.NodeSchedulingEligible, structs.NodeSchedulingIneligible:
   542  	default:
   543  		return fmt.Errorf("invalid scheduling eligibility %q", args.Eligibility)
   544  	}
   545  
   546  	// Commit this update via Raft
   547  	outErr, index, err := n.srv.raftApply(structs.NodeUpdateEligibilityRequestType, args)
   548  	if err != nil {
   549  		n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err)
   550  		return err
   551  	}
   552  	if outErr != nil {
   553  		if err, ok := outErr.(error); ok && err != nil {
   554  			n.srv.logger.Printf("[ERR] nomad.client: eligibility update failed: %v", err)
   555  			return err
   556  		}
   557  	}
   558  
   559  	// If the node is transitioning to be eligible, create Node evaluations
   560  	// because there may be a System job registered that should be evaluated.
   561  	if node.SchedulingEligibility == structs.NodeSchedulingIneligible && args.Eligibility == structs.NodeSchedulingEligible {
   562  		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
   563  		if err != nil {
   564  			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
   565  			return err
   566  		}
   567  		reply.EvalIDs = evalIDs
   568  		reply.EvalCreateIndex = evalIndex
   569  	}
   570  
   571  	// Set the reply index
   572  	reply.Index = index
   573  	return nil
   574  }
   575  
   576  // Evaluate is used to force a re-evaluation of the node
   577  func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
   578  	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
   579  		return err
   580  	}
   581  	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())
   582  
   583  	// Check node write permissions
   584  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   585  		return err
   586  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
   587  		return structs.ErrPermissionDenied
   588  	}
   589  
   590  	// Verify the arguments
   591  	if args.NodeID == "" {
   592  		return fmt.Errorf("missing node ID for evaluation")
   593  	}
   594  
   595  	// Look for the node
   596  	snap, err := n.srv.fsm.State().Snapshot()
   597  	if err != nil {
   598  		return err
   599  	}
   600  	ws := memdb.NewWatchSet()
   601  	node, err := snap.NodeByID(ws, args.NodeID)
   602  	if err != nil {
   603  		return err
   604  	}
   605  	if node == nil {
   606  		return fmt.Errorf("node not found")
   607  	}
   608  
   609  	// Create the evaluation
   610  	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
   611  	if err != nil {
   612  		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
   613  		return err
   614  	}
   615  	reply.EvalIDs = evalIDs
   616  	reply.EvalCreateIndex = evalIndex
   617  
   618  	// Set the reply index
   619  	reply.Index = evalIndex
   620  
   621  	n.srv.peerLock.RLock()
   622  	defer n.srv.peerLock.RUnlock()
   623  	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
   624  		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
   625  		return err
   626  	}
   627  	return nil
   628  }
   629  
   630  // GetNode is used to request information about a specific node
   631  func (n *Node) GetNode(args *structs.NodeSpecificRequest,
   632  	reply *structs.SingleNodeResponse) error {
   633  	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
   634  		return err
   635  	}
   636  	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())
   637  
   638  	// Check node read permissions
   639  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
   640  		// If ResolveToken had an unexpected error return that
   641  		if err != structs.ErrTokenNotFound {
   642  			return err
   643  		}
   644  
   645  		// Attempt to lookup AuthToken as a Node.SecretID since nodes
   646  		// call this endpoint and don't have an ACL token.
   647  		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
   648  		if stateErr != nil {
   649  			// Return the original ResolveToken error with this err
   650  			var merr multierror.Error
   651  			merr.Errors = append(merr.Errors, err, stateErr)
   652  			return merr.ErrorOrNil()
   653  		}
   654  
   655  		// Not a node or a valid ACL token
   656  		if node == nil {
   657  			return structs.ErrTokenNotFound
   658  		}
   659  	} else if aclObj != nil && !aclObj.AllowNodeRead() {
   660  		return structs.ErrPermissionDenied
   661  	}
   662  
   663  	// Setup the blocking query
   664  	opts := blockingOptions{
   665  		queryOpts: &args.QueryOptions,
   666  		queryMeta: &reply.QueryMeta,
   667  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   668  			// Verify the arguments
   669  			if args.NodeID == "" {
   670  				return fmt.Errorf("missing node ID")
   671  			}
   672  
   673  			// Look for the node
   674  			out, err := state.NodeByID(ws, args.NodeID)
   675  			if err != nil {
   676  				return err
   677  			}
   678  
   679  			// Setup the output
   680  			if out != nil {
   681  				// Clear the secret ID
   682  				reply.Node = out.Copy()
   683  				reply.Node.SecretID = ""
   684  				reply.Index = out.ModifyIndex
   685  			} else {
   686  				// Use the last index that affected the nodes table
   687  				index, err := state.Index("nodes")
   688  				if err != nil {
   689  					return err
   690  				}
   691  				reply.Node = nil
   692  				reply.Index = index
   693  			}
   694  
   695  			// Set the query response
   696  			n.srv.setQueryMeta(&reply.QueryMeta)
   697  			return nil
   698  		}}
   699  	return n.srv.blockingRPC(&opts)
   700  }
   701  
   702  // GetAllocs is used to request allocations for a specific node
   703  func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
   704  	reply *structs.NodeAllocsResponse) error {
   705  	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
   706  		return err
   707  	}
   708  	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())
   709  
   710  	// Check node read and namespace job read permissions
   711  	aclObj, err := n.srv.ResolveToken(args.AuthToken)
   712  	if err != nil {
   713  		return err
   714  	}
   715  	if aclObj != nil && !aclObj.AllowNodeRead() {
   716  		return structs.ErrPermissionDenied
   717  	}
   718  
   719  	// cache namespace perms
   720  	readableNamespaces := map[string]bool{}
   721  
   722  	// readNS is a caching namespace read-job helper
   723  	readNS := func(ns string) bool {
   724  		if aclObj == nil {
   725  			// ACLs are disabled; everything is readable
   726  			return true
   727  		}
   728  
   729  		if readable, ok := readableNamespaces[ns]; ok {
   730  			// cache hit
   731  			return readable
   732  		}
   733  
   734  		// cache miss
   735  		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
   736  		readableNamespaces[ns] = readable
   737  		return readable
   738  	}
   739  
   740  	// Verify the arguments
   741  	if args.NodeID == "" {
   742  		return fmt.Errorf("missing node ID")
   743  	}
   744  
   745  	// Setup the blocking query
   746  	opts := blockingOptions{
   747  		queryOpts: &args.QueryOptions,
   748  		queryMeta: &reply.QueryMeta,
   749  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   750  			// Look for the node
   751  			allocs, err := state.AllocsByNode(ws, args.NodeID)
   752  			if err != nil {
   753  				return err
   754  			}
   755  
   756  			// Setup the output
   757  			if n := len(allocs); n != 0 {
   758  				reply.Allocs = make([]*structs.Allocation, 0, n)
   759  				for _, alloc := range allocs {
   760  					if readNS(alloc.Namespace) {
   761  						reply.Allocs = append(reply.Allocs, alloc)
   762  					}
   763  
   764  					// Get the max of all allocs since
   765  					// subsequent requests need to start
   766  					// from the latest index
   767  					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
   768  				}
   769  			} else {
   770  				reply.Allocs = nil
   771  
   772  				// Use the last index that affected the allocs table
   773  				index, err := state.Index("allocs")
   774  				if err != nil {
   775  					return err
   776  				}
   777  
   778  				// Must provide non-zero index to prevent blocking
   779  				// Index 1 is impossible anyway (due to Raft internals)
   780  				if index == 0 {
   781  					reply.Index = 1
   782  				} else {
   783  					reply.Index = index
   784  				}
   785  			}
   786  			return nil
   787  		}}
   788  	return n.srv.blockingRPC(&opts)
   789  }
   790  
   791  // GetClientAllocs is used to request a lightweight list of alloc modify indexes
   792  // per allocation.
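        // For allocations that should migrate data from a previous allocation on a
        // different node, the response also carries a migrate token that the client
        // can use to authenticate against the node hosting the previous allocation.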
   793  func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
   794  	reply *structs.NodeClientAllocsResponse) error {
   795  	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
   796  		// We have a valid node connection since there is no error from the
   797  		// forwarded server, so add the mapping to cache the
   798  		// connection and allow the server to send RPCs to the client.
   799  		if err == nil && n.ctx != nil && n.ctx.NodeID == "" {
   800  			n.ctx.NodeID = args.NodeID
   801  			n.srv.addNodeConn(n.ctx)
   802  		}
   803  
   804  		return err
   805  	}
   806  	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())
   807  
   808  	// Verify the arguments
   809  	if args.NodeID == "" {
   810  		return fmt.Errorf("missing node ID")
   811  	}
   812  
   813  	// numOldAllocs is used to detect a garbage collection event that affects
   814  	// the node. When an allocation is garbage collected, the modify index does
   815  	// not change and thus the query won't unblock, even though the set of
   816  	// allocations on the node has changed.
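        	//
        	// For example, if a node previously reported three allocations and one is
        	// garbage collected, len(allocs) drops to two on the next run of the query
        	// below; numAllocs < numOldAllocs then forces preferTableIndex, so the reply
        	// index falls back to the allocs table index and the blocking query unblocks.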
   817  	var numOldAllocs int
   818  
   819  	// Setup the blocking query
   820  	opts := blockingOptions{
   821  		queryOpts: &args.QueryOptions,
   822  		queryMeta: &reply.QueryMeta,
   823  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   824  			// Look for the node
   825  			node, err := state.NodeByID(ws, args.NodeID)
   826  			if err != nil {
   827  				return err
   828  			}
   829  
   830  			var allocs []*structs.Allocation
   831  			if node != nil {
   832  				if args.SecretID == "" {
   833  					return fmt.Errorf("missing node secret ID")
   834  				} else if args.SecretID != node.SecretID {
   835  					return fmt.Errorf("node secret ID does not match")
   836  				}
   837  
   838  				// We have a valid node connection, so add the mapping to cache the
   839  				// connection and allow the server to send RPCs to the client. We only cache
   840  				// the connection if it is not being forwarded from another server.
   841  				if n.ctx != nil && n.ctx.NodeID == "" && !args.IsForwarded() {
   842  					n.ctx.NodeID = args.NodeID
   843  					n.srv.addNodeConn(n.ctx)
   844  				}
   845  
   846  				var err error
   847  				allocs, err = state.AllocsByNode(ws, args.NodeID)
   848  				if err != nil {
   849  					return err
   850  				}
   851  			}
   852  
   853  			reply.Allocs = make(map[string]uint64)
   854  			reply.MigrateTokens = make(map[string]string)
   855  
   856  			// preferTableIndex is used to determine whether we should build the
   857  			// response index based on the full table indexes versus the modify
   858  			// indexes of the allocations on the specific node. This is
   859  			// preferred in the case that the node doesn't yet have allocations
   860  			// or when we detect a GC that affects the node.
   861  			preferTableIndex := true
   862  
   863  			// Setup the output
   864  			if numAllocs := len(allocs); numAllocs != 0 {
   865  				preferTableIndex = false
   866  
   867  				for _, alloc := range allocs {
   868  					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex
   869  
   870  					// If the allocation is going to do a migration, create a
   871  					// migration token so that the client can authenticate with
   872  					// the node hosting the previous allocation.
   873  					if alloc.ShouldMigrate() {
   874  						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
   875  						if err != nil {
   876  							return err
   877  						}
   878  
   879  						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
   880  							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
   881  							if err != nil {
   882  								return err
   883  							}
   884  							if allocNode == nil {
   885  								// Node must have been GC'd so skip the token
   886  								continue
   887  							}
   888  
   889  							token, err := structs.GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
   890  							if err != nil {
   891  								return err
   892  							}
   893  							reply.MigrateTokens[alloc.ID] = token
   894  						}
   895  					}
   896  
   897  					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
   898  				}
   899  
   900  				// Determine if we have fewer allocations than before. This
   901  				// indicates there was a garbage collection
   902  				if numAllocs < numOldAllocs {
   903  					preferTableIndex = true
   904  				}
   905  
   906  				// Store the new number of allocations
   907  				numOldAllocs = numAllocs
   908  			}
   909  
   910  			if preferTableIndex {
   911  				// Use the last index that affected the allocs table
   912  				index, err := state.Index("allocs")
   913  				if err != nil {
   914  					return err
   915  				}
   916  
   917  				// Must provide non-zero index to prevent blocking
   918  				// Index 1 is impossible anyway (due to Raft internals)
   919  				if index == 0 {
   920  					reply.Index = 1
   921  				} else {
   922  					reply.Index = index
   923  				}
   924  			}
   925  			return nil
   926  		}}
   927  	return n.srv.blockingRPC(&opts)
   928  }
   929  
   930  // UpdateAlloc is used to update the client status of an allocation
   931  func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
   932  	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
   933  		return err
   934  	}
   935  	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())
   936  
   937  	// Ensure at least a single alloc
   938  	if len(args.Alloc) == 0 {
   939  		return fmt.Errorf("must update at least one allocation")
   940  	}
   941  
   942  	// Ensure that evals aren't set from client RPCs
   943  	// We create them here before the raft update
   944  	if len(args.Evals) != 0 {
   945  		return fmt.Errorf("evals field must not be set")
   946  	}
   947  
   948  	// Update modified timestamp for client initiated allocation updates
   949  	now := time.Now()
   950  	var evals []*structs.Evaluation
   951  
   952  	for _, alloc := range args.Alloc {
   953  		alloc.ModifyTime = now.UTC().UnixNano()
   954  
   955  		// Add an evaluation if this is a failed alloc that is eligible for rescheduling
   956  		if alloc.ClientStatus == structs.AllocClientStatusFailed {
   957  			// Only create evaluations if this is an existing alloc,
   958  			// and eligible as per its task group's ReschedulePolicy
   959  			if existingAlloc, _ := n.srv.State().AllocByID(nil, alloc.ID); existingAlloc != nil {
   960  				job, err := n.srv.State().JobByID(nil, existingAlloc.Namespace, existingAlloc.JobID)
   961  				if err != nil {
   962  					n.srv.logger.Printf("[ERR] nomad.client: UpdateAlloc unable to find job ID %q: %v", existingAlloc.JobID, err)
   963  					continue
   964  				}
   965  				if job == nil {
   966  					n.srv.logger.Printf("[DEBUG] nomad.client: UpdateAlloc unable to find job ID %q", existingAlloc.JobID)
   967  					continue
   968  				}
   969  				taskGroup := job.LookupTaskGroup(existingAlloc.TaskGroup)
   970  				if taskGroup != nil && existingAlloc.FollowupEvalID == "" && existingAlloc.RescheduleEligible(taskGroup.ReschedulePolicy, now) {
   971  					eval := &structs.Evaluation{
   972  						ID:          uuid.Generate(),
   973  						Namespace:   existingAlloc.Namespace,
   974  						TriggeredBy: structs.EvalTriggerRetryFailedAlloc,
   975  						JobID:       existingAlloc.JobID,
   976  						Type:        job.Type,
   977  						Priority:    job.Priority,
   978  						Status:      structs.EvalStatusPending,
   979  					}
   980  					evals = append(evals, eval)
   981  				}
   982  			}
   983  		}
   984  	}
   985  
   986  	// Add this to the batch
   987  	n.updatesLock.Lock()
   988  	n.updates = append(n.updates, args.Alloc...)
   989  	n.evals = append(n.evals, evals...)
   990  
   991  	// Start a new batch if none
   992  	future := n.updateFuture
   993  	if future == nil {
   994  		future = structs.NewBatchFuture()
   995  		n.updateFuture = future
   996  		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
   997  			// Get the pending updates
   998  			n.updatesLock.Lock()
   999  			updates := n.updates
  1000  			evals := n.evals
  1001  			future := n.updateFuture
  1002  			n.updates = nil
  1003  			n.evals = nil
  1004  			n.updateFuture = nil
  1005  			n.updateTimer = nil
  1006  			n.updatesLock.Unlock()
  1007  
  1008  			// Perform the batch update
  1009  			n.batchUpdate(future, updates, evals)
  1010  		})
  1011  	}
  1012  	n.updatesLock.Unlock()
  1013  
  1014  	// Wait for the future
  1015  	if err := future.Wait(); err != nil {
  1016  		return err
  1017  	}
  1018  
  1019  	// Setup the response
  1020  	reply.Index = future.Index()
  1021  	return nil
  1022  }
  1023  
  1024  // batchUpdate is used to update all the allocations
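        // Allocation updates and rescheduling evals accumulated by concurrent
        // UpdateAlloc calls within one batchUpdateInterval window share a single
        // BatchFuture and are committed in one Raft apply; every waiting caller then
        // receives the same index (or error) from that apply via the future.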
  1025  func (n *Node) batchUpdate(future *structs.BatchFuture, updates []*structs.Allocation, evals []*structs.Evaluation) {
  1026  	// Group pending evals by jobID to prevent creating unnecessary evals
  1027  	evalsByJobId := make(map[structs.NamespacedID]struct{})
  1028  	var trimmedEvals []*structs.Evaluation
  1029  	for _, eval := range evals {
  1030  		namespacedID := structs.NamespacedID{
  1031  			ID:        eval.JobID,
  1032  			Namespace: eval.Namespace,
  1033  		}
  1034  		_, exists := evalsByJobId[namespacedID]
  1035  		if !exists {
  1036  			trimmedEvals = append(trimmedEvals, eval)
  1037  			evalsByJobId[namespacedID] = struct{}{}
  1038  		}
  1039  	}
  1040  
  1041  	if len(trimmedEvals) > 0 {
  1042  		n.srv.logger.Printf("[DEBUG] nomad.client: Adding %v evaluations for rescheduling failed allocations", len(trimmedEvals))
  1043  	}
  1044  	// Prepare the batch update
  1045  	batch := &structs.AllocUpdateRequest{
  1046  		Alloc:        updates,
  1047  		Evals:        trimmedEvals,
  1048  		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
  1049  	}
  1050  
  1051  	// Commit this update via Raft
  1052  	var mErr multierror.Error
  1053  	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
  1054  	if err != nil {
  1055  		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
  1056  		mErr.Errors = append(mErr.Errors, err)
  1057  	}
  1058  
  1059  	// For each allocation we are updating check if we should revoke any
  1060  	// Vault Accessors
  1061  	var revoke []*structs.VaultAccessor
  1062  	for _, alloc := range updates {
  1063  		// Skip any allocation that isn't dead on the client
  1064  		if !alloc.Terminated() {
  1065  			continue
  1066  		}
  1067  
  1068  		// Determine if there are any Vault accessors for the allocation
  1069  		ws := memdb.NewWatchSet()
  1070  		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
  1071  		if err != nil {
  1072  			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
  1073  			mErr.Errors = append(mErr.Errors, err)
  1074  		}
  1075  
  1076  		revoke = append(revoke, accessors...)
  1077  	}
  1078  
  1079  	if l := len(revoke); l != 0 {
  1080  		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
  1081  		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
  1082  			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
  1083  			mErr.Errors = append(mErr.Errors, err)
  1084  		}
  1085  	}
  1086  
  1087  	// Respond to the future
  1088  	future.Respond(index, mErr.ErrorOrNil())
  1089  }
  1090  
  1091  // List is used to list the available nodes
  1092  func (n *Node) List(args *structs.NodeListRequest,
  1093  	reply *structs.NodeListResponse) error {
  1094  	if done, err := n.srv.forward("Node.List", args, args, reply); done {
  1095  		return err
  1096  	}
  1097  	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())
  1098  
  1099  	// Check node read permissions
  1100  	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
  1101  		return err
  1102  	} else if aclObj != nil && !aclObj.AllowNodeRead() {
  1103  		return structs.ErrPermissionDenied
  1104  	}
  1105  
  1106  	// Setup the blocking query
  1107  	opts := blockingOptions{
  1108  		queryOpts: &args.QueryOptions,
  1109  		queryMeta: &reply.QueryMeta,
  1110  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1111  			// Capture all the nodes
  1112  			var err error
  1113  			var iter memdb.ResultIterator
  1114  			if prefix := args.QueryOptions.Prefix; prefix != "" {
  1115  				iter, err = state.NodesByIDPrefix(ws, prefix)
  1116  			} else {
  1117  				iter, err = state.Nodes(ws)
  1118  			}
  1119  			if err != nil {
  1120  				return err
  1121  			}
  1122  
  1123  			var nodes []*structs.NodeListStub
  1124  			for {
  1125  				raw := iter.Next()
  1126  				if raw == nil {
  1127  					break
  1128  				}
  1129  				node := raw.(*structs.Node)
  1130  				nodes = append(nodes, node.Stub())
  1131  			}
  1132  			reply.Nodes = nodes
  1133  
  1134  			// Use the last index that affected the nodes table
  1135  			index, err := state.Index("nodes")
  1136  			if err != nil {
  1137  				return err
  1138  			}
  1139  			reply.Index = index
  1140  
  1141  			// Set the query response
  1142  			n.srv.setQueryMeta(&reply.QueryMeta)
  1143  			return nil
  1144  		}}
  1145  	return n.srv.blockingRPC(&opts)
  1146  }
  1147  
  1148  // createNodeEvals is used to create evaluations for each alloc on a node.
  1149  // Each Eval is scoped to a job, so we need to potentially trigger many evals.
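        // System jobs are evaluated as well, so, for example, a node running
        // allocations for two distinct jobs in a cluster with one registered system
        // job yields up to three evaluations (one per unique job ID).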
  1150  func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
  1151  	// Snapshot the state
  1152  	snap, err := n.srv.fsm.State().Snapshot()
  1153  	if err != nil {
  1154  		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
  1155  	}
  1156  
  1157  	// Find all the allocations for this node
  1158  	ws := memdb.NewWatchSet()
  1159  	allocs, err := snap.AllocsByNode(ws, nodeID)
  1160  	if err != nil {
  1161  		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
  1162  	}
  1163  
  1164  	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
  1165  	if err != nil {
  1166  		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
  1167  	}
  1168  
  1169  	var sysJobs []*structs.Job
  1170  	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
  1171  		sysJobs = append(sysJobs, job.(*structs.Job))
  1172  	}
  1173  
  1174  	// Fast-path if nothing to do
  1175  	if len(allocs) == 0 && len(sysJobs) == 0 {
  1176  		return nil, 0, nil
  1177  	}
  1178  
  1179  	// Create an eval for each JobID affected
  1180  	var evals []*structs.Evaluation
  1181  	var evalIDs []string
  1182  	jobIDs := make(map[string]struct{})
  1183  
  1184  	for _, alloc := range allocs {
  1185  		// Deduplicate on JobID
  1186  		if _, ok := jobIDs[alloc.JobID]; ok {
  1187  			continue
  1188  		}
  1189  		jobIDs[alloc.JobID] = struct{}{}
  1190  
  1191  		// Create a new eval
  1192  		eval := &structs.Evaluation{
  1193  			ID:              uuid.Generate(),
  1194  			Namespace:       alloc.Namespace,
  1195  			Priority:        alloc.Job.Priority,
  1196  			Type:            alloc.Job.Type,
  1197  			TriggeredBy:     structs.EvalTriggerNodeUpdate,
  1198  			JobID:           alloc.JobID,
  1199  			NodeID:          nodeID,
  1200  			NodeModifyIndex: nodeIndex,
  1201  			Status:          structs.EvalStatusPending,
  1202  		}
  1203  		evals = append(evals, eval)
  1204  		evalIDs = append(evalIDs, eval.ID)
  1205  	}
  1206  
  1207  	// Create an evaluation for each system job.
  1208  	for _, job := range sysJobs {
  1209  		// Still dedup on JobID as the node may already have the system job.
  1210  		if _, ok := jobIDs[job.ID]; ok {
  1211  			continue
  1212  		}
  1213  		jobIDs[job.ID] = struct{}{}
  1214  
  1215  		// Create a new eval
  1216  		eval := &structs.Evaluation{
  1217  			ID:              uuid.Generate(),
  1218  			Namespace:       job.Namespace,
  1219  			Priority:        job.Priority,
  1220  			Type:            job.Type,
  1221  			TriggeredBy:     structs.EvalTriggerNodeUpdate,
  1222  			JobID:           job.ID,
  1223  			NodeID:          nodeID,
  1224  			NodeModifyIndex: nodeIndex,
  1225  			Status:          structs.EvalStatusPending,
  1226  		}
  1227  		evals = append(evals, eval)
  1228  		evalIDs = append(evalIDs, eval.ID)
  1229  	}
  1230  
  1231  	// Create the Raft transaction
  1232  	update := &structs.EvalUpdateRequest{
  1233  		Evals:        evals,
  1234  		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
  1235  	}
  1236  
  1237  	// Commit this evaluation via Raft
  1238  	// XXX: There is a risk of partial failure where the node update succeeds
  1239  	// but that the EvalUpdate does not.
  1240  	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
  1241  	if err != nil {
  1242  		return nil, 0, err
  1243  	}
  1244  	return evalIDs, evalIndex, nil
  1245  }
  1246  
  1247  // DeriveVaultToken is used by the clients to request wrapped Vault tokens for
  1248  // tasks
  1249  func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
  1250  	reply *structs.DeriveVaultTokenResponse) error {
  1251  
  1252  	// setErr is a helper for setting the recoverable error on the reply and
  1253  	// logging it
  1254  	setErr := func(e error, recoverable bool) {
  1255  		if e == nil {
  1256  			return
  1257  		}
  1258  		re, ok := e.(*structs.RecoverableError)
  1259  		if ok {
  1260  			// No need to wrap if error is already a RecoverableError
  1261  			reply.Error = re
  1262  		} else {
  1263  			reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
  1264  		}
  1265  
  1266  		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
  1267  	}
  1268  
  1269  	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
  1270  		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
  1271  		return nil
  1272  	}
  1273  	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())
  1274  
  1275  	// Verify the arguments
  1276  	if args.NodeID == "" {
  1277  		setErr(fmt.Errorf("missing node ID"), false)
  1278  		return nil
  1279  	}
  1280  	if args.SecretID == "" {
  1281  		setErr(fmt.Errorf("missing node SecretID"), false)
  1282  		return nil
  1283  	}
  1284  	if args.AllocID == "" {
  1285  		setErr(fmt.Errorf("missing allocation ID"), false)
  1286  		return nil
  1287  	}
  1288  	if len(args.Tasks) == 0 {
  1289  		setErr(fmt.Errorf("no tasks specified"), false)
  1290  		return nil
  1291  	}
  1292  
  1293  	// Verify the following:
  1294  	// * The Node exists and has the correct SecretID
  1295  	// * The Allocation exists on the specified node
  1296  	// * The allocation contains the given tasks and they each require Vault
  1297  	//   tokens
  1298  	snap, err := n.srv.fsm.State().Snapshot()
  1299  	if err != nil {
  1300  		setErr(err, false)
  1301  		return nil
  1302  	}
  1303  	ws := memdb.NewWatchSet()
  1304  	node, err := snap.NodeByID(ws, args.NodeID)
  1305  	if err != nil {
  1306  		setErr(err, false)
  1307  		return nil
  1308  	}
  1309  	if node == nil {
  1310  		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
  1311  		return nil
  1312  	}
  1313  	if node.SecretID != args.SecretID {
  1314  		setErr(fmt.Errorf("SecretID mismatch"), false)
  1315  		return nil
  1316  	}
  1317  
  1318  	alloc, err := snap.AllocByID(ws, args.AllocID)
  1319  	if err != nil {
  1320  		setErr(err, false)
  1321  		return nil
  1322  	}
  1323  	if alloc == nil {
  1324  		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
  1325  		return nil
  1326  	}
  1327  	if alloc.NodeID != args.NodeID {
  1328  		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
  1329  		return nil
  1330  	}
  1331  	if alloc.TerminalStatus() {
  1332  		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
  1333  		return nil
  1334  	}
  1335  
  1336  	// Check the policies
  1337  	policies := alloc.Job.VaultPolicies()
  1338  	if policies == nil {
  1339  		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
  1340  		return nil
  1341  	}
  1342  	tg, ok := policies[alloc.TaskGroup]
  1343  	if !ok {
  1344  		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
  1345  		return nil
  1346  	}
  1347  
  1348  	var unneeded []string
  1349  	for _, task := range args.Tasks {
  1350  		taskVault := tg[task]
  1351  		if taskVault == nil || len(taskVault.Policies) == 0 {
  1352  			unneeded = append(unneeded, task)
  1353  		}
  1354  	}
  1355  
  1356  	if len(unneeded) != 0 {
  1357  		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
  1358  			strings.Join(unneeded, ", "))
  1359  		setErr(e, false)
  1360  		return nil
  1361  	}
  1362  
  1363  	// At this point the request is valid and we should contact Vault for
  1364  	// tokens.
  1365  
  1366  	// Create an error group where we will spin up a fixed set of goroutines to
  1367  	// handle deriving tokens but where if any fails the whole group is
  1368  	// canceled.
  1369  	g, ctx := errgroup.WithContext(context.Background())
  1370  
  1371  	// Cap the handlers
  1372  	handlers := len(args.Tasks)
  1373  	if handlers > maxParallelRequestsPerDerive {
  1374  		handlers = maxParallelRequestsPerDerive
  1375  	}
  1376  
  1377  	// Create the Vault Tokens
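        	// A request for, say, 40 tasks still results in at most
        	// maxParallelRequestsPerDerive (16) concurrent CreateToken calls; the
        	// remaining task names queue on the input channel until a worker frees up.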
  1378  	input := make(chan string, handlers)
  1379  	results := make(map[string]*vapi.Secret, len(args.Tasks))
  1380  	for i := 0; i < handlers; i++ {
  1381  		g.Go(func() error {
  1382  			for {
  1383  				select {
  1384  				case task, ok := <-input:
  1385  					if !ok {
  1386  						return nil
  1387  					}
  1388  
  1389  					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
  1390  					if err != nil {
  1391  						return err
  1392  					}
  1393  
  1394  					results[task] = secret
  1395  				case <-ctx.Done():
  1396  					return nil
  1397  				}
  1398  			}
  1399  		})
  1400  	}
  1401  
  1402  	// Send the input
  1403  	go func() {
  1404  		defer close(input)
  1405  		for _, task := range args.Tasks {
  1406  			select {
  1407  			case <-ctx.Done():
  1408  				return
  1409  			case input <- task:
  1410  			}
  1411  		}
  1412  
  1413  	}()
  1414  
  1415  	// Wait for everything to complete or for an error
  1416  	createErr := g.Wait()
  1417  
  1418  	// Retrieve the results
  1419  	accessors := make([]*structs.VaultAccessor, 0, len(results))
  1420  	tokens := make(map[string]string, len(results))
  1421  	for task, secret := range results {
  1422  		w := secret.WrapInfo
  1423  		tokens[task] = w.Token
  1424  		accessor := &structs.VaultAccessor{
  1425  			Accessor:    w.WrappedAccessor,
  1426  			Task:        task,
  1427  			NodeID:      alloc.NodeID,
  1428  			AllocID:     alloc.ID,
  1429  			CreationTTL: w.TTL,
  1430  		}
  1431  
  1432  		accessors = append(accessors, accessor)
  1433  	}
  1434  
  1435  	// If there was an error revoke the created tokens
  1436  	if createErr != nil {
  1437  		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)
  1438  
  1439  		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
  1440  			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
  1441  		}
  1442  
  1443  		if rerr, ok := createErr.(*structs.RecoverableError); ok {
  1444  			reply.Error = rerr
  1445  		} else {
  1446  			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
  1447  		}
  1448  
  1449  		return nil
  1450  	}
  1451  
  1452  	// Commit to Raft before returning any of the tokens
  1453  	req := structs.VaultAccessorsRequest{Accessors: accessors}
  1454  	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
  1455  	if err != nil {
  1456  		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)
  1457  
  1458  		// Determine if we can recover from the error
  1459  		retry := false
  1460  		switch err {
  1461  		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
  1462  			retry = true
  1463  		}
  1464  
  1465  		setErr(err, retry)
  1466  		return nil
  1467  	}
  1468  
  1469  	reply.Index = index
  1470  	reply.Tasks = tokens
  1471  	n.srv.setQueryMeta(&reply.QueryMeta)
  1472  	return nil
  1473  }
  1474  
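        // EmitEvents is used to upsert one or more events for a set of nodes. Each
        // entry in args.NodeEvents maps a node ID to a non-empty list of events for
        // that node, and the whole request is committed through a single Raft apply.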
  1475  func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
  1476  	if done, err := n.srv.forward("Node.EmitEvents", args, args, reply); done {
  1477  		return err
  1478  	}
  1479  	defer metrics.MeasureSince([]string{"nomad", "client", "emit_events"}, time.Now())
  1480  
  1481  	if len(args.NodeEvents) == 0 {
  1482  		return fmt.Errorf("no node events given")
  1483  	}
  1484  	for nodeID, events := range args.NodeEvents {
  1485  		if len(events) == 0 {
  1486  			return fmt.Errorf("no node events given for node %q", nodeID)
  1487  		}
  1488  	}
  1489  
  1490  	_, index, err := n.srv.raftApply(structs.UpsertNodeEventsType, args)
  1491  	if err != nil {
  1492  		n.srv.logger.Printf("[ERR] nomad.node: upserting node events failed: %v", err)
  1493  		return err
  1494  	}
  1495  
  1496  	reply.Index = index
  1497  	return nil
  1498  }