// github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/node_endpoint.go

package nomad

import (
	"context"
	"crypto/subtle"
	"encoding/base64"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/crypto/blake2b"
	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16
)

// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *batchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}
	if args.Node.SecretID == "" {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match; not registering node")
		}
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// constructNodeServerInfoResponse assumes the n.srv.peerLock is held for reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = string(n.srv.raft.Leader())

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for k, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: string(k),
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	ws := memdb.NewWatchSet()
	iter, err := snap.Nodes(ws)
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

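// A client receiving the reply above can, for instance, keep reply.Servers as
// its fail-over list and use reply.NumNodes as a rough cluster-size estimate.
// A minimal sketch, not actual client code:
//
//	for _, s := range reply.Servers {
//		fmt.Printf("server %s (dc %s)\n", s.RPCAdvertiseAddr, s.Datacenter)
//	}
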
// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Check node permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	accessors, err := snap.VaultAccessorsByNode(ws, args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if l := len(accessors); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Setup the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if l := len(accessors); l != 0 {
			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
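// Only two transitions qualify as becoming ready: init -> ready and
// down -> ready.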
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}

// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Drain != args.Drain {
		_, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Always attempt to create Node evaluations because there may be a System
	// job registered that should be evaluated.
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Check node write permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
		return structs.ErrPermissionDenied
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		// If ResolveToken had an unexpected error return that
		if err != structs.ErrTokenNotFound {
			return err
		}

		// Attempt to lookup AuthToken as a Node.SecretID since nodes
		// call this endpoint and don't have an ACL token.
		node, stateErr := n.srv.fsm.State().NodeBySecretID(nil, args.AuthToken)
		if stateErr != nil {
			// Return the original ResolveToken error with this err
			var merr multierror.Error
			merr.Errors = append(merr.Errors, err, stateErr)
			return merr.ErrorOrNil()
		}

		// Not a node or a valid ACL token
		if node == nil {
			return structs.ErrTokenNotFound
		}
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			out, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := state.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

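// The blocking-query pattern above is shared by the read endpoints that
// follow: blockingRPC re-runs the run callback whenever the watch set fires,
// and the callback must always set reply.Index so the client's next request
// blocks at the right point.
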
// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Check node read and namespace job read permissions
	aclObj, err := n.srv.ResolveToken(args.AuthToken)
	if err != nil {
		return err
	}
	if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// cache namespace perms
	readableNamespaces := map[string]bool{}

	// readNS is a caching namespace read-job helper
	readNS := func(ns string) bool {
		if aclObj == nil {
			// ACLs are disabled; everything is readable
			return true
		}

		if readable, ok := readableNamespaces[ns]; ok {
			// cache hit
			return readable
		}

		// cache miss
		readable := aclObj.AllowNsOp(ns, acl.NamespaceCapabilityReadJob)
		readableNamespaces[ns] = readable
		return readable
	}

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			allocs, err := state.AllocsByNode(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if n := len(allocs); n != 0 {
				reply.Allocs = make([]*structs.Allocation, 0, n)
				for _, alloc := range allocs {
					if readNS(alloc.Namespace) {
						reply.Allocs = append(reply.Allocs, alloc)
					}

					// Get the max of all allocs since
					// subsequent requests need to start
					// from the latest index
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GenerateMigrateToken will create a token for a client to access an
// authenticated volume of another client to migrate data for sticky volumes.
func GenerateMigrateToken(allocID, nodeSecretID string) (string, error) {
	h, err := blake2b.New512([]byte(nodeSecretID))
	if err != nil {
		return "", err
	}
	h.Write([]byte(allocID))
	return base64.URLEncoding.EncodeToString(h.Sum(nil)), nil
}

// CompareMigrateToken returns true if two migration tokens can be computed and
// are equal.
func CompareMigrateToken(allocID, nodeSecretID, otherMigrateToken string) bool {
	h, err := blake2b.New512([]byte(nodeSecretID))
	if err != nil {
		return false
	}
	h.Write([]byte(allocID))

	otherBytes, err := base64.URLEncoding.DecodeString(otherMigrateToken)
	if err != nil {
		return false
	}
	return subtle.ConstantTimeCompare(h.Sum(nil), otherBytes) == 1
}

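// A minimal round-trip sketch of the two helpers above (the alloc and secret
// IDs are placeholder values). The token is a keyed BLAKE2b-512 hash of the
// alloc ID, so only a party holding the node's SecretID can mint or verify it:
//
//	token, _ := GenerateMigrateToken("alloc-id", "node-secret")
//	ok := CompareMigrateToken("alloc-id", "node-secret", token) // ok == true
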
// GetClientAllocs is used to request a lightweight list of alloc modify indexes
// per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// numOldAllocs is used to detect if there is a garbage collection event
	// that affects the node. When an allocation is garbage collected, the
	// modify indexes of the remaining allocations do not change and thus the
	// query won't unblock, even though the set of allocations on the node has
	// changed.
	var numOldAllocs int

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			node, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				if args.SecretID == "" {
					return fmt.Errorf("missing node secret ID for client status update")
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				var err error
				allocs, err = state.AllocsByNode(ws, args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			reply.MigrateTokens = make(map[string]string)

			// preferTableIndex is used to determine whether we should build the
			// response index based on the full table indexes versus the modify
			// indexes of the allocations on the specific node. This is
			// preferred in the case that the node doesn't yet have allocations
			// or when we detect a GC that affects the node.
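			// For example, when an allocation is garbage collected the node's
			// alloc set shrinks without raising any remaining alloc's
			// ModifyIndex, so only the table index reflects the change.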
			preferTableIndex := true

			// Setup the output
			if numAllocs := len(allocs); numAllocs != 0 {
				preferTableIndex = false

				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex

					// If the allocation is going to do a migration, create a
					// migration token so that the client can authenticate with
					// the node hosting the previous allocation.
					if alloc.ShouldMigrate() {
						prevAllocation, err := state.AllocByID(ws, alloc.PreviousAllocation)
						if err != nil {
							return err
						}

						if prevAllocation != nil && prevAllocation.NodeID != alloc.NodeID {
							allocNode, err := state.NodeByID(ws, prevAllocation.NodeID)
							if err != nil {
								return err
							}
							if allocNode == nil {
								// Node must have been GC'd so skip the token
								continue
							}

							token, err := GenerateMigrateToken(prevAllocation.ID, allocNode.SecretID)
							if err != nil {
								return err
							}
							reply.MigrateTokens[alloc.ID] = token
						}
					}

					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}

				// Determine if we have fewer allocations than before. This
				// indicates there was a garbage collection
				if numAllocs < numOldAllocs {
					preferTableIndex = true
				}

				// Store the new number of allocations
				numOldAllocs = numAllocs
			}

			if preferTableIndex {
				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Update modified timestamp for client initiated allocation updates
	now := time.Now().UTC().UnixNano()
	for _, alloc := range args.Alloc {
		alloc.ModifyTime = now
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)
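
	// Every update that arrives within the same batchUpdateInterval window
	// shares the pending batch's future, so all callers observe the same
	// Raft index and commit error.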

	// Start a new batch if none
	future := n.updateFuture
	if future == nil {
		future = NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			future := n.updateFuture
			n.updates = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Setup the response
	reply.Index = future.Index()
	return nil
}

// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) {
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		ws := memdb.NewWatchSet()
		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if l := len(revoke); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Check node read permissions
	if aclObj, err := n.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil && !aclObj.AllowNodeRead() {
		return structs.ErrPermissionDenied
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Capture all the nodes
			var err error
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = state.NodesByIDPrefix(ws, prefix)
			} else {
				iter, err = state.Nodes(ws)
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := state.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	ws := memdb.NewWatchSet()
	allocs, err := snap.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       alloc.Namespace,
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              uuid.Generate(),
			Namespace:       job.Namespace,
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but the EvalUpdate does not.
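	// If that happens, the node change is committed but no evaluations exist
	// for it, so the scheduler will not react until a later node event
	// triggers new evaluations.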
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}

// batchFuture is used to wait on a batch update to complete
type batchFuture struct {
	doneCh chan struct{}
	err    error
	index  uint64
}

// NewBatchFuture creates a new batch future
func NewBatchFuture() *batchFuture {
	return &batchFuture{
		doneCh: make(chan struct{}),
	}
}

// Wait is used to block for the future to complete and returns the error
func (b *batchFuture) Wait() error {
	<-b.doneCh
	return b.err
}

// Index is used to return the index of the batch, only valid after Wait()
func (b *batchFuture) Index() uint64 {
	return b.index
}

// Respond is used to unblock the future
func (b *batchFuture) Respond(index uint64, err error) {
	b.index = index
	b.err = err
	close(b.doneCh)
}

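// A minimal usage sketch of batchFuture (the index value is illustrative):
//
//	bf := NewBatchFuture()
//	go bf.Respond(100, nil) // unblocks every waiter with index 100
//	if err := bf.Wait(); err == nil {
//		_ = bf.Index() // 100
//	}
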
// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		if e == nil {
			return
		}
		reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(ws, args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}

	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens but where if any fails the whole group is
	// canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens. The results map is written by multiple handler
	// goroutines, so guard it with a mutex.
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))
	var resultsLock sync.Mutex
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						wrapped := fmt.Sprintf("failed to create token for task %q on alloc %q: %v", task, alloc.ID, err)
						return structs.WrapRecoverable(wrapped, err)
					}

					resultsLock.Lock()
					results[task] = secret
					resultsLock.Unlock()
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}
	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		if w == nil {
			return fmt.Errorf("Vault returned Secret without WrapInfo")
		}

		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
		}

		return nil
	}

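	// A failure after this point must not leak unrecorded tokens: the
	// accessors are persisted via Raft first so the servers can always
	// revoke what was created.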
	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}