github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/node_endpoint.go

package nomad

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/watch"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16
)

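// Editor's note: the 50ms window above trades a little added latency for far
// fewer Raft log entries; every allocation update that arrives inside one
// window is coalesced into a single Raft apply (see Node.UpdateAlloc and
// Node.batchUpdate below).
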
// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *batchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}

	// COMPAT: Remove after 0.6
	// Need to check if this node is pre-0.5 since SecretID is new in 0.5
	pre, err := nodePreSecretID(args.Node)
	if err != nil {
		return err
	}
	if args.Node.SecretID == "" && !pre {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	originalNode, err := snap.NodeByID(args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if !pre && originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match. Not registering node.")
		}
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// nodePreSecretID is a helper that returns whether the node is on a version
// that is before SecretIDs were introduced
func nodePreSecretID(node *structs.Node) (bool, error) {
	a := node.Attributes
	if a == nil {
		return false, fmt.Errorf("node doesn't have attributes set")
	}

	v, ok := a["nomad.version"]
	if !ok {
		return false, fmt.Errorf("missing Nomad version in attributes")
	}

	return !strings.HasPrefix(v, "0.5"), nil
}

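// Editor's sketch of the intended behavior (the version strings are
// illustrative, not from the original source):
//
//	n04 := &structs.Node{Attributes: map[string]string{"nomad.version": "0.4.1"}}
//	pre, _ := nodePreSecretID(n04) // pre == true: 0.4.x predates SecretIDs
//
//	n05 := &structs.Node{Attributes: map[string]string{"nomad.version": "0.5.0"}}
//	pre, _ = nodePreSecretID(n05) // pre == false: 0.5+ must present a SecretID
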
// constructNodeServerInfoResponse assumes the n.srv.peerLock is held for
// reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = n.srv.raft.Leader()

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for k, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: k,
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	iter, err := snap.Nodes()
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	accessors, err := n.srv.State().VaultAccessorsByNode(args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if len(accessors) != 0 {
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Setup the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to setup a heartbeat. A node that is going down
	// instead has any Vault accessors it holds revoked.
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if len(accessors) != 0 {
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}

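// Editor's note, spelling out the transition table implied above:
//
//	transitionedToReady(structs.NodeStatusReady, structs.NodeStatusInit)  // true
//	transitionedToReady(structs.NodeStatusReady, structs.NodeStatusDown)  // true
//	transitionedToReady(structs.NodeStatusReady, structs.NodeStatusReady) // false (no transition)
//	transitionedToReady(structs.NodeStatusInit, structs.NodeStatusReady)  // false (not a transition to ready)
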
// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Drain != args.Drain {
		_, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Always attempt to create Node evaluations because there may be a System
	// job registered that should be evaluated.
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	node, err := snap.NodeByID(args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		watch:     watch.NewItems(watch.Item{Node: args.NodeID}),
		run: func() error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			snap, err := n.srv.fsm.State().Snapshot()
			if err != nil {
				return err
			}
			out, err := snap.NodeByID(args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := snap.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

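// Editor's note on the blocking-query pattern above (shared by GetAllocs,
// GetClientAllocs, and List below): blockingRPC runs the `run` closure, and if
// the resulting reply.Index does not exceed args.QueryOptions.MinQueryIndex it
// waits for one of the watched items to fire (or for the query's maximum wait
// time to elapse) before running the closure again. The non-zero index rules
// in the handlers below exist so a "not found" result still unblocks callers.
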
// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		watch:     watch.NewItems(watch.Item{AllocNode: args.NodeID}),
		run: func() error {
			// Look for the allocations on the node
			snap, err := n.srv.fsm.State().Snapshot()
			if err != nil {
				return err
			}
			allocs, err := snap.AllocsByNode(args.NodeID)
			if err != nil {
				return err
			}

			// Setup the output
			if len(allocs) != 0 {
				reply.Allocs = allocs
				for _, alloc := range allocs {
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := snap.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetClientAllocs is used to request a lightweight list of alloc modify indexes
// per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		watch:     watch.NewItems(watch.Item{AllocNode: args.NodeID}),
		run: func() error {
			// Snapshot the state
			snap, err := n.srv.fsm.State().Snapshot()
			if err != nil {
				return err
			}

			// Look for the node
			node, err := snap.NodeByID(args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				// COMPAT: Remove in 0.6
				// Check if the node should have a SecretID set
				if args.SecretID == "" {
					if pre, err := nodePreSecretID(node); err != nil {
						return err
					} else if !pre {
						return fmt.Errorf("missing node secret ID for client status update")
					}
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				var err error
				allocs, err = snap.AllocsByNode(args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			// Setup the output
			if len(allocs) != 0 {
				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				// Use the last index that affected the allocs table
				index, err := snap.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)

	// Start a new batch if none
	future := n.updateFuture
	if future == nil {
		future = NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			future := n.updateFuture
			n.updates = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Setup the response
	reply.Index = future.Index()
	return nil
}

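// Editor's note on the batching above: every UpdateAlloc call that arrives
// while a batch is pending appends its allocs to n.updates and blocks on the
// shared batchFuture; only the first caller in a window arms the timer. A
// rough, hypothetical timeline with the 50ms interval:
//
//	t=0ms   call A appends its allocs, creates the future, arms the timer
//	t=10ms  call B appends its allocs, reuses the same future
//	t=50ms  the timer fires and batchUpdate commits A+B in one Raft apply;
//	        both callers unblock from Wait() with the same index
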
// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) {
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		accessors, err := n.srv.State().VaultAccessorsByAlloc(alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if len(revoke) != 0 {
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Setup the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		watch:     watch.NewItems(watch.Item{Table: "nodes"}),
		run: func() error {
			// Capture all the nodes
			snap, err := n.srv.fsm.State().Snapshot()
			if err != nil {
				return err
			}
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = snap.NodesByIDPrefix(prefix)
			} else {
				iter, err = snap.Nodes()
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := snap.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	allocs, err := snap.AllocsByNode(nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler("system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but that the EvalUpdate does not.
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}

// batchFuture is used to wait on a batch update to complete
type batchFuture struct {
	doneCh chan struct{}
	err    error
	index  uint64
}

// NewBatchFuture creates a new batch future
func NewBatchFuture() *batchFuture {
	return &batchFuture{
		doneCh: make(chan struct{}),
	}
}

// Wait is used to block for the future to complete and returns the error
func (b *batchFuture) Wait() error {
	<-b.doneCh
	return b.err
}

// Index is used to return the index of the batch, only after Wait()
func (b *batchFuture) Index() uint64 {
	return b.index
}

// Respond is used to unblock the future
func (b *batchFuture) Respond(index uint64, err error) {
	b.index = index
	b.err = err
	close(b.doneCh)
}

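// Example usage of batchFuture (editor's minimal sketch, not from the
// original source):
//
//	f := NewBatchFuture()
//	go f.Respond(42, nil) // producer: unblocks every waiter
//	if err := f.Wait(); err == nil {
//		idx := f.Index() // safe only after Wait(); idx == 42
//		_ = idx
//	}
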
// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		reply.Error = structs.NewRecoverableError(e, recoverable)
		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	node, err := snap.NodeByID(args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}

	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens but where if any fails the whole group is
	// canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))

	// resultsLock guards results: the worker goroutines below write to the
	// map concurrently, and Go maps are not safe for concurrent mutation.
	var resultsLock sync.Mutex
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						wrapped := fmt.Errorf("failed to create token for task %q: %v", task, err)
						if rerr, ok := err.(*structs.RecoverableError); ok && rerr.Recoverable {
							// If the error is recoverable, propagate it
							return structs.NewRecoverableError(wrapped, true)
						}

						return wrapped
					}

					resultsLock.Lock()
					results[task] = secret
					resultsLock.Unlock()
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}
	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		if w == nil {
			return fmt.Errorf("Vault returned Secret without WrapInfo")
		}

		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation failed: %v", createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation failed: %v", revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false)
		}

		return nil
	}

	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors failed: %v", err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}
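
// Editor's sketch distilling the bounded worker-pool pattern used by
// DeriveVaultToken above (workers, tasks, and doWork are hypothetical names):
//
//	g, ctx := errgroup.WithContext(context.Background())
//	input := make(chan string, workers)
//	for i := 0; i < workers; i++ {
//		g.Go(func() error {
//			for {
//				select {
//				case task, ok := <-input:
//					if !ok {
//						return nil // channel drained, clean exit
//					}
//					if err := doWork(ctx, task); err != nil {
//						return err // first error cancels ctx for the group
//					}
//				case <-ctx.Done():
//					return nil // another worker failed; stop early
//				}
//			}
//		})
//	}
//	go func() {
//		defer close(input)
//		for _, t := range tasks {
//			select {
//			case <-ctx.Done():
//				return
//			case input <- t:
//			}
//		}
//	}()
//	err := g.Wait() // nil, or the first worker error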