github.com/hooklift/nomad@v0.5.7-0.20170407200202-db11e7dd7b55/nomad/node_endpoint.go

package nomad

import (
	"context"
	"fmt"
	"strings"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/go-memdb"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	vapi "github.com/hashicorp/vault/api"
)

const (
	// batchUpdateInterval is how long we wait to batch updates
	batchUpdateInterval = 50 * time.Millisecond

	// maxParallelRequestsPerDerive is the maximum number of parallel Vault
	// create token requests that may be outstanding per derive request
	maxParallelRequestsPerDerive = 16
)
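
// Note on batching: batchUpdateInterval trades a small amount of latency for
// Raft throughput. Allocation updates that arrive within the same 50ms
// window are coalesced into a single AllocClientUpdateRequestType Raft apply
// (see UpdateAlloc and batchUpdate below).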

// Node endpoint is used for client interactions
type Node struct {
	srv *Server

	// updates holds pending client status updates for allocations
	updates []*structs.Allocation

	// updateFuture is used to wait for the pending batch update
	// to complete. This may be nil if no batch is pending.
	updateFuture *batchFuture

	// updateTimer is the timer that will trigger the next batch
	// update, and may be nil if there is no batch pending.
	updateTimer *time.Timer

	// updatesLock synchronizes access to the updates list,
	// the future and the timer.
	updatesLock sync.Mutex
}
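
// Invariant maintained by UpdateAlloc: updateFuture and updateTimer are
// either both nil or both non-nil, and all three batching fields are only
// read or written while holding updatesLock.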

// Register is used to upsert a client that is available for scheduling
func (n *Node) Register(args *structs.NodeRegisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "register"}, time.Now())

	// Validate the arguments
	if args.Node == nil {
		return fmt.Errorf("missing node for client registration")
	}
	if args.Node.ID == "" {
		return fmt.Errorf("missing node ID for client registration")
	}
	if args.Node.Datacenter == "" {
		return fmt.Errorf("missing datacenter for client registration")
	}
	if args.Node.Name == "" {
		return fmt.Errorf("missing node name for client registration")
	}
	if len(args.Node.Attributes) == 0 {
		return fmt.Errorf("missing attributes for client registration")
	}

	// COMPAT: Remove after 0.6
	// Need to check if this node is pre-0.5 since SecretID is new in 0.5
	pre, err := nodePreSecretID(args.Node)
	if err != nil {
		return err
	}
	if args.Node.SecretID == "" && !pre {
		return fmt.Errorf("missing node secret ID for client registration")
	}

	// Default the status if none is given
	if args.Node.Status == "" {
		args.Node.Status = structs.NodeStatusInit
	}
	if !structs.ValidNodeStatus(args.Node.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Set the timestamp when the node is registered
	args.Node.StatusUpdatedAt = time.Now().Unix()

	// Compute the node class
	if err := args.Node.ComputeClass(); err != nil {
		return fmt.Errorf("failed to compute node class: %v", err)
	}

	// Look for the node so we can detect a state transition
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	originalNode, err := snap.NodeByID(ws, args.Node.ID)
	if err != nil {
		return err
	}

	// Check if the SecretID has been tampered with
	if !pre && originalNode != nil {
		if args.Node.SecretID != originalNode.SecretID && originalNode.SecretID != "" {
			return fmt.Errorf("node secret ID does not match; not registering node")
		}
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeRegisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register failed: %v", err)
		return err
	}
	reply.NodeModifyIndex = index

	// Check if we should trigger evaluations
	originalStatus := structs.NodeStatusInit
	if originalNode != nil {
		originalStatus = originalNode.Status
	}
	transitionToReady := transitionedToReady(args.Node.Status, originalStatus)
	if structs.ShouldDrainNode(args.Node.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.Node.ID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// Check if we need to set up a heartbeat
	if !args.Node.TerminalStatus() {
		ttl, err := n.srv.resetHeartbeatTimer(args.Node.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index
	reply.Index = index
	snap, err = n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// nodePreSecretID is a helper that returns whether the node is on a version
// that is before SecretIDs were introduced
func nodePreSecretID(node *structs.Node) (bool, error) {
	a := node.Attributes
	if a == nil {
		return false, fmt.Errorf("node doesn't have attributes set")
	}

	v, ok := a["nomad.version"]
	if !ok {
		return false, fmt.Errorf("missing Nomad version in attributes")
	}

	return !strings.HasPrefix(v, "0.5"), nil
}
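
// For example, a node advertising nomad.version "0.4.1" is treated as
// pre-SecretID (true), while "0.5.6" is not (false). The prefix check relies
// on this COMPAT shim being removed before 0.6, per the comment in Register;
// a hypothetical "0.6.0" would otherwise also be treated as pre.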

// constructNodeServerInfoResponse assumes the n.srv.peerLock is held for reading.
func (n *Node) constructNodeServerInfoResponse(snap *state.StateSnapshot, reply *structs.NodeUpdateResponse) error {
	reply.LeaderRPCAddr = string(n.srv.raft.Leader())

	// Reply with config information required for future RPC requests
	reply.Servers = make([]*structs.NodeServerInfo, 0, len(n.srv.localPeers))
	for k, v := range n.srv.localPeers {
		reply.Servers = append(reply.Servers,
			&structs.NodeServerInfo{
				RPCAdvertiseAddr: string(k),
				RPCMajorVersion:  int32(v.MajorVersion),
				RPCMinorVersion:  int32(v.MinorVersion),
				Datacenter:       v.Datacenter,
			})
	}

	// TODO(sean@): Use an indexed node count instead
	//
	// Snapshot is used only to iterate over all nodes to create a node
	// count to send back to Nomad Clients in their heartbeat so Clients
	// can estimate the size of the cluster.
	ws := memdb.NewWatchSet()
	iter, err := snap.Nodes(ws)
	if err == nil {
		for {
			raw := iter.Next()
			if raw == nil {
				break
			}
			reply.NumNodes++
		}
	}

	return nil
}

// Deregister is used to remove a client from the cluster. If a client should
// just be made unavailable for scheduling, a status update is preferred.
func (n *Node) Deregister(args *structs.NodeDeregisterRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Deregister", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "deregister"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client deregistration")
	}

	// Commit this update via Raft
	_, index, err := n.srv.raftApply(structs.NodeDeregisterRequestType, args)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Deregister failed: %v", err)
		return err
	}

	// Clear the heartbeat timer if any
	n.srv.clearHeartbeatTimer(args.NodeID)

	// Create the evaluations for this node
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}

	// Determine if there are any Vault accessors on the node
	ws := memdb.NewWatchSet()
	accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
		return err
	}

	if l := len(accessors); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to deregister", l, args.NodeID)
		if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
			return err
		}
	}

	// Set up the reply
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex
	reply.NodeModifyIndex = index
	reply.Index = index
	return nil
}

// UpdateStatus is used to update the status of a client node
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateStatus", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for client status update")
	}
	if !structs.ValidNodeStatus(args.Status) {
		return fmt.Errorf("invalid status for node")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}

	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// XXX: Could use the SecretID here but have to update the heartbeat system
	// to track SecretIDs.

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Status != args.Status {
		_, index, err = n.srv.raftApply(structs.NodeUpdateStatusRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: status update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Check if we should trigger evaluations
	transitionToReady := transitionedToReady(args.Status, node.Status)
	if structs.ShouldDrainNode(args.Status) || transitionToReady {
		evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
			return err
		}
		reply.EvalIDs = evalIDs
		reply.EvalCreateIndex = evalIndex
	}

	// If the node went down, revoke any outstanding Vault accessors;
	// otherwise reset its heartbeat timer
	switch args.Status {
	case structs.NodeStatusDown:
		// Determine if there are any Vault accessors on the node
		accessors, err := n.srv.State().VaultAccessorsByNode(ws, args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for node %q failed: %v", args.NodeID, err)
			return err
		}

		if l := len(accessors); l != 0 {
			n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors on node %q due to down state", l, args.NodeID)
			if err := n.srv.vault.RevokeTokens(context.Background(), accessors, true); err != nil {
				n.srv.logger.Printf("[ERR] nomad.client: revoking accessors for node %q failed: %v", args.NodeID, err)
				return err
			}
		}
	default:
		ttl, err := n.srv.resetHeartbeatTimer(args.NodeID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: heartbeat reset failed: %v", err)
			return err
		}
		reply.HeartbeatTTL = ttl
	}

	// Set the reply index and leader
	reply.Index = index
	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}

	return nil
}

// transitionedToReady is a helper that takes a node's new and old status and
// returns whether it has transitioned to ready.
func transitionedToReady(newStatus, oldStatus string) bool {
	initToReady := oldStatus == structs.NodeStatusInit && newStatus == structs.NodeStatusReady
	terminalToReady := oldStatus == structs.NodeStatusDown && newStatus == structs.NodeStatusReady
	return initToReady || terminalToReady
}
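
// For example, transitionedToReady(structs.NodeStatusReady, structs.NodeStatusInit)
// and transitionedToReady(structs.NodeStatusReady, structs.NodeStatusDown) are
// true, while a node that stays ready, or one going from ready to down,
// returns false.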

// UpdateDrain is used to update the drain mode of a client node
func (n *Node) UpdateDrain(args *structs.NodeUpdateDrainRequest,
	reply *structs.NodeDrainUpdateResponse) error {
	if done, err := n.srv.forward("Node.UpdateDrain", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_drain"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for drain update")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Update the timestamp of when the node status was updated
	node.StatusUpdatedAt = time.Now().Unix()

	// Commit this update via Raft
	var index uint64
	if node.Drain != args.Drain {
		_, index, err = n.srv.raftApply(structs.NodeUpdateDrainRequestType, args)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: drain update failed: %v", err)
			return err
		}
		reply.NodeModifyIndex = index
	}

	// Always attempt to create Node evaluations because there may be a System
	// job registered that should be evaluated.
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, index)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = index
	return nil
}

// Evaluate is used to force a re-evaluation of the node
func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUpdateResponse) error {
	if done, err := n.srv.forward("Node.Evaluate", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "evaluate"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID for evaluation")
	}

	// Look for the node
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		return err
	}
	if node == nil {
		return fmt.Errorf("node not found")
	}

	// Create the evaluation
	evalIDs, evalIndex, err := n.createNodeEvals(args.NodeID, node.ModifyIndex)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: eval creation failed: %v", err)
		return err
	}
	reply.EvalIDs = evalIDs
	reply.EvalCreateIndex = evalIndex

	// Set the reply index
	reply.Index = evalIndex

	n.srv.peerLock.RLock()
	defer n.srv.peerLock.RUnlock()
	if err := n.constructNodeServerInfoResponse(snap, reply); err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: failed to populate NodeUpdateResponse: %v", err)
		return err
	}
	return nil
}

// GetNode is used to request information about a specific node
func (n *Node) GetNode(args *structs.NodeSpecificRequest,
	reply *structs.SingleNodeResponse) error {
	if done, err := n.srv.forward("Node.GetNode", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_node"}, time.Now())

	// Set up the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Verify the arguments
			if args.NodeID == "" {
				return fmt.Errorf("missing node ID")
			}

			// Look for the node
			out, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Set up the output
			if out != nil {
				// Clear the secret ID
				reply.Node = out.Copy()
				reply.Node.SecretID = ""
				reply.Index = out.ModifyIndex
			} else {
				// Use the last index that affected the nodes table
				index, err := state.Index("nodes")
				if err != nil {
					return err
				}
				reply.Node = nil
				reply.Index = index
			}

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}
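
// GetNode is a blocking query: callers typically set
// QueryOptions.MinQueryIndex to the index of their previous response, and
// blockingRPC re-runs the run func above only once the watched state
// advances past that index (or the max query time elapses).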

// GetAllocs is used to request allocations for a specific node
func (n *Node) GetAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Set up the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			allocs, err := state.AllocsByNode(ws, args.NodeID)
			if err != nil {
				return err
			}

			// Set up the output
			if len(allocs) != 0 {
				reply.Allocs = allocs
				for _, alloc := range allocs {
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				reply.Allocs = nil

				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// GetClientAllocs is used to request a lightweight list of alloc modify indexes
// per allocation.
func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
	reply *structs.NodeClientAllocsResponse) error {
	if done, err := n.srv.forward("Node.GetClientAllocs", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "get_client_allocs"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		return fmt.Errorf("missing node ID")
	}

	// Set up the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Look for the node
			node, err := state.NodeByID(ws, args.NodeID)
			if err != nil {
				return err
			}

			var allocs []*structs.Allocation
			if node != nil {
				// COMPAT: Remove in 0.6
				// Check if the node should have a SecretID set
				if args.SecretID == "" {
					if pre, err := nodePreSecretID(node); err != nil {
						return err
					} else if !pre {
						return fmt.Errorf("missing node secret ID")
					}
				} else if args.SecretID != node.SecretID {
					return fmt.Errorf("node secret ID does not match")
				}

				var err error
				allocs, err = state.AllocsByNode(ws, args.NodeID)
				if err != nil {
					return err
				}
			}

			reply.Allocs = make(map[string]uint64)
			// Set up the output
			if len(allocs) != 0 {
				for _, alloc := range allocs {
					reply.Allocs[alloc.ID] = alloc.AllocModifyIndex
					reply.Index = maxUint64(reply.Index, alloc.ModifyIndex)
				}
			} else {
				// Use the last index that affected the allocs table
				index, err := state.Index("allocs")
				if err != nil {
					return err
				}

				// Must provide non-zero index to prevent blocking
				// Index 1 is impossible anyway (due to Raft internals)
				if index == 0 {
					reply.Index = 1
				} else {
					reply.Index = index
				}
			}
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// UpdateAlloc is used to update the client status of an allocation
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
	if done, err := n.srv.forward("Node.UpdateAlloc", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "update_alloc"}, time.Now())

	// Ensure at least a single alloc
	if len(args.Alloc) == 0 {
		return fmt.Errorf("must update at least one allocation")
	}

	// Add this to the batch
	n.updatesLock.Lock()
	n.updates = append(n.updates, args.Alloc...)

	// Start a new batch if none is pending
	future := n.updateFuture
	if future == nil {
		future = NewBatchFuture()
		n.updateFuture = future
		n.updateTimer = time.AfterFunc(batchUpdateInterval, func() {
			// Get the pending updates
			n.updatesLock.Lock()
			updates := n.updates
			future := n.updateFuture
			n.updates = nil
			n.updateFuture = nil
			n.updateTimer = nil
			n.updatesLock.Unlock()

			// Perform the batch update
			n.batchUpdate(future, updates)
		})
	}
	n.updatesLock.Unlock()

	// Wait for the future
	if err := future.Wait(); err != nil {
		return err
	}

	// Set up the response
	reply.Index = future.Index()
	return nil
}
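
// The batching above works as follows: the first UpdateAlloc in a window
// creates a batchFuture and arms a batchUpdateInterval timer; later calls
// append to n.updates and share the same future. When the timer fires,
// batchUpdate commits every accumulated update in one Raft transaction and
// responds to the future, unblocking all waiting UpdateAlloc RPCs at once.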

// batchUpdate is used to update all the allocations
func (n *Node) batchUpdate(future *batchFuture, updates []*structs.Allocation) {
	// Prepare the batch update
	batch := &structs.AllocUpdateRequest{
		Alloc:        updates,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this update via Raft
	var mErr multierror.Error
	_, index, err := n.srv.raftApply(structs.AllocClientUpdateRequestType, batch)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: alloc update failed: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}

	// For each allocation we are updating check if we should revoke any
	// Vault Accessors
	var revoke []*structs.VaultAccessor
	for _, alloc := range updates {
		// Skip any allocation that isn't dead on the client
		if !alloc.Terminated() {
			continue
		}

		// Determine if there are any Vault accessors for the allocation
		ws := memdb.NewWatchSet()
		accessors, err := n.srv.State().VaultAccessorsByAlloc(ws, alloc.ID)
		if err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: looking up accessors for alloc %q failed: %v", alloc.ID, err)
			mErr.Errors = append(mErr.Errors, err)
		}

		revoke = append(revoke, accessors...)
	}

	if l := len(revoke); l != 0 {
		n.srv.logger.Printf("[DEBUG] nomad.client: revoking %d accessors due to terminal allocations", l)
		if err := n.srv.vault.RevokeTokens(context.Background(), revoke, true); err != nil {
			n.srv.logger.Printf("[ERR] nomad.client: batched accessor revocation failed: %v", err)
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Respond to the future
	future.Respond(index, mErr.ErrorOrNil())
}

// List is used to list the available nodes
func (n *Node) List(args *structs.NodeListRequest,
	reply *structs.NodeListResponse) error {
	if done, err := n.srv.forward("Node.List", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "list"}, time.Now())

	// Set up the blocking query
	opts := blockingOptions{
		queryOpts: &args.QueryOptions,
		queryMeta: &reply.QueryMeta,
		run: func(ws memdb.WatchSet, state *state.StateStore) error {
			// Capture all the nodes
			var err error
			var iter memdb.ResultIterator
			if prefix := args.QueryOptions.Prefix; prefix != "" {
				iter, err = state.NodesByIDPrefix(ws, prefix)
			} else {
				iter, err = state.Nodes(ws)
			}
			if err != nil {
				return err
			}

			var nodes []*structs.NodeListStub
			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				node := raw.(*structs.Node)
				nodes = append(nodes, node.Stub())
			}
			reply.Nodes = nodes

			// Use the last index that affected the nodes table
			index, err := state.Index("nodes")
			if err != nil {
				return err
			}
			reply.Index = index

			// Set the query response
			n.srv.setQueryMeta(&reply.QueryMeta)
			return nil
		}}
	return n.srv.blockingRPC(&opts)
}

// createNodeEvals is used to create evaluations for each alloc on a node.
// Each Eval is scoped to a job, so we need to potentially trigger many evals.
func (n *Node) createNodeEvals(nodeID string, nodeIndex uint64) ([]string, uint64, error) {
	// Snapshot the state
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		return nil, 0, fmt.Errorf("failed to snapshot state: %v", err)
	}

	// Find all the allocations for this node
	ws := memdb.NewWatchSet()
	allocs, err := snap.AllocsByNode(ws, nodeID)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find allocs for '%s': %v", nodeID, err)
	}

	sysJobsIter, err := snap.JobsByScheduler(ws, "system")
	if err != nil {
		return nil, 0, fmt.Errorf("failed to find system jobs for '%s': %v", nodeID, err)
	}

	var sysJobs []*structs.Job
	for job := sysJobsIter.Next(); job != nil; job = sysJobsIter.Next() {
		sysJobs = append(sysJobs, job.(*structs.Job))
	}

	// Fast-path if nothing to do
	if len(allocs) == 0 && len(sysJobs) == 0 {
		return nil, 0, nil
	}

	// Create an eval for each JobID affected
	var evals []*structs.Evaluation
	var evalIDs []string
	jobIDs := make(map[string]struct{})

	for _, alloc := range allocs {
		// Deduplicate on JobID
		if _, ok := jobIDs[alloc.JobID]; ok {
			continue
		}
		jobIDs[alloc.JobID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        alloc.Job.Priority,
			Type:            alloc.Job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           alloc.JobID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create an evaluation for each system job.
	for _, job := range sysJobs {
		// Still dedup on JobID as the node may already have the system job.
		if _, ok := jobIDs[job.ID]; ok {
			continue
		}
		jobIDs[job.ID] = struct{}{}

		// Create a new eval
		eval := &structs.Evaluation{
			ID:              structs.GenerateUUID(),
			Priority:        job.Priority,
			Type:            job.Type,
			TriggeredBy:     structs.EvalTriggerNodeUpdate,
			JobID:           job.ID,
			NodeID:          nodeID,
			NodeModifyIndex: nodeIndex,
			Status:          structs.EvalStatusPending,
		}
		evals = append(evals, eval)
		evalIDs = append(evalIDs, eval.ID)
	}

	// Create the Raft transaction
	update := &structs.EvalUpdateRequest{
		Evals:        evals,
		WriteRequest: structs.WriteRequest{Region: n.srv.config.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the node update succeeds
	// but the EvalUpdate does not.
	_, evalIndex, err := n.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		return nil, 0, err
	}
	return evalIDs, evalIndex, nil
}
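
// For example, a node running allocations of jobs "cache" and "web" in a
// cluster that also registers system job "log-shipper" yields three evals,
// one per job, since eval creation deduplicates on JobID.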

// batchFuture is used to wait on a batch update to complete
type batchFuture struct {
	doneCh chan struct{}
	err    error
	index  uint64
}

// NewBatchFuture creates a new batch future
func NewBatchFuture() *batchFuture {
	return &batchFuture{
		doneCh: make(chan struct{}),
	}
}

// Wait is used to block for the future to complete and returns the error
func (b *batchFuture) Wait() error {
	<-b.doneCh
	return b.err
}

// Index is used to return the index of the batch, only after Wait()
func (b *batchFuture) Index() uint64 {
	return b.index
}

// Respond is used to unblock the future
func (b *batchFuture) Respond(index uint64, err error) {
	b.index = index
	b.err = err
	close(b.doneCh)
}
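
// A minimal usage sketch (hypothetical caller, not part of this package):
//
//	bf := NewBatchFuture()
//	go bf.Respond(42, nil) // the producer responds exactly once
//	if err := bf.Wait(); err == nil {
//		fmt.Println(bf.Index()) // only valid after Wait; prints 42
//	}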

// DeriveVaultToken is used by the clients to request wrapped Vault tokens for
// tasks
func (n *Node) DeriveVaultToken(args *structs.DeriveVaultTokenRequest,
	reply *structs.DeriveVaultTokenResponse) error {

	// setErr is a helper for setting the recoverable error on the reply and
	// logging it
	setErr := func(e error, recoverable bool) {
		if e == nil {
			return
		}
		reply.Error = structs.NewRecoverableError(e, recoverable).(*structs.RecoverableError)
		n.srv.logger.Printf("[ERR] nomad.client: DeriveVaultToken failed (recoverable %v): %v", recoverable, e)
	}

	if done, err := n.srv.forward("Node.DeriveVaultToken", args, args, reply); done {
		setErr(err, structs.IsRecoverable(err) || err == structs.ErrNoLeader)
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "client", "derive_vault_token"}, time.Now())

	// Verify the arguments
	if args.NodeID == "" {
		setErr(fmt.Errorf("missing node ID"), false)
		return nil
	}
	if args.SecretID == "" {
		setErr(fmt.Errorf("missing node SecretID"), false)
		return nil
	}
	if args.AllocID == "" {
		setErr(fmt.Errorf("missing allocation ID"), false)
		return nil
	}
	if len(args.Tasks) == 0 {
		setErr(fmt.Errorf("no tasks specified"), false)
		return nil
	}

	// Verify the following:
	// * The Node exists and has the correct SecretID
	// * The Allocation exists on the specified node
	// * The allocation contains the given tasks and they each require Vault
	//   tokens
	snap, err := n.srv.fsm.State().Snapshot()
	if err != nil {
		setErr(err, false)
		return nil
	}
	ws := memdb.NewWatchSet()
	node, err := snap.NodeByID(ws, args.NodeID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if node == nil {
		setErr(fmt.Errorf("Node %q does not exist", args.NodeID), false)
		return nil
	}
	if node.SecretID != args.SecretID {
		setErr(fmt.Errorf("SecretID mismatch"), false)
		return nil
	}

	alloc, err := snap.AllocByID(ws, args.AllocID)
	if err != nil {
		setErr(err, false)
		return nil
	}
	if alloc == nil {
		setErr(fmt.Errorf("Allocation %q does not exist", args.AllocID), false)
		return nil
	}
	if alloc.NodeID != args.NodeID {
		setErr(fmt.Errorf("Allocation %q not running on Node %q", args.AllocID, args.NodeID), false)
		return nil
	}
	if alloc.TerminalStatus() {
		setErr(fmt.Errorf("Can't request Vault token for terminal allocation"), false)
		return nil
	}

	// Check the policies
	policies := alloc.Job.VaultPolicies()
	if policies == nil {
		setErr(fmt.Errorf("Job doesn't require Vault policies"), false)
		return nil
	}
	tg, ok := policies[alloc.TaskGroup]
	if !ok {
		setErr(fmt.Errorf("Task group does not require Vault policies"), false)
		return nil
	}

	var unneeded []string
	for _, task := range args.Tasks {
		taskVault := tg[task]
		if taskVault == nil || len(taskVault.Policies) == 0 {
			unneeded = append(unneeded, task)
		}
	}

	if len(unneeded) != 0 {
		e := fmt.Errorf("Requested Vault tokens for tasks without defined Vault policies: %s",
			strings.Join(unneeded, ", "))
		setErr(e, false)
		return nil
	}

	// At this point the request is valid and we should contact Vault for
	// tokens.

	// Create an error group where we will spin up a fixed set of goroutines to
	// handle deriving tokens but where if any fails the whole group is
	// canceled.
	g, ctx := errgroup.WithContext(context.Background())

	// Cap the handlers
	handlers := len(args.Tasks)
	if handlers > maxParallelRequestsPerDerive {
		handlers = maxParallelRequestsPerDerive
	}

	// Create the Vault Tokens. The results map is written by multiple handler
	// goroutines, so guard it with a mutex.
	input := make(chan string, handlers)
	results := make(map[string]*vapi.Secret, len(args.Tasks))
	var resultsLock sync.Mutex
	for i := 0; i < handlers; i++ {
		g.Go(func() error {
			for {
				select {
				case task, ok := <-input:
					if !ok {
						return nil
					}

					secret, err := n.srv.vault.CreateToken(ctx, alloc, task)
					if err != nil {
						wrapped := fmt.Sprintf("failed to create token for task %q on alloc %q: %v", task, alloc.ID, err)
						return structs.WrapRecoverable(wrapped, err)
					}

					resultsLock.Lock()
					results[task] = secret
					resultsLock.Unlock()
				case <-ctx.Done():
					return nil
				}
			}
		})
	}

	// Send the input
	go func() {
		defer close(input)
		for _, task := range args.Tasks {
			select {
			case <-ctx.Done():
				return
			case input <- task:
			}
		}
	}()

	// Wait for everything to complete or for an error
	createErr := g.Wait()

	// Retrieve the results
	accessors := make([]*structs.VaultAccessor, 0, len(results))
	tokens := make(map[string]string, len(results))
	for task, secret := range results {
		w := secret.WrapInfo
		if w == nil {
			return fmt.Errorf("Vault returned Secret without WrapInfo")
		}

		tokens[task] = w.Token
		accessor := &structs.VaultAccessor{
			Accessor:    w.WrappedAccessor,
			Task:        task,
			NodeID:      alloc.NodeID,
			AllocID:     alloc.ID,
			CreationTTL: w.TTL,
		}

		accessors = append(accessors, accessor)
	}

	// If there was an error revoke the created tokens
	if createErr != nil {
		n.srv.logger.Printf("[ERR] nomad.node: Vault token creation for alloc %q failed: %v", alloc.ID, createErr)

		if revokeErr := n.srv.vault.RevokeTokens(context.Background(), accessors, false); revokeErr != nil {
			n.srv.logger.Printf("[ERR] nomad.node: Vault token revocation for alloc %q failed: %v", alloc.ID, revokeErr)
		}

		if rerr, ok := createErr.(*structs.RecoverableError); ok {
			reply.Error = rerr
		} else {
			reply.Error = structs.NewRecoverableError(createErr, false).(*structs.RecoverableError)
		}

		return nil
	}

	// Commit to Raft before returning any of the tokens
	req := structs.VaultAccessorsRequest{Accessors: accessors}
	_, index, err := n.srv.raftApply(structs.VaultAccessorRegisterRequestType, &req)
	if err != nil {
		n.srv.logger.Printf("[ERR] nomad.client: Register Vault accessors for alloc %q failed: %v", alloc.ID, err)

		// Determine if we can recover from the error
		retry := false
		switch err {
		case raft.ErrNotLeader, raft.ErrLeadershipLost, raft.ErrRaftShutdown, raft.ErrEnqueueTimeout:
			retry = true
		}

		setErr(err, retry)
		return nil
	}

	reply.Index = index
	reply.Tasks = tokens
	n.srv.setQueryMeta(&reply.QueryMeta)
	return nil
}
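
// The fan-out in DeriveVaultToken follows a common bounded-worker errgroup
// pattern: a fixed number of workers drain a channel and the first error
// cancels the shared context. An illustrative, self-contained sketch (the
// process func and items slice are hypothetical):
//
//	g, ctx := errgroup.WithContext(context.Background())
//	input := make(chan string, workers)
//	for i := 0; i < workers; i++ {
//		g.Go(func() error {
//			for {
//				select {
//				case item, ok := <-input:
//					if !ok {
//						return nil // input drained
//					}
//					if err := process(ctx, item); err != nil {
//						return err // cancels ctx, stopping the other workers
//					}
//				case <-ctx.Done():
//					return nil
//				}
//			}
//		})
//	}
//	go func() {
//		defer close(input)
//		for _, item := range items {
//			select {
//			case <-ctx.Done():
//				return
//			case input <- item:
//			}
//		}
//	}()
//	err := g.Wait()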