github.com/hernad/nomad@v1.6.112/nomad/client_alloc_endpoint.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package nomad
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"net"
    11  	"time"
    12  
    13  	"github.com/armon/go-metrics"
    14  	"github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/go-msgpack/codec"
    16  
    17  	"github.com/hernad/nomad/acl"
    18  	cstructs "github.com/hernad/nomad/client/structs"
    19  	"github.com/hernad/nomad/helper/pointer"
    20  	"github.com/hernad/nomad/nomad/structs"
    21  )
    22  
// ClientAllocations is used to forward RPC requests to the targeted Nomad client's
// Allocation endpoint.
//
// srv is the server whose authentication, ACL resolution, state snapshots and
// node connections the handlers rely on. logger is the logger assigned when
// the endpoint is constructed.
type ClientAllocations struct {
	srv    *Server
	logger hclog.Logger
}
    29  
    30  func NewClientAllocationsEndpoint(srv *Server) *ClientAllocations {
    31  	return &ClientAllocations{srv: srv, logger: srv.logger.Named("client_allocs")}
    32  }
    33  
    34  func (a *ClientAllocations) register() {
    35  	a.srv.streamingRpcs.Register("Allocations.Exec", a.exec)
    36  }
    37  
    38  // GarbageCollectAll is used to garbage collect all allocations on a client.
    39  func (a *ClientAllocations) GarbageCollectAll(args *structs.NodeSpecificRequest, reply *structs.GenericResponse) error {
    40  	// We only allow stale reads since the only potentially stale information is
    41  	// the Node registration and the cost is fairly high for adding another hop
    42  	// in the forwarding chain.
    43  	args.QueryOptions.AllowStale = true
    44  
    45  	authErr := a.srv.Authenticate(nil, args)
    46  
    47  	// Potentially forward to a different region.
    48  	if done, err := a.srv.forward("ClientAllocations.GarbageCollectAll", args, args, reply); done {
    49  		return err
    50  	}
    51  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricWrite, args)
    52  	if authErr != nil {
    53  		return structs.ErrPermissionDenied
    54  	}
    55  	defer metrics.MeasureSince([]string{"nomad", "client_allocations", "garbage_collect_all"}, time.Now())
    56  
    57  	// Check node read permissions
    58  	if aclObj, err := a.srv.ResolveACL(args); err != nil {
    59  		return err
    60  	} else if aclObj != nil && !aclObj.AllowNodeWrite() {
    61  		return structs.ErrPermissionDenied
    62  	}
    63  
    64  	// Verify the arguments.
    65  	if args.NodeID == "" {
    66  		return errors.New("missing NodeID")
    67  	}
    68  
    69  	// Make sure Node is valid and new enough to support RPC
    70  	snap, err := a.srv.State().Snapshot()
    71  	if err != nil {
    72  		return err
    73  	}
    74  
    75  	_, err = getNodeForRpc(snap, args.NodeID)
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	// Get the connection to the client
    81  	state, ok := a.srv.getNodeConn(args.NodeID)
    82  	if !ok {
    83  		return findNodeConnAndForward(a.srv, args.NodeID, "ClientAllocations.GarbageCollectAll", args, reply)
    84  	}
    85  
    86  	// Make the RPC
    87  	return NodeRpc(state.Session, "Allocations.GarbageCollectAll", args, reply)
    88  }
    89  
    90  // Signal is used to send a signal to an allocation on a client.
    91  func (a *ClientAllocations) Signal(args *structs.AllocSignalRequest, reply *structs.GenericResponse) error {
    92  	// We only allow stale reads since the only potentially stale information is
    93  	// the Node registration and the cost is fairly high for adding another hope
    94  	// in the forwarding chain.
    95  	args.QueryOptions.AllowStale = true
    96  
    97  	authErr := a.srv.Authenticate(nil, args)
    98  
    99  	// Potentially forward to a different region.
   100  	if done, err := a.srv.forward("ClientAllocations.Signal", args, args, reply); done {
   101  		return err
   102  	}
   103  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricWrite, args)
   104  	if authErr != nil {
   105  		return structs.ErrPermissionDenied
   106  	}
   107  	defer metrics.MeasureSince([]string{"nomad", "client_allocations", "signal"}, time.Now())
   108  
   109  	// Verify the arguments.
   110  	if args.AllocID == "" {
   111  		return errors.New("missing AllocID")
   112  	}
   113  
   114  	// Find the allocation
   115  	snap, err := a.srv.State().Snapshot()
   116  	if err != nil {
   117  		return err
   118  	}
   119  
   120  	alloc, err := getAlloc(snap, args.AllocID)
   121  	if err != nil {
   122  		return err
   123  	}
   124  
   125  	// Check namespace alloc-lifecycle permission.
   126  	if aclObj, err := a.srv.ResolveACL(args); err != nil {
   127  		return err
   128  	} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityAllocLifecycle) {
   129  		return structs.ErrPermissionDenied
   130  	}
   131  
   132  	// Make sure Node is valid and new enough to support RPC
   133  	_, err = getNodeForRpc(snap, alloc.NodeID)
   134  	if err != nil {
   135  		return err
   136  	}
   137  
   138  	// Get the connection to the client
   139  	state, ok := a.srv.getNodeConn(alloc.NodeID)
   140  	if !ok {
   141  		return findNodeConnAndForward(a.srv, alloc.NodeID, "ClientAllocations.Signal", args, reply)
   142  	}
   143  
   144  	// Make the RPC
   145  	return NodeRpc(state.Session, "Allocations.Signal", args, reply)
   146  }
   147  
   148  // GarbageCollect is used to garbage collect an allocation on a client.
   149  func (a *ClientAllocations) GarbageCollect(args *structs.AllocSpecificRequest, reply *structs.GenericResponse) error {
   150  	// We only allow stale reads since the only potentially stale information is
   151  	// the Node registration and the cost is fairly high for adding another hop
   152  	// in the forwarding chain.
   153  	args.QueryOptions.AllowStale = true
   154  
   155  	authErr := a.srv.Authenticate(nil, args)
   156  
   157  	// Potentially forward to a different region.
   158  	if done, err := a.srv.forward("ClientAllocations.GarbageCollect", args, args, reply); done {
   159  		return err
   160  	}
   161  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricWrite, args)
   162  	if authErr != nil {
   163  		return structs.ErrPermissionDenied
   164  	}
   165  	defer metrics.MeasureSince([]string{"nomad", "client_allocations", "garbage_collect"}, time.Now())
   166  
   167  	// Verify the arguments.
   168  	if args.AllocID == "" {
   169  		return errors.New("missing AllocID")
   170  	}
   171  
   172  	// Find the allocation
   173  	snap, err := a.srv.State().Snapshot()
   174  	if err != nil {
   175  		return err
   176  	}
   177  
   178  	alloc, err := getAlloc(snap, args.AllocID)
   179  	if err != nil {
   180  		return err
   181  	}
   182  
   183  	// Check namespace submit-job permission.
   184  	if aclObj, err := a.srv.ResolveACL(args); err != nil {
   185  		return err
   186  	} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilitySubmitJob) {
   187  		return structs.ErrPermissionDenied
   188  	}
   189  
   190  	// Make sure Node is valid and new enough to support RPC
   191  	_, err = getNodeForRpc(snap, alloc.NodeID)
   192  	if err != nil {
   193  		return err
   194  	}
   195  
   196  	// Get the connection to the client
   197  	state, ok := a.srv.getNodeConn(alloc.NodeID)
   198  	if !ok {
   199  		return findNodeConnAndForward(a.srv, alloc.NodeID, "ClientAllocations.GarbageCollect", args, reply)
   200  	}
   201  
   202  	// Make the RPC
   203  	return NodeRpc(state.Session, "Allocations.GarbageCollect", args, reply)
   204  }
   205  
   206  // Restart is used to trigger a restart of an allocation or a subtask on a client.
   207  func (a *ClientAllocations) Restart(args *structs.AllocRestartRequest, reply *structs.GenericResponse) error {
   208  	// We only allow stale reads since the only potentially stale information is
   209  	// the Node registration and the cost is fairly high for adding another hop
   210  	// in the forwarding chain.
   211  	args.QueryOptions.AllowStale = true
   212  
   213  	authErr := a.srv.Authenticate(nil, args)
   214  
   215  	// Potentially forward to a different region.
   216  	if done, err := a.srv.forward("ClientAllocations.Restart", args, args, reply); done {
   217  		return err
   218  	}
   219  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricWrite, args)
   220  	if authErr != nil {
   221  		return structs.ErrPermissionDenied
   222  	}
   223  	defer metrics.MeasureSince([]string{"nomad", "client_allocations", "restart"}, time.Now())
   224  
   225  	// Find the allocation
   226  	snap, err := a.srv.State().Snapshot()
   227  	if err != nil {
   228  		return err
   229  	}
   230  
   231  	alloc, err := getAlloc(snap, args.AllocID)
   232  	if err != nil {
   233  		return err
   234  	}
   235  
   236  	// Check for namespace alloc-lifecycle permissions.
   237  	if aclObj, err := a.srv.ResolveACL(args); err != nil {
   238  		return err
   239  	} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityAllocLifecycle) {
   240  		return structs.ErrPermissionDenied
   241  	}
   242  
   243  	// Make sure Node is valid and new enough to support RPC
   244  	_, err = getNodeForRpc(snap, alloc.NodeID)
   245  	if err != nil {
   246  		return err
   247  	}
   248  
   249  	// Get the connection to the client
   250  	state, ok := a.srv.getNodeConn(alloc.NodeID)
   251  	if !ok {
   252  		return findNodeConnAndForward(a.srv, alloc.NodeID, "ClientAllocations.Restart", args, reply)
   253  	}
   254  
   255  	// Make the RPC
   256  	return NodeRpc(state.Session, "Allocations.Restart", args, reply)
   257  }
   258  
   259  // Stats is used to collect allocation statistics
   260  func (a *ClientAllocations) Stats(args *cstructs.AllocStatsRequest, reply *cstructs.AllocStatsResponse) error {
   261  	// We only allow stale reads since the only potentially stale information is
   262  	// the Node registration and the cost is fairly high for adding another hop
   263  	// in the forwarding chain.
   264  	args.QueryOptions.AllowStale = true
   265  
   266  	authErr := a.srv.Authenticate(nil, args)
   267  
   268  	// Potentially forward to a different region.
   269  	if done, err := a.srv.forward("ClientAllocations.Stats", args, args, reply); done {
   270  		return err
   271  	}
   272  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricRead, args)
   273  	if authErr != nil {
   274  		return structs.ErrPermissionDenied
   275  	}
   276  	defer metrics.MeasureSince([]string{"nomad", "client_allocations", "stats"}, time.Now())
   277  
   278  	// Find the allocation
   279  	snap, err := a.srv.State().Snapshot()
   280  	if err != nil {
   281  		return err
   282  	}
   283  
   284  	alloc, err := getAlloc(snap, args.AllocID)
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	// Check for namespace read-job permissions.
   290  	if aclObj, err := a.srv.ResolveACL(args); err != nil {
   291  		return err
   292  	} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityReadJob) {
   293  		return structs.ErrPermissionDenied
   294  	}
   295  
   296  	// Make sure Node is valid and new enough to support RPC
   297  	_, err = getNodeForRpc(snap, alloc.NodeID)
   298  	if err != nil {
   299  		return err
   300  	}
   301  
   302  	// Get the connection to the client
   303  	state, ok := a.srv.getNodeConn(alloc.NodeID)
   304  	if !ok {
   305  		return findNodeConnAndForward(a.srv, alloc.NodeID, "ClientAllocations.Stats", args, reply)
   306  	}
   307  
   308  	// Make the RPC
   309  	return NodeRpc(state.Session, "Allocations.Stats", args, reply)
   310  }
   311  
   312  // Checks is the server implementation of the allocation checks RPC. The
   313  // ultimate response is provided by the node running the allocation. This RPC
   314  // is needed to handle queries which hit the server agent API directly, or via
   315  // another node which is not running the allocation.
   316  func (a *ClientAllocations) Checks(args *cstructs.AllocChecksRequest, reply *cstructs.AllocChecksResponse) error {
   317  
   318  	// We only allow stale reads since the only potentially stale information
   319  	// is the Node registration and the cost is fairly high for adding another
   320  	// hop in the forwarding chain.
   321  	args.QueryOptions.AllowStale = true
   322  
   323  	authErr := a.srv.Authenticate(nil, args)
   324  
   325  	// Potentially forward to a different region.
   326  	if done, err := a.srv.forward("ClientAllocations.Checks", args, args, reply); done {
   327  		return err
   328  	}
   329  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricRead, args)
   330  	if authErr != nil {
   331  		return structs.ErrPermissionDenied
   332  	}
   333  	defer metrics.MeasureSince([]string{"nomad", "client_allocations", "checks"}, time.Now())
   334  
   335  	// Grab the state snapshot, as we need this to perform lookups for a number
   336  	// of objects, all things being well.
   337  	snap, err := a.srv.State().Snapshot()
   338  	if err != nil {
   339  		return err
   340  	}
   341  
   342  	// Get the full allocation object, so we have information such as the
   343  	// namespace and node ID.
   344  	alloc, err := getAlloc(snap, args.AllocID)
   345  	if err != nil {
   346  		return err
   347  	}
   348  
   349  	// Check for namespace read-job permissions.
   350  	if aclObj, err := a.srv.ResolveACL(args); err != nil {
   351  		return err
   352  	} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityReadJob) {
   353  		return structs.ErrPermissionDenied
   354  	}
   355  
   356  	// Make sure Node is valid and new enough to support RPC.
   357  	if _, err = getNodeForRpc(snap, alloc.NodeID); err != nil {
   358  		return err
   359  	}
   360  
   361  	// Get the connection to the client.
   362  	state, ok := a.srv.getNodeConn(alloc.NodeID)
   363  	if !ok {
   364  		return findNodeConnAndForward(a.srv, alloc.NodeID, "ClientAllocations.Checks", args, reply)
   365  	}
   366  
   367  	// Make the RPC
   368  	return NodeRpc(state.Session, "Allocations.Checks", args, reply)
   369  }
   370  
   371  // exec is used to execute command in a running task
   372  func (a *ClientAllocations) exec(conn io.ReadWriteCloser) {
   373  	defer conn.Close()
   374  	defer metrics.MeasureSince([]string{"nomad", "alloc", "exec"}, time.Now())
   375  
   376  	// Decode the arguments
   377  	var args cstructs.AllocExecRequest
   378  	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
   379  	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
   380  
   381  	if err := decoder.Decode(&args); err != nil {
   382  		handleStreamResultError(err, pointer.Of(int64(500)), encoder)
   383  		return
   384  	}
   385  
   386  	authErr := a.srv.Authenticate(nil, &args)
   387  
   388  	// Check if we need to forward to a different region
   389  	if r := args.RequestRegion(); r != a.srv.Region() {
   390  		forwardRegionStreamingRpc(a.srv, conn, encoder, &args, "Allocations.Exec",
   391  			args.AllocID, &args.QueryOptions)
   392  		return
   393  	}
   394  	a.srv.MeasureRPCRate("client_allocations", structs.RateMetricWrite, &args)
   395  	if authErr != nil {
   396  		handleStreamResultError(structs.ErrPermissionDenied, nil, encoder)
   397  		return
   398  	}
   399  
   400  	// Verify the arguments.
   401  	if args.AllocID == "" {
   402  		handleStreamResultError(errors.New("missing AllocID"), pointer.Of(int64(400)), encoder)
   403  		return
   404  	}
   405  
   406  	// Retrieve the allocation
   407  	snap, err := a.srv.State().Snapshot()
   408  	if err != nil {
   409  		handleStreamResultError(err, nil, encoder)
   410  		return
   411  	}
   412  
   413  	alloc, err := getAlloc(snap, args.AllocID)
   414  	if structs.IsErrUnknownAllocation(err) {
   415  		handleStreamResultError(err, pointer.Of(int64(404)), encoder)
   416  		return
   417  	}
   418  	if err != nil {
   419  		handleStreamResultError(err, nil, encoder)
   420  		return
   421  	}
   422  
   423  	// Check node read permissions
   424  	if aclObj, err := a.srv.ResolveACL(&args); err != nil {
   425  		handleStreamResultError(err, nil, encoder)
   426  		return
   427  	} else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityAllocExec) {
   428  		// client ultimately checks if AllocNodeExec is required
   429  		handleStreamResultError(structs.ErrPermissionDenied, nil, encoder)
   430  		return
   431  	}
   432  
   433  	nodeID := alloc.NodeID
   434  
   435  	// Make sure Node is valid and new enough to support RPC
   436  	node, err := snap.NodeByID(nil, nodeID)
   437  	if err != nil {
   438  		handleStreamResultError(err, pointer.Of(int64(500)), encoder)
   439  		return
   440  	}
   441  
   442  	if node == nil {
   443  		err := fmt.Errorf("Unknown node %q", nodeID)
   444  		handleStreamResultError(err, pointer.Of(int64(400)), encoder)
   445  		return
   446  	}
   447  
   448  	if err := nodeSupportsRpc(node); err != nil {
   449  		handleStreamResultError(err, pointer.Of(int64(400)), encoder)
   450  		return
   451  	}
   452  
   453  	// Get the connection to the client either by forwarding to another server
   454  	// or creating a direct stream
   455  	var clientConn net.Conn
   456  	state, ok := a.srv.getNodeConn(nodeID)
   457  	if !ok {
   458  		// Determine the Server that has a connection to the node.
   459  		srv, err := a.srv.serverWithNodeConn(nodeID, a.srv.Region())
   460  		if err != nil {
   461  			var code *int64
   462  			if structs.IsErrNoNodeConn(err) {
   463  				code = pointer.Of(int64(404))
   464  			}
   465  			handleStreamResultError(err, code, encoder)
   466  			return
   467  		}
   468  
   469  		// Get a connection to the server
   470  		conn, err := a.srv.streamingRpc(srv, "Allocations.Exec")
   471  		if err != nil {
   472  			handleStreamResultError(err, nil, encoder)
   473  			return
   474  		}
   475  
   476  		clientConn = conn
   477  	} else {
   478  		stream, err := NodeStreamingRpc(state.Session, "Allocations.Exec")
   479  		if err != nil {
   480  			handleStreamResultError(err, nil, encoder)
   481  			return
   482  		}
   483  		clientConn = stream
   484  	}
   485  	defer clientConn.Close()
   486  
   487  	// Send the request.
   488  	outEncoder := codec.NewEncoder(clientConn, structs.MsgpackHandle)
   489  	if err := outEncoder.Encode(args); err != nil {
   490  		handleStreamResultError(err, nil, encoder)
   491  		return
   492  	}
   493  
   494  	structs.Bridge(conn, clientConn)
   495  }