github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/rpc.go

package client

import (
	"errors"
	"io"
	"net"
	"net/rpc"
	"strings"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/nomad/client/servers"
	inmem "github.com/hashicorp/nomad/helper/codec"
	"github.com/hashicorp/nomad/helper/pool"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/yamux"
)

// rpcEndpoints holds the RPC endpoints
type rpcEndpoints struct {
	ClientStats *ClientStats
	CSI         *CSI
	FileSystem  *FileSystem
	Allocations *Allocations
	Agent       *Agent
}

// ClientRPC is used to make a local, client-only RPC call
func (c *Client) ClientRPC(method string, args interface{}, reply interface{}) error {
	codec := &inmem.InmemCodec{
		Method: method,
		Args:   args,
		Reply:  reply,
	}
	if err := c.rpcServer.ServeRequest(codec); err != nil {
		return err
	}
	return codec.Err
}
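
// Illustrative usage sketch, not part of the original source: ClientRPC dials
// nothing and simply serves the request against the in-process RPC server
// registered in setupClientRpcServer below. The method name, reply type, and
// import alias here are assumptions for illustration.
//
//	var reply cstructs.ClientStatsResponse // hypothetical alias for client/structs
//	err := c.ClientRPC("ClientStats.Stats", &structs.NodeSpecificRequest{}, &reply)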

// StreamingRpcHandler is used to make a local, client-only streaming RPC
// call.
func (c *Client) StreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
	return c.streamingRpcs.GetHandler(method)
}

// RPC is used to forward an RPC call to a Nomad server, or fail if there are
// no servers.
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
	// Invoke the RPCHandler if it exists
	if c.config.RPCHandler != nil {
		return c.config.RPCHandler.RPC(method, args, reply)
	}

	// We will try to automatically retry requests that fail due to things like
	// server unavailability, but instead of retrying forever, let's have a
	// solid upper bound.
	deadline := time.Now()

	// A reasonable amount of time for leader election. Note that when servers
	// forward() our RPC requests to the leader they may also allow for an
	// RPCHoldTimeout while waiting for leader election. That's OK: we won't
	// double up, because we are using it here not as a sleep but as a hint to
	// give up.
	deadline = deadline.Add(c.config.RPCHoldTimeout)

	// If it's a blocking query, allow the time specified by the request.
	if info, ok := args.(structs.RPCInfo); ok {
		deadline = deadline.Add(info.TimeToBlock())
	}

TRY:
	server := c.servers.FindServer()
	if server == nil {
		return noServersErr
	}

	// Make the request.
	rpcErr := c.connPool.RPC(c.Region(), server.Addr, c.RPCMajorVersion(), method, args, reply)

	if rpcErr == nil {
		c.fireRpcRetryWatcher()
		return nil
	}

	// If shutting down, exit without logging the error
	select {
	case <-c.shutdownCh:
		return nil
	default:
	}

	// Move off to another server, and see if we can retry.
	c.rpcLogger.Error("error performing RPC to server", "error", rpcErr, "rpc", method, "server", server.Addr)
	c.servers.NotifyFailedServer(server)

	if !canRetry(args, rpcErr) {
		c.rpcLogger.Error("error performing RPC to server which is not safe to automatically retry", "error", rpcErr, "rpc", method, "server", server.Addr)
		return rpcErr
	}
	if time.Now().After(deadline) {
		// Blocking queries are tricky. Jitter and RPC hold timeouts in multiple
		// places can result in our server call taking longer than we wanted it
		// to. For example, a block time of 5s may easily turn into the server
		// blocking for 10s, since it applies its own RPCHoldTimeout. If the
		// server dies at t=7s we still want to retry, so before we give up on
		// blocking queries, make one last attempt for an immediate answer.
		if info, ok := args.(structs.RPCInfo); ok && info.TimeToBlock() > 0 {
			info.SetTimeToBlock(0)
			return c.RPC(method, args, reply)
		}
		c.rpcLogger.Error("error performing RPC to server, deadline exceeded, cannot retry", "error", rpcErr, "rpc", method, "server", server.Addr)
		return rpcErr
	}

	// Wait to avoid thundering herd
	select {
	case <-time.After(lib.RandomStagger(c.config.RPCHoldTimeout / structs.JitterFraction)):
		// If we are going to retry a blocking query we need to update the time
		// to block so it finishes by our deadline.
		if info, ok := args.(structs.RPCInfo); ok && info.TimeToBlock() > 0 {
			newBlockTime := time.Until(deadline)
			// We can get below 0 here on slow computers because we slept for
			// jitter, so at least try to get an immediate response.
			if newBlockTime < 0 {
				newBlockTime = 0
			}
			info.SetTimeToBlock(newBlockTime)
			return c.RPC(method, args, reply)
		}

		goto TRY
	case <-c.shutdownCh:
	}
	return rpcErr
}
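
// Illustrative usage sketch, not part of the original source: any args value
// whose type implements structs.RPCInfo (for example a request embedding
// structs.QueryOptions) feeds its block time into the retry deadline above.
// The method and types here are assumptions for illustration.
//
//	req := structs.NodeSpecificRequest{
//		NodeID: c.NodeID(),
//		QueryOptions: structs.QueryOptions{
//			Region:       c.Region(),
//			MaxQueryTime: 5 * time.Second, // blocking query budget
//		},
//	}
//	var out structs.SingleNodeResponse
//	err := c.RPC("Node.GetNode", &req, &out)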

// canRetry returns true if the given situation is safe for a retry.
func canRetry(args interface{}, err error) bool {
	// No leader errors are always safe to retry since no state could have
	// been changed.
	if structs.IsErrNoLeader(err) {
		return true
	}

	// Reads are safe to retry for stream errors, such as if a server was
	// being shut down.
	info, ok := args.(structs.RPCInfo)
	if ok && info.IsRead() && lib.IsErrEOF(err) {
		return true
	}

	return false
}

// RemoteStreamingRpcHandler is used to make a streaming RPC call to a remote
// server.
func (c *Client) RemoteStreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
	server := c.servers.FindServer()
	if server == nil {
		return nil, noServersErr
	}

	conn, err := c.streamingRpcConn(server, method)
	if err != nil {
		// Move off to another server
		c.rpcLogger.Error("error performing RPC to server", "error", err, "rpc", method, "server", server.Addr)
		c.servers.NotifyFailedServer(server)
		return nil, err
	}

	return bridgedStreamingRpcHandler(conn), nil
}

// bridgedStreamingRpcHandler creates a bridged streaming RPC handler by copying
// data between the two sides.
func bridgedStreamingRpcHandler(sideA io.ReadWriteCloser) structs.StreamingRpcHandler {
	return func(sideB io.ReadWriteCloser) {
		defer sideA.Close()
		defer sideB.Close()
		structs.Bridge(sideA, sideB)
	}
}
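
// Illustrative usage sketch, not part of the original source: a
// StreamingRpcHandler is driven by handing it one end of a connection and
// speaking the streaming protocol over the other. The method name below is an
// assumption for illustration.
//
//	handler, err := c.RemoteStreamingRpcHandler("FileSystem.Logs")
//	if err != nil {
//		return err
//	}
//	local, remote := net.Pipe()
//	go handler(remote)
//	// encode the request and decode response frames over local using
//	// structs.MsgpackHandle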

// streamingRpcConn is used to retrieve a connection to a server to conduct a
// streaming RPC.
func (c *Client) streamingRpcConn(server *servers.Server, method string) (net.Conn, error) {
	// Dial the server
	conn, err := net.DialTimeout("tcp", server.Addr.String(), 10*time.Second)
	if err != nil {
		return nil, err
	}

	// Cast to TCPConn
	if tcp, ok := conn.(*net.TCPConn); ok {
		tcp.SetKeepAlive(true)
		tcp.SetNoDelay(true)
	}

	// Check if TLS is enabled
	c.tlsWrapLock.RLock()
	tlsWrap := c.tlsWrap
	c.tlsWrapLock.RUnlock()

	if tlsWrap != nil {
		// Switch the connection into TLS mode
		if _, err := conn.Write([]byte{byte(pool.RpcTLS)}); err != nil {
			conn.Close()
			return nil, err
		}

		// Wrap the connection in a TLS client
		tlsConn, err := tlsWrap(c.Region(), conn)
		if err != nil {
			conn.Close()
			return nil, err
		}
		conn = tlsConn
	}

	// Write the multiplex byte to set the mode
	if _, err := conn.Write([]byte{byte(pool.RpcStreaming)}); err != nil {
		conn.Close()
		return nil, err
	}

	// Send the header
	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
	header := structs.StreamingRpcHeader{
		Method: method,
	}
	if err := encoder.Encode(header); err != nil {
		conn.Close()
		return nil, err
	}

	// Wait for the acknowledgement
	var ack structs.StreamingRpcAck
	if err := decoder.Decode(&ack); err != nil {
		conn.Close()
		return nil, err
	}

	if ack.Error != "" {
		conn.Close()
		return nil, errors.New(ack.Error)
	}

	return conn, nil
}
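
// For reference (summary added for illustration, derived from the code
// above), the client side of the streaming handshake is:
//
//	1. dial TCP with a 10s timeout
//	2. if TLS is configured, write pool.RpcTLS and wrap the conn via tlsWrap
//	3. write pool.RpcStreaming to select the connection mode
//	4. msgpack-encode structs.StreamingRpcHeader{Method: method}
//	5. msgpack-decode structs.StreamingRpcAck and fail if ack.Error is set
//
// handleStreamingConn below implements the matching server side.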

// setupClientRpc is used to set up the Client's RPC endpoints
func (c *Client) setupClientRpc() {
	// Initialize the RPC handlers
	c.endpoints.ClientStats = &ClientStats{c}
	c.endpoints.CSI = &CSI{c}
	c.endpoints.FileSystem = NewFileSystemEndpoint(c)
	c.endpoints.Allocations = NewAllocationsEndpoint(c)
	c.endpoints.Agent = NewAgentEndpoint(c)

	// Create the RPC Server
	c.rpcServer = rpc.NewServer()

	// Register the endpoints with the RPC server
	c.setupClientRpcServer(c.rpcServer)

	go c.rpcConnListener()
}

// setupClientRpcServer is used to populate a client RPC server with endpoints.
func (c *Client) setupClientRpcServer(server *rpc.Server) {
	// Register the endpoints
	server.Register(c.endpoints.ClientStats)
	server.Register(c.endpoints.CSI)
	server.Register(c.endpoints.FileSystem)
	server.Register(c.endpoints.Allocations)
	server.Register(c.endpoints.Agent)
}
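
// Note (added for illustration): net/rpc derives the service name from the
// registered value's concrete type, so registering *Allocations exposes its
// exported methods as "Allocations.<Method>". These are the strings that
// ClientRPC and the connection handlers below dispatch on.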

// rpcConnListener is a long-lived function that listens for new connections
// being made on the connection pool and starts an RPC listener for each
// connection.
func (c *Client) rpcConnListener() {
	// Make a channel for new connections.
	conns := make(chan *yamux.Session, 4)
	c.connPool.SetConnListener(conns)

	for {
		select {
		case <-c.shutdownCh:
			return
		case session, ok := <-conns:
			if !ok {
				continue
			}

			go c.listenConn(session)
		}
	}
}

// listenConn is used to listen for connections being made from the server on
// a pre-existing connection. This should be called in a goroutine.
func (c *Client) listenConn(s *yamux.Session) {
	for {
		conn, err := s.Accept()
		if err != nil {
			if s.IsClosed() {
				return
			}

			c.rpcLogger.Error("failed to accept RPC conn", "error", err)
			continue
		}

		go c.handleConn(conn)
		metrics.IncrCounter([]string{"client", "rpc", "accept_conn"}, 1)
	}
}

// handleConn is used to determine if this is an RPC or streaming RPC
// connection and invoke the correct handler.
func (c *Client) handleConn(conn net.Conn) {
	// Read a single byte
	buf := make([]byte, 1)
	if _, err := conn.Read(buf); err != nil {
		if err != io.EOF {
			c.rpcLogger.Error("error reading byte", "error", err)
		}
		conn.Close()
		return
	}

	// Switch on the byte
	switch pool.RPCType(buf[0]) {
	case pool.RpcNomad:
		c.handleNomadConn(conn)

	case pool.RpcStreaming:
		c.handleStreamingConn(conn)

	default:
		c.rpcLogger.Error("unrecognized RPC byte", "byte", buf[0])
		conn.Close()
		return
	}
}

// handleNomadConn is used to handle a single Nomad RPC connection.
func (c *Client) handleNomadConn(conn net.Conn) {
	defer conn.Close()
	rpcCodec := pool.NewServerCodec(conn)
	for {
		select {
		case <-c.shutdownCh:
			return
		default:
		}

		if err := c.rpcServer.ServeRequest(rpcCodec); err != nil {
			if err != io.EOF && !strings.Contains(err.Error(), "closed") {
				c.rpcLogger.Error("error performing RPC", "error", err, "addr", conn.RemoteAddr())
				metrics.IncrCounter([]string{"client", "rpc", "request_error"}, 1)
			}
			return
		}
		metrics.IncrCounter([]string{"client", "rpc", "request"}, 1)
	}
}

// handleStreamingConn is used to handle a single streaming Nomad RPC connection.
func (c *Client) handleStreamingConn(conn net.Conn) {
	defer conn.Close()

	// Decode the header
	var header structs.StreamingRpcHeader
	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
	if err := decoder.Decode(&header); err != nil {
		if err != io.EOF && !strings.Contains(err.Error(), "closed") {
			c.rpcLogger.Error("error performing streaming RPC", "error", err, "addr", conn.RemoteAddr())
			metrics.IncrCounter([]string{"client", "streaming_rpc", "request_error"}, 1)
		}

		return
	}

	ack := structs.StreamingRpcAck{}
	handler, err := c.streamingRpcs.GetHandler(header.Method)
	if err != nil {
		c.rpcLogger.Error("streaming RPC error", "addr", conn.RemoteAddr(), "error", err)
		metrics.IncrCounter([]string{"client", "streaming_rpc", "request_error"}, 1)
		ack.Error = err.Error()
	}

	// Send the acknowledgement
	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
	if err := encoder.Encode(ack); err != nil {
		conn.Close()
		return
	}

	if ack.Error != "" {
		return
	}

	// Invoke the handler
	metrics.IncrCounter([]string{"client", "streaming_rpc", "request"}, 1)
	handler(conn)
}

// resolveServer takes a server's address as a string and returns its resolved
// net.Addr or an error.
func resolveServer(s string) (net.Addr, error) {
	const defaultClientPort = "4647" // default client RPC port
	host, port, err := net.SplitHostPort(s)
	if err != nil {
		if strings.Contains(err.Error(), "missing port") {
			host = s
			port = defaultClientPort
		} else {
			return nil, err
		}
	}
	return net.ResolveTCPAddr("tcp", net.JoinHostPort(host, port))
}
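
// Illustrative usage sketch, not part of the original source; the addresses
// are placeholders:
//
//	addr, err := resolveServer("10.0.0.5:4647") // explicit port
//	addr, err = resolveServer("10.0.0.5")       // falls back to port 4647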

// Ping is used to ping a particular server and returns an error if it is
// unhealthy or unreachable.
func (c *Client) Ping(srv net.Addr) error {
	var reply struct{}
	err := c.connPool.RPC(c.Region(), srv, c.RPCMajorVersion(), "Status.Ping", struct{}{}, &reply)
	return err
}

// rpcRetryWatcher returns a channel that will be closed if an event happens
// such that we expect the next RPC to be successful.
func (c *Client) rpcRetryWatcher() <-chan struct{} {
	c.rpcRetryLock.Lock()
	defer c.rpcRetryLock.Unlock()

	if c.rpcRetryCh == nil {
		c.rpcRetryCh = make(chan struct{})
	}

	return c.rpcRetryCh
}
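
// Illustrative usage sketch, not part of the original source: retry loops can
// park on the watcher channel alongside their own timers, waking early when
// fireRpcRetryWatcher signals that the next RPC is expected to succeed.
//
//	select {
//	case <-c.rpcRetryWatcher():
//		// retry immediately
//	case <-time.After(backoff): // backoff is hypothetical
//	case <-c.shutdownCh:
//		return
//	}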

// fireRpcRetryWatcher causes any RPC retry loops to retry their RPCs because
// we believe they will be successful.
func (c *Client) fireRpcRetryWatcher() {
	c.rpcRetryLock.Lock()
	defer c.rpcRetryLock.Unlock()
	if c.rpcRetryCh != nil {
		close(c.rpcRetryCh)
		c.rpcRetryCh = nil
	}
}