github.com/taylorchu/nomad@v0.5.3-rc1.0.20170407200202-db11e7dd7b55/nomad/rpc.go

github.com/taylorchu/nomad@v0.5.3-rc1.0.20170407200202-db11e7dd7b55/nomad/rpc.go (about)

     1  package nomad
     2  
     3  import (
     4  	"crypto/tls"
     5  	"fmt"
     6  	"io"
     7  	"math/rand"
     8  	"net"
     9  	"net/rpc"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/armon/go-metrics"
    14  	"github.com/hashicorp/consul/lib"
    15  	memdb "github.com/hashicorp/go-memdb"
    16  	"github.com/hashicorp/net-rpc-msgpackrpc"
    17  	"github.com/hashicorp/nomad/nomad/state"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  	"github.com/hashicorp/raft"
    20  	"github.com/hashicorp/yamux"
    21  )
    22  
    23  type RPCType byte
    24  
    25  const (
    26  	rpcNomad     RPCType = 0x01
    27  	rpcRaft              = 0x02
    28  	rpcMultiplex         = 0x03
    29  	rpcTLS               = 0x04
    30  )
    31  
    32  const (
    33  	// maxQueryTime is used to bound the limit of a blocking query
    34  	maxQueryTime = 300 * time.Second
    35  
    36  	// defaultQueryTime is the amount of time we block waiting for a change
    37  	// if no time is specified. Previously we would wait the maxQueryTime.
    38  	defaultQueryTime = 300 * time.Second
    39  
    40  	// jitterFraction is a the limit to the amount of jitter we apply
    41  	// to a user specified MaxQueryTime. We divide the specified time by
    42  	// the fraction. So 16 == 6.25% limit of jitter. This jitter is also
    43  	// applied to RPCHoldTimeout.
    44  	jitterFraction = 16
    45  
    46  	// Warn if the Raft command is larger than this.
    47  	// If it's over 1MB something is probably being abusive.
    48  	raftWarnSize = 1024 * 1024
    49  
    50  	// enqueueLimit caps how long we will wait to enqueue
    51  	// a new Raft command. Something is probably wrong if this
    52  	// value is ever reached. However, it prevents us from blocking
    53  	// the requesting goroutine forever.
    54  	enqueueLimit = 30 * time.Second
    55  )
    56  
    57  // NewClientCodec returns a new rpc.ClientCodec to be used to make RPC calls to
    58  // the Nomad Server.
    59  func NewClientCodec(conn io.ReadWriteCloser) rpc.ClientCodec {
    60  	return msgpackrpc.NewCodecFromHandle(true, true, conn, structs.HashiMsgpackHandle)
    61  }
    62  
    63  // NewServerCodec returns a new rpc.ServerCodec to be used by the Nomad Server
    64  // to handle rpcs.
    65  func NewServerCodec(conn io.ReadWriteCloser) rpc.ServerCodec {
    66  	return msgpackrpc.NewCodecFromHandle(true, true, conn, structs.HashiMsgpackHandle)
    67  }
    68  
    69  // listen is used to listen for incoming RPC connections
    70  func (s *Server) listen() {
    71  	for {
    72  		// Accept a connection
    73  		conn, err := s.rpcListener.Accept()
    74  		if err != nil {
    75  			if s.shutdown {
    76  				return
    77  			}
    78  			s.logger.Printf("[ERR] nomad.rpc: failed to accept RPC conn: %v", err)
    79  			continue
    80  		}
    81  
    82  		go s.handleConn(conn, false)
    83  		metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1)
    84  	}
    85  }
    86  
    87  // handleConn is used to determine if this is a Raft or
    88  // Nomad type RPC connection and invoke the correct handler
    89  func (s *Server) handleConn(conn net.Conn, isTLS bool) {
    90  	// Read a single byte
    91  	buf := make([]byte, 1)
    92  	if _, err := conn.Read(buf); err != nil {
    93  		if err != io.EOF {
    94  			s.logger.Printf("[ERR] nomad.rpc: failed to read byte: %v", err)
    95  		}
    96  		conn.Close()
    97  		return
    98  	}
    99  
   100  	// Enforce TLS if EnableRPC is set
   101  	if s.config.TLSConfig.EnableRPC && !isTLS && RPCType(buf[0]) != rpcTLS {
   102  		s.logger.Printf("[WARN] nomad.rpc: Non-TLS connection attempted with RequireTLS set")
   103  		conn.Close()
   104  		return
   105  	}
   106  
   107  	// Switch on the byte
   108  	switch RPCType(buf[0]) {
   109  	case rpcNomad:
   110  		s.handleNomadConn(conn)
   111  
   112  	case rpcRaft:
   113  		metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1)
   114  		s.raftLayer.Handoff(conn)
   115  
   116  	case rpcMultiplex:
   117  		s.handleMultiplex(conn)
   118  
   119  	case rpcTLS:
   120  		if s.rpcTLS == nil {
   121  			s.logger.Printf("[WARN] nomad.rpc: TLS connection attempted, server not configured for TLS")
   122  			conn.Close()
   123  			return
   124  		}
   125  		conn = tls.Server(conn, s.rpcTLS)
   126  		s.handleConn(conn, true)
   127  
   128  	default:
   129  		s.logger.Printf("[ERR] nomad.rpc: unrecognized RPC byte: %v", buf[0])
   130  		conn.Close()
   131  		return
   132  	}
   133  }
   134  
   135  // handleMultiplex is used to multiplex a single incoming connection
   136  // using the Yamux multiplexer
   137  func (s *Server) handleMultiplex(conn net.Conn) {
   138  	defer conn.Close()
   139  	conf := yamux.DefaultConfig()
   140  	conf.LogOutput = s.config.LogOutput
   141  	server, _ := yamux.Server(conn, conf)
   142  	for {
   143  		sub, err := server.Accept()
   144  		if err != nil {
   145  			if err != io.EOF {
   146  				s.logger.Printf("[ERR] nomad.rpc: multiplex conn accept failed: %v", err)
   147  			}
   148  			return
   149  		}
   150  		go s.handleNomadConn(sub)
   151  	}
   152  }
   153  
   154  // handleNomadConn is used to service a single Nomad RPC connection
   155  func (s *Server) handleNomadConn(conn net.Conn) {
   156  	defer conn.Close()
   157  	rpcCodec := NewServerCodec(conn)
   158  	for {
   159  		select {
   160  		case <-s.shutdownCh:
   161  			return
   162  		default:
   163  		}
   164  
   165  		if err := s.rpcServer.ServeRequest(rpcCodec); err != nil {
   166  			if err != io.EOF && !strings.Contains(err.Error(), "closed") {
   167  				s.logger.Printf("[ERR] nomad.rpc: RPC error: %v (%v)", err, conn)
   168  				metrics.IncrCounter([]string{"nomad", "rpc", "request_error"}, 1)
   169  			}
   170  			return
   171  		}
   172  		metrics.IncrCounter([]string{"nomad", "rpc", "request"}, 1)
   173  	}
   174  }
   175  
   176  // forward is used to forward to a remote region or to forward to the local leader
   177  // Returns a bool of if forwarding was performed, as well as any error
   178  func (s *Server) forward(method string, info structs.RPCInfo, args interface{}, reply interface{}) (bool, error) {
   179  	var firstCheck time.Time
   180  
   181  	region := info.RequestRegion()
   182  	if region == "" {
   183  		return true, fmt.Errorf("missing target RPC")
   184  	}
   185  
   186  	// Handle region forwarding
   187  	if region != s.config.Region {
   188  		err := s.forwardRegion(region, method, args, reply)
   189  		return true, err
   190  	}
   191  
   192  	// Check if we can allow a stale read
   193  	if info.IsRead() && info.AllowStaleRead() {
   194  		return false, nil
   195  	}
   196  
   197  CHECK_LEADER:
   198  	// Find the leader
   199  	isLeader, remoteServer := s.getLeader()
   200  
   201  	// Handle the case we are the leader
   202  	if isLeader {
   203  		return false, nil
   204  	}
   205  
   206  	// Handle the case of a known leader
   207  	if remoteServer != nil {
   208  		err := s.forwardLeader(remoteServer, method, args, reply)
   209  		return true, err
   210  	}
   211  
   212  	// Gate the request until there is a leader
   213  	if firstCheck.IsZero() {
   214  		firstCheck = time.Now()
   215  	}
   216  	if time.Now().Sub(firstCheck) < s.config.RPCHoldTimeout {
   217  		jitter := lib.RandomStagger(s.config.RPCHoldTimeout / jitterFraction)
   218  		select {
   219  		case <-time.After(jitter):
   220  			goto CHECK_LEADER
   221  		case <-s.shutdownCh:
   222  		}
   223  	}
   224  
   225  	// No leader found and hold time exceeded
   226  	return true, structs.ErrNoLeader
   227  }
   228  
   229  // getLeader returns if the current node is the leader, and if not
   230  // then it returns the leader which is potentially nil if the cluster
   231  // has not yet elected a leader.
   232  func (s *Server) getLeader() (bool, *serverParts) {
   233  	// Check if we are the leader
   234  	if s.IsLeader() {
   235  		return true, nil
   236  	}
   237  
   238  	// Get the leader
   239  	leader := s.raft.Leader()
   240  	if leader == "" {
   241  		return false, nil
   242  	}
   243  
   244  	// Lookup the server
   245  	s.peerLock.RLock()
   246  	server := s.localPeers[leader]
   247  	s.peerLock.RUnlock()
   248  
   249  	// Server could be nil
   250  	return false, server
   251  }
   252  
   253  // forwardLeader is used to forward an RPC call to the leader, or fail if no leader
   254  func (s *Server) forwardLeader(server *serverParts, method string, args interface{}, reply interface{}) error {
   255  	// Handle a missing server
   256  	if server == nil {
   257  		return structs.ErrNoLeader
   258  	}
   259  	return s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion, method, args, reply)
   260  }
   261  
   262  // forwardRegion is used to forward an RPC call to a remote region, or fail if no servers
   263  func (s *Server) forwardRegion(region, method string, args interface{}, reply interface{}) error {
   264  	// Bail if we can't find any servers
   265  	s.peerLock.RLock()
   266  	servers := s.peers[region]
   267  	if len(servers) == 0 {
   268  		s.peerLock.RUnlock()
   269  		s.logger.Printf("[WARN] nomad.rpc: RPC request for region '%s', no path found",
   270  			region)
   271  		return structs.ErrNoRegionPath
   272  	}
   273  
   274  	// Select a random addr
   275  	offset := rand.Intn(len(servers))
   276  	server := servers[offset]
   277  	s.peerLock.RUnlock()
   278  
   279  	// Forward to remote Nomad
   280  	metrics.IncrCounter([]string{"nomad", "rpc", "cross-region", region}, 1)
   281  	return s.connPool.RPC(region, server.Addr, server.MajorVersion, method, args, reply)
   282  }
   283  
   284  // raftApplyFuture is used to encode a message, run it through raft, and return the Raft future.
   285  func (s *Server) raftApplyFuture(t structs.MessageType, msg interface{}) (raft.ApplyFuture, error) {
   286  	buf, err := structs.Encode(t, msg)
   287  	if err != nil {
   288  		return nil, fmt.Errorf("Failed to encode request: %v", err)
   289  	}
   290  
   291  	// Warn if the command is very large
   292  	if n := len(buf); n > raftWarnSize {
   293  		s.logger.Printf("[WARN] nomad: Attempting to apply large raft entry (type %d) (%d bytes)", t, n)
   294  	}
   295  
   296  	future := s.raft.Apply(buf, enqueueLimit)
   297  	return future, nil
   298  }
   299  
   300  // raftApply is used to encode a message, run it through raft, and return
   301  // the FSM response along with any errors
   302  func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
   303  	future, err := s.raftApplyFuture(t, msg)
   304  	if err != nil {
   305  		return nil, 0, err
   306  	}
   307  	if err := future.Error(); err != nil {
   308  		return nil, 0, err
   309  	}
   310  	return future.Response(), future.Index(), nil
   311  }
   312  
   313  // setQueryMeta is used to populate the QueryMeta data for an RPC call
   314  func (s *Server) setQueryMeta(m *structs.QueryMeta) {
   315  	if s.IsLeader() {
   316  		m.LastContact = 0
   317  		m.KnownLeader = true
   318  	} else {
   319  		m.LastContact = time.Now().Sub(s.raft.LastContact())
   320  		m.KnownLeader = (s.raft.Leader() != "")
   321  	}
   322  }
   323  
   324  // queryFn is used to perform a query operation. If a re-query is needed, the
   325  // passed-in watch set will be used to block for changes. The passed-in state
   326  // store should be used (vs. calling fsm.State()) since the given state store
   327  // will be correctly watched for changes if the state store is restored from
   328  // a snapshot.
   329  type queryFn func(memdb.WatchSet, *state.StateStore) error
   330  
   331  // blockingOptions is used to parameterize blockingRPC
   332  type blockingOptions struct {
   333  	queryOpts *structs.QueryOptions
   334  	queryMeta *structs.QueryMeta
   335  	run       queryFn
   336  }
   337  
   338  // blockingRPC is used for queries that need to wait for a
   339  // minimum index. This is used to block and wait for changes.
   340  func (s *Server) blockingRPC(opts *blockingOptions) error {
   341  	var timeout *time.Timer
   342  	var state *state.StateStore
   343  
   344  	// Fast path non-blocking
   345  	if opts.queryOpts.MinQueryIndex == 0 {
   346  		goto RUN_QUERY
   347  	}
   348  
   349  	// Restrict the max query time, and ensure there is always one
   350  	if opts.queryOpts.MaxQueryTime > maxQueryTime {
   351  		opts.queryOpts.MaxQueryTime = maxQueryTime
   352  	} else if opts.queryOpts.MaxQueryTime <= 0 {
   353  		opts.queryOpts.MaxQueryTime = defaultQueryTime
   354  	}
   355  
   356  	// Apply a small amount of jitter to the request
   357  	opts.queryOpts.MaxQueryTime += lib.RandomStagger(opts.queryOpts.MaxQueryTime / jitterFraction)
   358  
   359  	// Setup a query timeout
   360  	timeout = time.NewTimer(opts.queryOpts.MaxQueryTime)
   361  	defer timeout.Stop()
   362  
   363  RUN_QUERY:
   364  	// Update the query meta data
   365  	s.setQueryMeta(opts.queryMeta)
   366  
   367  	// Increment the rpc query counter
   368  	metrics.IncrCounter([]string{"nomad", "rpc", "query"}, 1)
   369  
   370  	// We capture the state store and its abandon channel but pass a snapshot to
   371  	// the blocking query function. We operate on the snapshot to allow separate
   372  	// calls to the state store not all wrapped within the same transaction.
   373  	state = s.fsm.State()
   374  	abandonCh := state.AbandonCh()
   375  	snap, _ := state.Snapshot()
   376  	stateSnap := &snap.StateStore
   377  
   378  	// We can skip all watch tracking if this isn't a blocking query.
   379  	var ws memdb.WatchSet
   380  	if opts.queryOpts.MinQueryIndex > 0 {
   381  		ws = memdb.NewWatchSet()
   382  
   383  		// This channel will be closed if a snapshot is restored and the
   384  		// whole state store is abandoned.
   385  		ws.Add(abandonCh)
   386  	}
   387  
   388  	// Block up to the timeout if we didn't see anything fresh.
   389  	err := opts.run(ws, stateSnap)
   390  
   391  	// Check for minimum query time
   392  	if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
   393  		if expired := ws.Watch(timeout.C); !expired {
   394  			goto RUN_QUERY
   395  		}
   396  	}
   397  	return err
   398  }