github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/rpc.go (about)

     1  package nomad
     2  
     3  import (
     4  	"crypto/tls"
     5  	"fmt"
     6  	"io"
     7  	"math/rand"
     8  	"net"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/armon/go-metrics"
    13  	"github.com/hashicorp/net-rpc-msgpackrpc"
    14  	"github.com/hashicorp/nomad/nomad/state"
    15  	"github.com/hashicorp/nomad/nomad/structs"
    16  	"github.com/hashicorp/raft"
    17  	"github.com/hashicorp/yamux"
    18  )
    19  
    20  type RPCType byte
    21  
    22  const (
    23  	rpcNomad     RPCType = 0x01
    24  	rpcRaft              = 0x02
    25  	rpcMultiplex         = 0x03
    26  	rpcTLS               = 0x04
    27  )
    28  
    29  const (
    30  	// maxQueryTime is used to bound the limit of a blocking query
    31  	maxQueryTime = 300 * time.Second
    32  
    33  	// defaultQueryTime is the amount of time we block waiting for a change
    34  	// if no time is specified. Previously we would wait the maxQueryTime.
    35  	defaultQueryTime = 300 * time.Second
    36  
    37  	// jitterFraction is a the limit to the amount of jitter we apply
    38  	// to a user specified MaxQueryTime. We divide the specified time by
    39  	// the fraction. So 16 == 6.25% limit of jitter
    40  	jitterFraction = 16
    41  
    42  	// Warn if the Raft command is larger than this.
    43  	// If it's over 1MB something is probably being abusive.
    44  	raftWarnSize = 1024 * 1024
    45  
    46  	// enqueueLimit caps how long we will wait to enqueue
    47  	// a new Raft command. Something is probably wrong if this
    48  	// value is ever reached. However, it prevents us from blocking
    49  	// the requesting goroutine forever.
    50  	enqueueLimit = 30 * time.Second
    51  )
    52  
    53  // listen is used to listen for incoming RPC connections
    54  func (s *Server) listen() {
    55  	for {
    56  		// Accept a connection
    57  		conn, err := s.rpcListener.Accept()
    58  		if err != nil {
    59  			if s.shutdown {
    60  				return
    61  			}
    62  			s.logger.Printf("[ERR] nomad.rpc: failed to accept RPC conn: %v", err)
    63  			continue
    64  		}
    65  
    66  		go s.handleConn(conn, false)
    67  		metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1)
    68  	}
    69  }
    70  
    71  // handleConn is used to determine if this is a Raft or
    72  // Nomad type RPC connection and invoke the correct handler
    73  func (s *Server) handleConn(conn net.Conn, isTLS bool) {
    74  	// Read a single byte
    75  	buf := make([]byte, 1)
    76  	if _, err := conn.Read(buf); err != nil {
    77  		if err != io.EOF {
    78  			s.logger.Printf("[ERR] nomad.rpc: failed to read byte: %v", err)
    79  		}
    80  		conn.Close()
    81  		return
    82  	}
    83  
    84  	// Enforce TLS if VerifyIncoming is set
    85  	if s.config.RequireTLS && !isTLS && RPCType(buf[0]) != rpcTLS {
    86  		s.logger.Printf("[WARN] nomad.rpc: Non-TLS connection attempted with RequireTLS set")
    87  		conn.Close()
    88  		return
    89  	}
    90  
    91  	// Switch on the byte
    92  	switch RPCType(buf[0]) {
    93  	case rpcNomad:
    94  		s.handleNomadConn(conn)
    95  
    96  	case rpcRaft:
    97  		metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1)
    98  		s.raftLayer.Handoff(conn)
    99  
   100  	case rpcMultiplex:
   101  		s.handleMultiplex(conn)
   102  
   103  	case rpcTLS:
   104  		if s.rpcTLS == nil {
   105  			s.logger.Printf("[WARN] nomad.rpc: TLS connection attempted, server not configured for TLS")
   106  			conn.Close()
   107  			return
   108  		}
   109  		conn = tls.Server(conn, s.rpcTLS)
   110  		s.handleConn(conn, true)
   111  
   112  	default:
   113  		s.logger.Printf("[ERR] nomad.rpc: unrecognized RPC byte: %v", buf[0])
   114  		conn.Close()
   115  		return
   116  	}
   117  }
   118  
   119  // handleMultiplex is used to multiplex a single incoming connection
   120  // using the Yamux multiplexer
   121  func (s *Server) handleMultiplex(conn net.Conn) {
   122  	defer conn.Close()
   123  	conf := yamux.DefaultConfig()
   124  	conf.LogOutput = s.config.LogOutput
   125  	server, _ := yamux.Server(conn, conf)
   126  	for {
   127  		sub, err := server.Accept()
   128  		if err != nil {
   129  			if err != io.EOF {
   130  				s.logger.Printf("[ERR] nomad.rpc: multiplex conn accept failed: %v", err)
   131  			}
   132  			return
   133  		}
   134  		go s.handleNomadConn(sub)
   135  	}
   136  }
   137  
   138  // handleNomadConn is used to service a single Nomad RPC connection
   139  func (s *Server) handleNomadConn(conn net.Conn) {
   140  	defer conn.Close()
   141  	rpcCodec := msgpackrpc.NewServerCodec(conn)
   142  	for {
   143  		select {
   144  		case <-s.shutdownCh:
   145  			return
   146  		default:
   147  		}
   148  
   149  		if err := s.rpcServer.ServeRequest(rpcCodec); err != nil {
   150  			if err != io.EOF && !strings.Contains(err.Error(), "closed") {
   151  				s.logger.Printf("[ERR] nomad.rpc: RPC error: %v (%v)", err, conn)
   152  				metrics.IncrCounter([]string{"nomad", "rpc", "request_error"}, 1)
   153  			}
   154  			return
   155  		}
   156  		metrics.IncrCounter([]string{"nomad", "rpc", "request"}, 1)
   157  	}
   158  }
   159  
   160  // forward is used to forward to a remote region or to forward to the local leader
   161  // Returns a bool of if forwarding was performed, as well as any error
   162  func (s *Server) forward(method string, info structs.RPCInfo, args interface{}, reply interface{}) (bool, error) {
   163  	region := info.RequestRegion()
   164  	if region == "" {
   165  		return true, fmt.Errorf("missing target RPC")
   166  	}
   167  
   168  	// Handle region forwarding
   169  	if region != s.config.Region {
   170  		err := s.forwardRegion(region, method, args, reply)
   171  		return true, err
   172  	}
   173  
   174  	// Check if we can allow a stale read
   175  	if info.IsRead() && info.AllowStaleRead() {
   176  		return false, nil
   177  	}
   178  
   179  	// Handle leader forwarding
   180  	if !s.IsLeader() {
   181  		err := s.forwardLeader(method, args, reply)
   182  		return true, err
   183  	}
   184  	return false, nil
   185  }
   186  
   187  // forwardLeader is used to forward an RPC call to the leader, or fail if no leader
   188  func (s *Server) forwardLeader(method string, args interface{}, reply interface{}) error {
   189  	// Get the leader
   190  	leader := s.raft.Leader()
   191  	if leader == "" {
   192  		return structs.ErrNoLeader
   193  	}
   194  
   195  	// Lookup the server
   196  	s.peerLock.RLock()
   197  	server := s.localPeers[leader]
   198  	s.peerLock.RUnlock()
   199  
   200  	// Handle a missing server
   201  	if server == nil {
   202  		return structs.ErrNoLeader
   203  	}
   204  	return s.connPool.RPC(s.config.Region, server.Addr, server.Version, method, args, reply)
   205  }
   206  
   207  // forwardRegion is used to forward an RPC call to a remote region, or fail if no servers
   208  func (s *Server) forwardRegion(region, method string, args interface{}, reply interface{}) error {
   209  	// Bail if we can't find any servers
   210  	s.peerLock.RLock()
   211  	servers := s.peers[region]
   212  	if len(servers) == 0 {
   213  		s.peerLock.RUnlock()
   214  		s.logger.Printf("[WARN] nomad.rpc: RPC request for region '%s', no path found",
   215  			region)
   216  		return structs.ErrNoRegionPath
   217  	}
   218  
   219  	// Select a random addr
   220  	offset := rand.Int31() % int32(len(servers))
   221  	server := servers[offset]
   222  	s.peerLock.RUnlock()
   223  
   224  	// Forward to remote Nomad
   225  	metrics.IncrCounter([]string{"nomad", "rpc", "cross-region", region}, 1)
   226  	return s.connPool.RPC(region, server.Addr, server.Version, method, args, reply)
   227  }
   228  
   229  // raftApplyFuture is used to encode a message, run it through raft, and return the Raft future.
   230  func (s *Server) raftApplyFuture(t structs.MessageType, msg interface{}) (raft.ApplyFuture, error) {
   231  	buf, err := structs.Encode(t, msg)
   232  	if err != nil {
   233  		return nil, fmt.Errorf("Failed to encode request: %v", err)
   234  	}
   235  
   236  	// Warn if the command is very large
   237  	if n := len(buf); n > raftWarnSize {
   238  		s.logger.Printf("[WARN] nomad: Attempting to apply large raft entry (type %d) (%d bytes)", t, n)
   239  	}
   240  
   241  	future := s.raft.Apply(buf, enqueueLimit)
   242  	return future, nil
   243  }
   244  
   245  // raftApply is used to encode a message, run it through raft, and return
   246  // the FSM response along with any errors
   247  func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
   248  	future, err := s.raftApplyFuture(t, msg)
   249  	if err != nil {
   250  		return nil, 0, err
   251  	}
   252  	if err := future.Error(); err != nil {
   253  		return nil, 0, err
   254  	}
   255  	return future.Response(), future.Index(), nil
   256  }
   257  
   258  // setQueryMeta is used to populate the QueryMeta data for an RPC call
   259  func (s *Server) setQueryMeta(m *structs.QueryMeta) {
   260  	if s.IsLeader() {
   261  		m.LastContact = 0
   262  		m.KnownLeader = true
   263  	} else {
   264  		m.LastContact = time.Now().Sub(s.raft.LastContact())
   265  		m.KnownLeader = (s.raft.Leader() != "")
   266  	}
   267  }
   268  
   269  // blockingOptions is used to parameterize blockingRPC
   270  type blockingOptions struct {
   271  	queryOpts  *structs.QueryOptions
   272  	queryMeta  *structs.QueryMeta
   273  	allocWatch string
   274  	run        func() error
   275  }
   276  
   277  // blockingRPC is used for queries that need to wait for a
   278  // minimum index. This is used to block and wait for changes.
   279  func (s *Server) blockingRPC(opts *blockingOptions) error {
   280  	var timeout *time.Timer
   281  	var notifyCh chan struct{}
   282  	var state *state.StateStore
   283  
   284  	// Fast path non-blocking
   285  	if opts.queryOpts.MinQueryIndex == 0 {
   286  		goto RUN_QUERY
   287  	}
   288  
   289  	// Restrict the max query time, and ensure there is always one
   290  	if opts.queryOpts.MaxQueryTime > maxQueryTime {
   291  		opts.queryOpts.MaxQueryTime = maxQueryTime
   292  	} else if opts.queryOpts.MaxQueryTime <= 0 {
   293  		opts.queryOpts.MaxQueryTime = defaultQueryTime
   294  	}
   295  
   296  	// Apply a small amount of jitter to the request
   297  	opts.queryOpts.MaxQueryTime += randomStagger(opts.queryOpts.MaxQueryTime / jitterFraction)
   298  
   299  	// Setup a query timeout
   300  	timeout = time.NewTimer(opts.queryOpts.MaxQueryTime)
   301  
   302  	// Setup the notify channel
   303  	notifyCh = make(chan struct{}, 1)
   304  
   305  	// Ensure we tear down any watchers on return
   306  	state = s.fsm.State()
   307  	defer func() {
   308  		timeout.Stop()
   309  		if opts.allocWatch != "" {
   310  			state.StopWatchAllocs(opts.allocWatch, notifyCh)
   311  		}
   312  	}()
   313  
   314  REGISTER_NOTIFY:
   315  	// Register the notification channel. This may be done
   316  	// multiple times if we have not reached the target wait index.
   317  	if opts.allocWatch != "" {
   318  		state.WatchAllocs(opts.allocWatch, notifyCh)
   319  	}
   320  
   321  RUN_QUERY:
   322  	// Update the query meta data
   323  	s.setQueryMeta(opts.queryMeta)
   324  
   325  	// Run the query function
   326  	metrics.IncrCounter([]string{"nomad", "rpc", "query"}, 1)
   327  	err := opts.run()
   328  
   329  	// Check for minimum query time
   330  	if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
   331  		select {
   332  		case <-notifyCh:
   333  			goto REGISTER_NOTIFY
   334  		case <-timeout.C:
   335  		}
   336  	}
   337  	return err
   338  }