github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/rpc.go (about) 1 package nomad 2 3 import ( 4 "crypto/tls" 5 "fmt" 6 "io" 7 "math/rand" 8 "net" 9 "strings" 10 "time" 11 12 "github.com/armon/go-metrics" 13 "github.com/hashicorp/net-rpc-msgpackrpc" 14 "github.com/hashicorp/nomad/nomad/state" 15 "github.com/hashicorp/nomad/nomad/structs" 16 "github.com/hashicorp/raft" 17 "github.com/hashicorp/yamux" 18 ) 19 20 type RPCType byte 21 22 const ( 23 rpcNomad RPCType = 0x01 24 rpcRaft = 0x02 25 rpcMultiplex = 0x03 26 rpcTLS = 0x04 27 ) 28 29 const ( 30 // maxQueryTime is used to bound the limit of a blocking query 31 maxQueryTime = 300 * time.Second 32 33 // defaultQueryTime is the amount of time we block waiting for a change 34 // if no time is specified. Previously we would wait the maxQueryTime. 35 defaultQueryTime = 300 * time.Second 36 37 // jitterFraction is a the limit to the amount of jitter we apply 38 // to a user specified MaxQueryTime. We divide the specified time by 39 // the fraction. So 16 == 6.25% limit of jitter 40 jitterFraction = 16 41 42 // Warn if the Raft command is larger than this. 43 // If it's over 1MB something is probably being abusive. 44 raftWarnSize = 1024 * 1024 45 46 // enqueueLimit caps how long we will wait to enqueue 47 // a new Raft command. Something is probably wrong if this 48 // value is ever reached. However, it prevents us from blocking 49 // the requesting goroutine forever. 50 enqueueLimit = 30 * time.Second 51 ) 52 53 // listen is used to listen for incoming RPC connections 54 func (s *Server) listen() { 55 for { 56 // Accept a connection 57 conn, err := s.rpcListener.Accept() 58 if err != nil { 59 if s.shutdown { 60 return 61 } 62 s.logger.Printf("[ERR] nomad.rpc: failed to accept RPC conn: %v", err) 63 continue 64 } 65 66 go s.handleConn(conn, false) 67 metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1) 68 } 69 } 70 71 // handleConn is used to determine if this is a Raft or 72 // Nomad type RPC connection and invoke the correct handler 73 func (s *Server) handleConn(conn net.Conn, isTLS bool) { 74 // Read a single byte 75 buf := make([]byte, 1) 76 if _, err := conn.Read(buf); err != nil { 77 if err != io.EOF { 78 s.logger.Printf("[ERR] nomad.rpc: failed to read byte: %v", err) 79 } 80 conn.Close() 81 return 82 } 83 84 // Enforce TLS if VerifyIncoming is set 85 if s.config.RequireTLS && !isTLS && RPCType(buf[0]) != rpcTLS { 86 s.logger.Printf("[WARN] nomad.rpc: Non-TLS connection attempted with RequireTLS set") 87 conn.Close() 88 return 89 } 90 91 // Switch on the byte 92 switch RPCType(buf[0]) { 93 case rpcNomad: 94 s.handleNomadConn(conn) 95 96 case rpcRaft: 97 metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1) 98 s.raftLayer.Handoff(conn) 99 100 case rpcMultiplex: 101 s.handleMultiplex(conn) 102 103 case rpcTLS: 104 if s.rpcTLS == nil { 105 s.logger.Printf("[WARN] nomad.rpc: TLS connection attempted, server not configured for TLS") 106 conn.Close() 107 return 108 } 109 conn = tls.Server(conn, s.rpcTLS) 110 s.handleConn(conn, true) 111 112 default: 113 s.logger.Printf("[ERR] nomad.rpc: unrecognized RPC byte: %v", buf[0]) 114 conn.Close() 115 return 116 } 117 } 118 119 // handleMultiplex is used to multiplex a single incoming connection 120 // using the Yamux multiplexer 121 func (s *Server) handleMultiplex(conn net.Conn) { 122 defer conn.Close() 123 conf := yamux.DefaultConfig() 124 conf.LogOutput = s.config.LogOutput 125 server, _ := yamux.Server(conn, conf) 126 for { 127 sub, err := server.Accept() 128 if err != nil { 129 if err != io.EOF { 130 s.logger.Printf("[ERR] nomad.rpc: multiplex conn accept failed: %v", err) 131 } 132 return 133 } 134 go s.handleNomadConn(sub) 135 } 136 } 137 138 // handleNomadConn is used to service a single Nomad RPC connection 139 func (s *Server) handleNomadConn(conn net.Conn) { 140 defer conn.Close() 141 rpcCodec := msgpackrpc.NewServerCodec(conn) 142 for { 143 select { 144 case <-s.shutdownCh: 145 return 146 default: 147 } 148 149 if err := s.rpcServer.ServeRequest(rpcCodec); err != nil { 150 if err != io.EOF && !strings.Contains(err.Error(), "closed") { 151 s.logger.Printf("[ERR] nomad.rpc: RPC error: %v (%v)", err, conn) 152 metrics.IncrCounter([]string{"nomad", "rpc", "request_error"}, 1) 153 } 154 return 155 } 156 metrics.IncrCounter([]string{"nomad", "rpc", "request"}, 1) 157 } 158 } 159 160 // forward is used to forward to a remote region or to forward to the local leader 161 // Returns a bool of if forwarding was performed, as well as any error 162 func (s *Server) forward(method string, info structs.RPCInfo, args interface{}, reply interface{}) (bool, error) { 163 region := info.RequestRegion() 164 if region == "" { 165 return true, fmt.Errorf("missing target RPC") 166 } 167 168 // Handle region forwarding 169 if region != s.config.Region { 170 err := s.forwardRegion(region, method, args, reply) 171 return true, err 172 } 173 174 // Check if we can allow a stale read 175 if info.IsRead() && info.AllowStaleRead() { 176 return false, nil 177 } 178 179 // Handle leader forwarding 180 if !s.IsLeader() { 181 err := s.forwardLeader(method, args, reply) 182 return true, err 183 } 184 return false, nil 185 } 186 187 // forwardLeader is used to forward an RPC call to the leader, or fail if no leader 188 func (s *Server) forwardLeader(method string, args interface{}, reply interface{}) error { 189 // Get the leader 190 leader := s.raft.Leader() 191 if leader == "" { 192 return structs.ErrNoLeader 193 } 194 195 // Lookup the server 196 s.peerLock.RLock() 197 server := s.localPeers[leader] 198 s.peerLock.RUnlock() 199 200 // Handle a missing server 201 if server == nil { 202 return structs.ErrNoLeader 203 } 204 return s.connPool.RPC(s.config.Region, server.Addr, server.Version, method, args, reply) 205 } 206 207 // forwardRegion is used to forward an RPC call to a remote region, or fail if no servers 208 func (s *Server) forwardRegion(region, method string, args interface{}, reply interface{}) error { 209 // Bail if we can't find any servers 210 s.peerLock.RLock() 211 servers := s.peers[region] 212 if len(servers) == 0 { 213 s.peerLock.RUnlock() 214 s.logger.Printf("[WARN] nomad.rpc: RPC request for region '%s', no path found", 215 region) 216 return structs.ErrNoRegionPath 217 } 218 219 // Select a random addr 220 offset := rand.Int31() % int32(len(servers)) 221 server := servers[offset] 222 s.peerLock.RUnlock() 223 224 // Forward to remote Nomad 225 metrics.IncrCounter([]string{"nomad", "rpc", "cross-region", region}, 1) 226 return s.connPool.RPC(region, server.Addr, server.Version, method, args, reply) 227 } 228 229 // raftApplyFuture is used to encode a message, run it through raft, and return the Raft future. 230 func (s *Server) raftApplyFuture(t structs.MessageType, msg interface{}) (raft.ApplyFuture, error) { 231 buf, err := structs.Encode(t, msg) 232 if err != nil { 233 return nil, fmt.Errorf("Failed to encode request: %v", err) 234 } 235 236 // Warn if the command is very large 237 if n := len(buf); n > raftWarnSize { 238 s.logger.Printf("[WARN] nomad: Attempting to apply large raft entry (type %d) (%d bytes)", t, n) 239 } 240 241 future := s.raft.Apply(buf, enqueueLimit) 242 return future, nil 243 } 244 245 // raftApply is used to encode a message, run it through raft, and return 246 // the FSM response along with any errors 247 func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) { 248 future, err := s.raftApplyFuture(t, msg) 249 if err != nil { 250 return nil, 0, err 251 } 252 if err := future.Error(); err != nil { 253 return nil, 0, err 254 } 255 return future.Response(), future.Index(), nil 256 } 257 258 // setQueryMeta is used to populate the QueryMeta data for an RPC call 259 func (s *Server) setQueryMeta(m *structs.QueryMeta) { 260 if s.IsLeader() { 261 m.LastContact = 0 262 m.KnownLeader = true 263 } else { 264 m.LastContact = time.Now().Sub(s.raft.LastContact()) 265 m.KnownLeader = (s.raft.Leader() != "") 266 } 267 } 268 269 // blockingOptions is used to parameterize blockingRPC 270 type blockingOptions struct { 271 queryOpts *structs.QueryOptions 272 queryMeta *structs.QueryMeta 273 allocWatch string 274 run func() error 275 } 276 277 // blockingRPC is used for queries that need to wait for a 278 // minimum index. This is used to block and wait for changes. 279 func (s *Server) blockingRPC(opts *blockingOptions) error { 280 var timeout *time.Timer 281 var notifyCh chan struct{} 282 var state *state.StateStore 283 284 // Fast path non-blocking 285 if opts.queryOpts.MinQueryIndex == 0 { 286 goto RUN_QUERY 287 } 288 289 // Restrict the max query time, and ensure there is always one 290 if opts.queryOpts.MaxQueryTime > maxQueryTime { 291 opts.queryOpts.MaxQueryTime = maxQueryTime 292 } else if opts.queryOpts.MaxQueryTime <= 0 { 293 opts.queryOpts.MaxQueryTime = defaultQueryTime 294 } 295 296 // Apply a small amount of jitter to the request 297 opts.queryOpts.MaxQueryTime += randomStagger(opts.queryOpts.MaxQueryTime / jitterFraction) 298 299 // Setup a query timeout 300 timeout = time.NewTimer(opts.queryOpts.MaxQueryTime) 301 302 // Setup the notify channel 303 notifyCh = make(chan struct{}, 1) 304 305 // Ensure we tear down any watchers on return 306 state = s.fsm.State() 307 defer func() { 308 timeout.Stop() 309 if opts.allocWatch != "" { 310 state.StopWatchAllocs(opts.allocWatch, notifyCh) 311 } 312 }() 313 314 REGISTER_NOTIFY: 315 // Register the notification channel. This may be done 316 // multiple times if we have not reached the target wait index. 317 if opts.allocWatch != "" { 318 state.WatchAllocs(opts.allocWatch, notifyCh) 319 } 320 321 RUN_QUERY: 322 // Update the query meta data 323 s.setQueryMeta(opts.queryMeta) 324 325 // Run the query function 326 metrics.IncrCounter([]string{"nomad", "rpc", "query"}, 1) 327 err := opts.run() 328 329 // Check for minimum query time 330 if err == nil && opts.queryMeta.Index > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex { 331 select { 332 case <-notifyCh: 333 goto REGISTER_NOTIFY 334 case <-timeout.C: 335 } 336 } 337 return err 338 }