github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/rpc.go

package nomad

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net"
	"net/rpc"
	"strings"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/consul/lib"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper/pool"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/yamux"
	"github.com/ugorji/go/codec"
)

const (
	// maxQueryTime is used to bound the limit of a blocking query
	maxQueryTime = 300 * time.Second

	// defaultQueryTime is the amount of time we block waiting for a change
	// if no time is specified. Previously we would wait the maxQueryTime.
	defaultQueryTime = 300 * time.Second

	// Warn if the Raft command is larger than this.
	// If it's over 1MB something is probably being abusive.
	raftWarnSize = 1024 * 1024

	// enqueueLimit caps how long we will wait to enqueue
	// a new Raft command. Something is probably wrong if this
	// value is ever reached. However, it prevents us from blocking
	// the requesting goroutine forever.
	enqueueLimit = 30 * time.Second
)

// RPCContext provides metadata about the RPC connection.
type RPCContext struct {
	// Conn exposes the raw connection.
	Conn net.Conn

	// Session exposes the multiplexed connection session.
	Session *yamux.Session

	// TLS marks whether the RPC is over a TLS based connection
	TLS bool

	// VerifiedChains is the verified certificates presented by the incoming
	// connection.
	VerifiedChains [][]*x509.Certificate

	// NodeID marks the NodeID that initiated the connection.
	NodeID string
}

// listen is used to listen for incoming RPC connections
func (s *Server) listen(ctx context.Context) {
	defer close(s.listenerCh)
	for {
		select {
		case <-ctx.Done():
			s.logger.Println("[INFO] nomad.rpc: Closing server RPC connection")
			return
		default:
		}

		// Accept a connection
		conn, err := s.rpcListener.Accept()
		if err != nil {
			if s.shutdown {
				return
			}

			select {
			case <-ctx.Done():
				return
			default:
			}

			s.logger.Printf("[ERR] nomad.rpc: failed to accept RPC conn: %v", err)
			continue
		}

		go s.handleConn(ctx, conn, &RPCContext{Conn: conn})
		metrics.IncrCounter([]string{"nomad", "rpc", "accept_conn"}, 1)
	}
}

// handleConn is used to determine if this is a Raft or
// Nomad type RPC connection and invoke the correct handler
func (s *Server) handleConn(ctx context.Context, conn net.Conn, rpcCtx *RPCContext) {
	// Read a single byte
	buf := make([]byte, 1)
	if _, err := conn.Read(buf); err != nil {
		if err != io.EOF {
			s.logger.Printf("[ERR] nomad.rpc: failed to read byte: %v", err)
		}
		conn.Close()
		return
	}

	// Enforce TLS if EnableRPC is set
	if s.config.TLSConfig.EnableRPC && !rpcCtx.TLS && pool.RPCType(buf[0]) != pool.RpcTLS {
		if !s.config.TLSConfig.RPCUpgradeMode {
			s.logger.Printf("[WARN] nomad.rpc: Non-TLS connection attempted from %s with RequireTLS set", conn.RemoteAddr().String())
			conn.Close()
			return
		}
	}

	// Switch on the byte
	switch pool.RPCType(buf[0]) {
	case pool.RpcNomad:
		// Create an RPC Server and handle the request
		server := rpc.NewServer()
		s.setupRpcServer(server, rpcCtx)
		s.handleNomadConn(ctx, conn, server)

		// Remove any potential mapping between a NodeID and this connection and
		// close the underlying connection.
		s.removeNodeConn(rpcCtx)

	case pool.RpcRaft:
		metrics.IncrCounter([]string{"nomad", "rpc", "raft_handoff"}, 1)
		s.raftLayer.Handoff(ctx, conn)

	case pool.RpcMultiplex:
		s.handleMultiplex(ctx, conn, rpcCtx)

	case pool.RpcTLS:
		if s.rpcTLS == nil {
			s.logger.Printf("[WARN] nomad.rpc: TLS connection attempted, server not configured for TLS")
			conn.Close()
			return
		}
		conn = tls.Server(conn, s.rpcTLS)

		// Force a handshake so we can get information about the TLS connection
		// state.
		tlsConn, ok := conn.(*tls.Conn)
		if !ok {
			s.logger.Printf("[ERR] nomad.rpc: expected TLS connection but got %T", conn)
			conn.Close()
			return
		}

		if err := tlsConn.Handshake(); err != nil {
			s.logger.Printf("[WARN] nomad.rpc: failed TLS handshake from connection from %v: %v", tlsConn.RemoteAddr(), err)
			conn.Close()
			return
		}

		// Update the connection context with the fact that the connection is
		// using TLS
		rpcCtx.TLS = true

		// Store the verified chains so they can be inspected later.
		state := tlsConn.ConnectionState()
		rpcCtx.VerifiedChains = state.VerifiedChains

		s.handleConn(ctx, conn, rpcCtx)

	case pool.RpcStreaming:
		s.handleStreamingConn(conn)

	case pool.RpcMultiplexV2:
		s.handleMultiplexV2(ctx, conn, rpcCtx)

	default:
		s.logger.Printf("[ERR] nomad.rpc: unrecognized RPC byte: %v", buf[0])
		conn.Close()
		return
	}
}

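// The first byte written on a fresh connection selects the protocol spoken for
// the rest of that connection. A minimal client-side sketch, not part of this
// package, showing a plain (non-TLS) Nomad RPC; the address, method name, and
// empty argument types are illustrative only and error handling is elided:
//
//	conn, _ := net.Dial("tcp", "127.0.0.1:4647")
//	conn.Write([]byte{byte(pool.RpcNomad)}) // mode byte: plain Nomad RPC
//	client := rpc.NewClientWithCodec(pool.NewClientCodec(conn))
//	var out struct{}
//	client.Call("Status.Ping", struct{}{}, &out)
//
// In practice callers go through the connection pool in helper/pool rather
// than dialing directly.
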
// handleMultiplex is used to multiplex a single incoming connection
// using the Yamux multiplexer
func (s *Server) handleMultiplex(ctx context.Context, conn net.Conn, rpcCtx *RPCContext) {
	defer func() {
		// Remove any potential mapping between a NodeID and this connection and
		// close the underlying connection.
		s.removeNodeConn(rpcCtx)
		conn.Close()
	}()

	conf := yamux.DefaultConfig()
	conf.LogOutput = s.config.LogOutput
	server, err := yamux.Server(conn, conf)
	if err != nil {
		s.logger.Printf("[ERR] nomad.rpc: multiplex failed to create yamux server: %v", err)
		return
	}

	// Update the context to store the yamux session
	rpcCtx.Session = server

	// Create the RPC server for this connection
	rpcServer := rpc.NewServer()
	s.setupRpcServer(rpcServer, rpcCtx)

	for {
		// stop handling connections if context was cancelled
		if ctx.Err() != nil {
			return
		}

		sub, err := server.Accept()
		if err != nil {
			if err != io.EOF {
				s.logger.Printf("[ERR] nomad.rpc: multiplex conn accept failed: %v", err)
			}
			return
		}
		go s.handleNomadConn(ctx, sub, rpcServer)
	}
}

// handleNomadConn is used to service a single Nomad RPC connection
func (s *Server) handleNomadConn(ctx context.Context, conn net.Conn, server *rpc.Server) {
	defer conn.Close()
	rpcCodec := pool.NewServerCodec(conn)
	for {
		select {
		case <-ctx.Done():
			s.logger.Println("[INFO] nomad.rpc: Closing server RPC connection")
			return
		case <-s.shutdownCh:
			return
		default:
		}

		if err := server.ServeRequest(rpcCodec); err != nil {
			if err != io.EOF && !strings.Contains(err.Error(), "closed") {
				s.logger.Printf("[ERR] nomad.rpc: RPC error: %v (%v)", err, conn)
				metrics.IncrCounter([]string{"nomad", "rpc", "request_error"}, 1)
			}
			return
		}
		metrics.IncrCounter([]string{"nomad", "rpc", "request"}, 1)
	}
}

// handleStreamingConn is used to handle a single Streaming Nomad RPC connection.
func (s *Server) handleStreamingConn(conn net.Conn) {
	defer conn.Close()

	// Decode the header
	var header structs.StreamingRpcHeader
	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
	if err := decoder.Decode(&header); err != nil {
		if err != io.EOF && !strings.Contains(err.Error(), "closed") {
			s.logger.Printf("[ERR] nomad.rpc: Streaming RPC error: %v (%v)", err, conn)
			metrics.IncrCounter([]string{"nomad", "streaming_rpc", "request_error"}, 1)
		}

		return
	}

	ack := structs.StreamingRpcAck{}
	handler, err := s.streamingRpcs.GetHandler(header.Method)
	if err != nil {
		s.logger.Printf("[ERR] nomad.rpc: Streaming RPC error: %v (%v)", err, conn)
		metrics.IncrCounter([]string{"nomad", "streaming_rpc", "request_error"}, 1)
		ack.Error = err.Error()
	}

	// Send the acknowledgement
	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
	if err := encoder.Encode(ack); err != nil {
		conn.Close()
		return
	}

	if ack.Error != "" {
		return
	}

	// Invoke the handler
	metrics.IncrCounter([]string{"nomad", "streaming_rpc", "request"}, 1)
	handler(conn)
}

// handleMultiplexV2 is used to multiplex a single incoming connection
// using the Yamux multiplexer. Version 2 handling allows a single connection to
// switch streams between regular RPCs and Streaming RPCs.
func (s *Server) handleMultiplexV2(ctx context.Context, conn net.Conn, rpcCtx *RPCContext) {
	defer func() {
		// Remove any potential mapping between a NodeID and this connection and
		// close the underlying connection.
		s.removeNodeConn(rpcCtx)
		conn.Close()
	}()

	conf := yamux.DefaultConfig()
	conf.LogOutput = s.config.LogOutput
	server, err := yamux.Server(conn, conf)
	if err != nil {
		s.logger.Printf("[ERR] nomad.rpc: multiplex_v2 failed to create yamux server: %v", err)
		return
	}

	// Update the context to store the yamux session
	rpcCtx.Session = server

	// Create the RPC server for this connection
	rpcServer := rpc.NewServer()
	s.setupRpcServer(rpcServer, rpcCtx)

	for {
		// stop handling connections if context was cancelled
		if ctx.Err() != nil {
			return
		}

		// Accept a new stream
		sub, err := server.Accept()
		if err != nil {
			if err != io.EOF {
				s.logger.Printf("[ERR] nomad.rpc: multiplex_v2 conn accept failed: %v", err)
			}
			return
		}

		// Read a single byte
		buf := make([]byte, 1)
		if _, err := sub.Read(buf); err != nil {
			if err != io.EOF {
				s.logger.Printf("[ERR] nomad.rpc: multiplex_v2 failed to read byte: %v", err)
			}
			return
		}

		// Determine which handler to use
		switch pool.RPCType(buf[0]) {
		case pool.RpcNomad:
			go s.handleNomadConn(ctx, sub, rpcServer)
		case pool.RpcStreaming:
			go s.handleStreamingConn(sub)

		default:
			s.logger.Printf("[ERR] nomad.rpc: multiplex_v2 unrecognized RPC byte: %v", buf[0])
			return
		}
	}
}

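// From the dialing side, MultiplexV2 means one mode byte for the connection as
// a whole, then one mode byte per yamux stream. A rough sketch of a client
// (assumed, not part of this file; error handling elided):
//
//	conn.Write([]byte{byte(pool.RpcMultiplexV2)}) // connection-level mode byte
//	session, _ := yamux.Client(conn, yamux.DefaultConfig())
//
//	rpcStream, _ := session.Open()
//	rpcStream.Write([]byte{byte(pool.RpcNomad)}) // this stream carries regular RPCs
//
//	logStream, _ := session.Open()
//	logStream.Write([]byte{byte(pool.RpcStreaming)}) // this stream carries a streaming RPC
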
// forward is used to forward to a remote region or to forward to the local leader.
// It returns whether forwarding was performed, as well as any error.
func (s *Server) forward(method string, info structs.RPCInfo, args interface{}, reply interface{}) (bool, error) {
	var firstCheck time.Time

	region := info.RequestRegion()
	if region == "" {
		return true, fmt.Errorf("missing target RPC")
	}

	// Handle region forwarding
	if region != s.config.Region {
		// Mark that we are forwarding the RPC
		info.SetForwarded()
		err := s.forwardRegion(region, method, args, reply)
		return true, err
	}

	// Check if we can allow a stale read
	if info.IsRead() && info.AllowStaleRead() {
		return false, nil
	}

CHECK_LEADER:
	// Find the leader
	isLeader, remoteServer := s.getLeader()

	// Handle the case we are the leader
	if isLeader {
		return false, nil
	}

	// Handle the case of a known leader
	if remoteServer != nil {
		// Mark that we are forwarding the RPC
		info.SetForwarded()
		err := s.forwardLeader(remoteServer, method, args, reply)
		return true, err
	}

	// Gate the request until there is a leader
	if firstCheck.IsZero() {
		firstCheck = time.Now()
	}
	if time.Now().Sub(firstCheck) < s.config.RPCHoldTimeout {
		jitter := lib.RandomStagger(s.config.RPCHoldTimeout / structs.JitterFraction)
		select {
		case <-time.After(jitter):
			goto CHECK_LEADER
		case <-s.shutdownCh:
		}
	}

	// No leader found and hold time exceeded
	return true, structs.ErrNoLeader
}

// getLeader returns whether the current node is the leader, and if not,
// returns the known leader, which may be nil if the cluster has not yet
// elected one.
func (s *Server) getLeader() (bool, *serverParts) {
	// Check if we are the leader
	if s.IsLeader() {
		return true, nil
	}

	// Get the leader
	leader := s.raft.Leader()
	if leader == "" {
		return false, nil
	}

	// Lookup the server
	s.peerLock.RLock()
	server := s.localPeers[leader]
	s.peerLock.RUnlock()

	// Server could be nil
	return false, server
}

// forwardLeader is used to forward an RPC call to the leader, or fail if no leader
func (s *Server) forwardLeader(server *serverParts, method string, args interface{}, reply interface{}) error {
	// Handle a missing server
	if server == nil {
		return structs.ErrNoLeader
	}
	return s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion, method, args, reply)
}

// forwardServer is used to forward an RPC call to a particular server
func (s *Server) forwardServer(server *serverParts, method string, args interface{}, reply interface{}) error {
	// Handle a missing server
	if server == nil {
		return errors.New("must be given a valid server address")
	}
	return s.connPool.RPC(s.config.Region, server.Addr, server.MajorVersion, method, args, reply)
}

// forwardRegion is used to forward an RPC call to a remote region, or fail if no servers
func (s *Server) forwardRegion(region, method string, args interface{}, reply interface{}) error {
	// Bail if we can't find any servers
	s.peerLock.RLock()
	servers := s.peers[region]
	if len(servers) == 0 {
		s.peerLock.RUnlock()
		s.logger.Printf("[WARN] nomad.rpc: RPC request for region '%s', no path found",
			region)
		return structs.ErrNoRegionPath
	}

	// Select a random addr
	offset := rand.Intn(len(servers))
	server := servers[offset]
	s.peerLock.RUnlock()

	// Forward to remote Nomad
	metrics.IncrCounter([]string{"nomad", "rpc", "cross-region", region}, 1)
	return s.connPool.RPC(region, server.Addr, server.MajorVersion, method, args, reply)
}

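// RPC endpoints are expected to call forward before doing any local work; if
// the request was forwarded (to another region or to the leader), the reply is
// already populated and the handler simply returns. A hedged sketch of the
// usual pattern, with a hypothetical endpoint and argument types:
//
//	func (e *Example) Get(args *structs.ExampleRequest, reply *structs.ExampleResponse) error {
//		if done, err := e.srv.forward("Example.Get", args, args, reply); done {
//			return err
//		}
//		// ... handle the request locally ...
//		return nil
//	}
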
// streamingRpc creates a connection to the given server and conducts the
// initial handshake, returning the connection or an error. It is the caller's
// responsibility to close the connection if there is no returned error.
func (s *Server) streamingRpc(server *serverParts, method string) (net.Conn, error) {
	// Try to dial the server
	conn, err := net.DialTimeout("tcp", server.Addr.String(), 10*time.Second)
	if err != nil {
		return nil, err
	}

	// Cast to TCPConn
	if tcp, ok := conn.(*net.TCPConn); ok {
		tcp.SetKeepAlive(true)
		tcp.SetNoDelay(true)
	}

	if err := s.streamingRpcImpl(conn, server.Region, method); err != nil {
		return nil, err
	}

	return conn, nil
}

// streamingRpcImpl takes a pre-established connection to a server and conducts
// the handshake to establish a streaming RPC for the given method. If an error
// is returned, the underlying connection has been closed. Otherwise it is
// assumed that the connection has been hijacked by the RPC method.
func (s *Server) streamingRpcImpl(conn net.Conn, region, method string) error {
	// Check if TLS is enabled
	s.tlsWrapLock.RLock()
	tlsWrap := s.tlsWrap
	s.tlsWrapLock.RUnlock()

	if tlsWrap != nil {
		// Switch the connection into TLS mode
		if _, err := conn.Write([]byte{byte(pool.RpcTLS)}); err != nil {
			conn.Close()
			return err
		}

		// Wrap the connection in a TLS client
		tlsConn, err := tlsWrap(region, conn)
		if err != nil {
			conn.Close()
			return err
		}
		conn = tlsConn
	}

	// Write the multiplex byte to set the mode
	if _, err := conn.Write([]byte{byte(pool.RpcStreaming)}); err != nil {
		conn.Close()
		return err
	}

	// Send the header
	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
	header := structs.StreamingRpcHeader{
		Method: method,
	}
	if err := encoder.Encode(header); err != nil {
		conn.Close()
		return err
	}

	// Wait for the acknowledgement
	var ack structs.StreamingRpcAck
	if err := decoder.Decode(&ack); err != nil {
		conn.Close()
		return err
	}

	if ack.Error != "" {
		conn.Close()
		return errors.New(ack.Error)
	}

	return nil
}

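// A sketch of how a caller might use streamingRpc: after the handshake the
// returned connection is a raw byte stream owned by the remote handler, so the
// caller typically encodes a method-specific request and then reads framed
// responses. The method name below is illustrative and error handling is elided:
//
//	conn, err := s.streamingRpc(server, "FileSystem.Logs")
//	if err != nil {
//		return err
//	}
//	defer conn.Close()
//	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
//	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
//	// encoder.Encode(request) / decoder.Decode(&frame) as defined by the method.
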
// raftApplyFuture is used to encode a message, run it through raft, and return the Raft future.
func (s *Server) raftApplyFuture(t structs.MessageType, msg interface{}) (raft.ApplyFuture, error) {
	buf, err := structs.Encode(t, msg)
	if err != nil {
		return nil, fmt.Errorf("Failed to encode request: %v", err)
	}

	// Warn if the command is very large
	if n := len(buf); n > raftWarnSize {
		s.logger.Printf("[WARN] nomad: Attempting to apply large raft entry (type %d) (%d bytes)", t, n)
	}

	future := s.raft.Apply(buf, enqueueLimit)
	return future, nil
}

// raftApplyFn is the function signature for applying a msg to Raft
type raftApplyFn func(t structs.MessageType, msg interface{}) (interface{}, uint64, error)

// raftApply is used to encode a message, run it through raft, and return
// the FSM response along with any errors
func (s *Server) raftApply(t structs.MessageType, msg interface{}) (interface{}, uint64, error) {
	future, err := s.raftApplyFuture(t, msg)
	if err != nil {
		return nil, 0, err
	}
	if err := future.Error(); err != nil {
		return nil, 0, err
	}
	return future.Response(), future.Index(), nil
}

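// Write-path RPC endpoints funnel their state changes through raftApply. A
// hedged sketch of the usual call site (the message type and reply fields vary
// per endpoint and are shown for illustration):
//
//	resp, index, err := s.raftApply(structs.NodeRegisterRequestType, args)
//	if err != nil {
//		return err
//	}
//	// Some FSM handlers return an error value as the response.
//	if respErr, ok := resp.(error); ok && respErr != nil {
//		return respErr
//	}
//	reply.Index = index
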
// setQueryMeta is used to populate the QueryMeta data for an RPC call
func (s *Server) setQueryMeta(m *structs.QueryMeta) {
	if s.IsLeader() {
		m.LastContact = 0
		m.KnownLeader = true
	} else {
		m.LastContact = time.Now().Sub(s.raft.LastContact())
		m.KnownLeader = (s.raft.Leader() != "")
	}
}

// queryFn is used to perform a query operation. If a re-query is needed, the
// passed-in watch set will be used to block for changes. The passed-in state
// store should be used (vs. calling fsm.State()) since the given state store
// will be correctly watched for changes if the state store is restored from
// a snapshot.
type queryFn func(memdb.WatchSet, *state.StateStore) error

// blockingOptions is used to parameterize blockingRPC
type blockingOptions struct {
	queryOpts *structs.QueryOptions
	queryMeta *structs.QueryMeta
	run       queryFn
}

// blockingRPC is used for queries that need to wait for a
// minimum index. This is used to block and wait for changes.
func (s *Server) blockingRPC(opts *blockingOptions) error {
	ctx := context.Background()
	var cancel context.CancelFunc
	var state *state.StateStore

	// Fast path non-blocking
	if opts.queryOpts.MinQueryIndex == 0 {
		goto RUN_QUERY
	}

	// Restrict the max query time, and ensure there is always one
	if opts.queryOpts.MaxQueryTime > maxQueryTime {
		opts.queryOpts.MaxQueryTime = maxQueryTime
	} else if opts.queryOpts.MaxQueryTime <= 0 {
		opts.queryOpts.MaxQueryTime = defaultQueryTime
	}

	// Apply a small amount of jitter to the request
	opts.queryOpts.MaxQueryTime += lib.RandomStagger(opts.queryOpts.MaxQueryTime / structs.JitterFraction)

	// Setup a query timeout
	ctx, cancel = context.WithTimeout(context.Background(), opts.queryOpts.MaxQueryTime)
	defer cancel()

RUN_QUERY:
	// Update the query meta data
	s.setQueryMeta(opts.queryMeta)

	// Increment the rpc query counter
	metrics.IncrCounter([]string{"nomad", "rpc", "query"}, 1)

	// We capture the state store and its abandon channel but pass a snapshot to
	// the blocking query function. We operate on the snapshot to allow separate
	// calls to the state store not all wrapped within the same transaction.
	state = s.fsm.State()
	abandonCh := state.AbandonCh()
	snap, _ := state.Snapshot()
	stateSnap := &snap.StateStore

	// We can skip all watch tracking if this isn't a blocking query.
	var ws memdb.WatchSet
	if opts.queryOpts.MinQueryIndex > 0 {
		ws = memdb.NewWatchSet()

		// This channel will be closed if a snapshot is restored and the
		// whole state store is abandoned.
		ws.Add(abandonCh)
	}

	// Block up to the timeout if we didn't see anything fresh.
	err := opts.run(ws, stateSnap)

	// Check for minimum query time
	if err == nil && opts.queryOpts.MinQueryIndex > 0 && opts.queryMeta.Index <= opts.queryOpts.MinQueryIndex {
		if err := ws.WatchCtx(ctx); err == nil {
			goto RUN_QUERY
		}
	}
	return err
}
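
// A sketch of how a read endpoint drives blockingRPC: the endpoint supplies its
// QueryOptions/QueryMeta and a queryFn that reads from the snapshot through the
// provided watch set. The lookup call and reply fields below are illustrative:
//
//	opts := blockingOptions{
//		queryOpts: &args.QueryOptions,
//		queryMeta: &reply.QueryMeta,
//		run: func(ws memdb.WatchSet, store *state.StateStore) error {
//			out, err := store.NodeByID(ws, args.NodeID)
//			if err != nil {
//				return err
//			}
//			reply.Node = out
//			// Set reply.Index so blockingRPC can tell whether anything changed.
//			index, err := store.Index("nodes")
//			if err != nil {
//				return err
//			}
//			reply.Index = index
//			return nil
//		},
//	}
//	return s.blockingRPC(&opts)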