github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/rpc.go

package client

import (
	"errors"
	"io"
	"net"
	"net/rpc"
	"strings"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/nomad/client/servers"
	inmem "github.com/hashicorp/nomad/helper/codec"
	"github.com/hashicorp/nomad/helper/pool"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/yamux"
)

// rpcEndpoints holds the RPC endpoints
type rpcEndpoints struct {
	ClientStats *ClientStats
	CSI         *CSI
	FileSystem  *FileSystem
	Allocations *Allocations
	Agent       *Agent
}

// ClientRPC is used to make a local, client-only RPC call
func (c *Client) ClientRPC(method string, args interface{}, reply interface{}) error {
	codec := &inmem.InmemCodec{
		Method: method,
		Args:   args,
		Reply:  reply,
	}
	if err := c.rpcServer.ServeRequest(codec); err != nil {
		return err
	}
	return codec.Err
}

// StreamingRpcHandler is used to make a local, client-only streaming RPC
// call.
func (c *Client) StreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
	return c.streamingRpcs.GetHandler(method)
}

// RPC is used to forward an RPC call to a Nomad server, or fail if no servers
// are available.
func (c *Client) RPC(method string, args interface{}, reply interface{}) error {
	// Invoke the RPCHandler if it exists
	if c.config.RPCHandler != nil {
		return c.config.RPCHandler.RPC(method, args, reply)
	}

	// We will try to automatically retry requests that fail due to things like
	// server unavailability, but instead of retrying forever, let's have a
	// solid upper bound.
	deadline := time.Now()

	// A reasonable amount of time for leader election. Note that when servers
	// forward() our RPC requests to the leader they may also allow for an
	// RPCHoldTimeout while waiting for leader election. That's OK: we won't
	// double up, because we are using it here not as a sleep but as a hint to
	// give up.
	deadline = deadline.Add(c.config.RPCHoldTimeout)

	// If it's a blocking query, allow the time specified by the request.
	if info, ok := args.(structs.RPCInfo); ok {
		deadline = deadline.Add(info.TimeToBlock())
	}

TRY:
	server := c.servers.FindServer()
	if server == nil {
		return noServersErr
	}

	// Make the request.
	rpcErr := c.connPool.RPC(c.Region(), server.Addr, c.RPCMajorVersion(), method, args, reply)

	if rpcErr == nil {
		c.fireRpcRetryWatcher()
		return nil
	}

	// If shutting down, exit without logging the error
	select {
	case <-c.shutdownCh:
		return nil
	default:
	}

	// Move off to another server, and see if we can retry.
	c.rpcLogger.Error("error performing RPC to server", "error", rpcErr, "rpc", method, "server", server.Addr)
	c.servers.NotifyFailedServer(server)

	if !canRetry(args, rpcErr) {
		c.rpcLogger.Error("error performing RPC to server which is not safe to automatically retry", "error", rpcErr, "rpc", method, "server", server.Addr)
		return rpcErr
	}
	if time.Now().After(deadline) {
		// Blocking queries are tricky. Jitters and RPCHoldTimeouts in multiple
		// places can result in our server call taking longer than we wanted it
		// to. For example: a block time of 5s may easily turn into the server
		// blocking for 10s since it applies its own RPCHoldTimeout. If the
		// server dies at t=7s we still want to retry, so before we give up on
		// blocking queries make one last attempt for an immediate answer.
		if info, ok := args.(structs.RPCInfo); ok && info.TimeToBlock() > 0 {
			info.SetTimeToBlock(0)
			return c.RPC(method, args, reply)
		}
		c.rpcLogger.Error("error performing RPC to server, deadline exceeded, cannot retry", "error", rpcErr, "rpc", method, "server", server.Addr)
		return rpcErr
	}

	// Wait to avoid thundering herd
	select {
	case <-time.After(lib.RandomStagger(c.config.RPCHoldTimeout / structs.JitterFraction)):
		// If we are going to retry a blocking query we need to update the time
		// to block so it finishes by our deadline.
		if info, ok := args.(structs.RPCInfo); ok && info.TimeToBlock() > 0 {
			newBlockTime := time.Until(deadline)
			// We can get below 0 here on slow computers because we slept for
			// jitter, so at least try to get an immediate response.
			if newBlockTime < 0 {
				newBlockTime = 0
			}
			info.SetTimeToBlock(newBlockTime)
			return c.RPC(method, args, reply)
		}

		goto TRY
	case <-c.shutdownCh:
	}
	return rpcErr
}
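// Illustrative sketch (not part of the original file): a caller issuing a
// blocking query through RPC sets MinQueryIndex/MaxQueryTime on the request's
// QueryOptions, which feed TimeToBlock above. The specific request/response
// types, RPC method name, and the lastIndex variable are assumptions for
// illustration:
//
//	args := structs.NodeSpecificRequest{
//		NodeID: c.NodeID(),
//		QueryOptions: structs.QueryOptions{
//			Region:        c.Region(),
//			MinQueryIndex: lastIndex,        // block until state passes this index
//			MaxQueryTime:  10 * time.Second, // added to the retry deadline above
//		},
//	}
//	var reply structs.SingleNodeResponse
//	err := c.RPC("Node.GetNode", &args, &reply)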
// canRetry returns true if the given situation is safe for a retry.
func canRetry(args interface{}, err error) bool {
	// No-leader errors are always safe to retry since no state could have
	// been changed.
	if structs.IsErrNoLeader(err) {
		return true
	}

	// Reads are safe to retry for stream errors, such as if a server was
	// being shut down.
	info, ok := args.(structs.RPCInfo)
	if ok && info.IsRead() && lib.IsErrEOF(err) {
		return true
	}

	return false
}

// RemoteStreamingRpcHandler is used to make a streaming RPC call to a remote
// server.
func (c *Client) RemoteStreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
	server := c.servers.FindServer()
	if server == nil {
		return nil, noServersErr
	}

	conn, err := c.streamingRpcConn(server, method)
	if err != nil {
		// Move off to another server
		c.rpcLogger.Error("error performing RPC to server", "error", err, "rpc", method, "server", server.Addr)
		c.servers.NotifyFailedServer(server)
		return nil, err
	}

	return bridgedStreamingRpcHandler(conn), nil
}

// bridgedStreamingRpcHandler creates a bridged streaming RPC handler by copying
// data between the two sides.
func bridgedStreamingRpcHandler(sideA io.ReadWriteCloser) structs.StreamingRpcHandler {
	return func(sideB io.ReadWriteCloser) {
		defer sideA.Close()
		defer sideB.Close()
		structs.Bridge(sideA, sideB)
	}
}
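// Illustrative sketch (not part of the original file): the handler returned by
// RemoteStreamingRpcHandler pipes a local stream to the remote server until
// either side closes. The method name and localConn are assumptions for
// illustration:
//
//	handler, err := c.RemoteStreamingRpcHandler("FileSystem.Logs")
//	if err != nil {
//		return err
//	}
//	// Copies bytes in both directions between localConn and the server.
//	go handler(localConn)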
// streamingRpcConn is used to retrieve a connection to a server to conduct a
// streaming RPC.
func (c *Client) streamingRpcConn(server *servers.Server, method string) (net.Conn, error) {
	// Dial the server
	conn, err := net.DialTimeout("tcp", server.Addr.String(), 10*time.Second)
	if err != nil {
		return nil, err
	}

	// Cast to TCPConn
	if tcp, ok := conn.(*net.TCPConn); ok {
		tcp.SetKeepAlive(true)
		tcp.SetNoDelay(true)
	}

	// Check if TLS is enabled
	c.tlsWrapLock.RLock()
	tlsWrap := c.tlsWrap
	c.tlsWrapLock.RUnlock()

	if tlsWrap != nil {
		// Switch the connection into TLS mode
		if _, err := conn.Write([]byte{byte(pool.RpcTLS)}); err != nil {
			conn.Close()
			return nil, err
		}

		// Wrap the connection in a TLS client
		tlsConn, err := tlsWrap(c.Region(), conn)
		if err != nil {
			conn.Close()
			return nil, err
		}
		conn = tlsConn
	}

	// Write the multiplex byte to set the mode
	if _, err := conn.Write([]byte{byte(pool.RpcStreaming)}); err != nil {
		conn.Close()
		return nil, err
	}

	// Send the header
	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
	header := structs.StreamingRpcHeader{
		Method: method,
	}
	if err := encoder.Encode(header); err != nil {
		conn.Close()
		return nil, err
	}

	// Wait for the acknowledgement
	var ack structs.StreamingRpcAck
	if err := decoder.Decode(&ack); err != nil {
		conn.Close()
		return nil, err
	}

	if ack.Error != "" {
		conn.Close()
		return nil, errors.New(ack.Error)
	}

	return conn, nil
}
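// For reference, the handshake performed by streamingRpcConn above, as implied
// by the reads and writes in this file (summary added here, not in the
// original source):
//
//	client                                server
//	  |-- [pool.RpcTLS byte] ------------->|  (only when a TLS wrapper is set)
//	  |<------- TLS handshake ------------>|
//	  |-- [pool.RpcStreaming byte] ------->|
//	  |-- msgpack StreamingRpcHeader ----->|  {Method: "..."}
//	  |<---- msgpack StreamingRpcAck ------|  {Error: "" on success}
//	  |<====== bridged byte stream =======>|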
// setupClientRpc is used to set up the Client's RPC endpoints
func (c *Client) setupClientRpc() {
	// Initialize the RPC handlers
	c.endpoints.ClientStats = &ClientStats{c}
	c.endpoints.CSI = &CSI{c}
	c.endpoints.FileSystem = NewFileSystemEndpoint(c)
	c.endpoints.Allocations = NewAllocationsEndpoint(c)
	c.endpoints.Agent = NewAgentEndpoint(c)

	// Create the RPC server
	c.rpcServer = rpc.NewServer()

	// Register the endpoints with the RPC server
	c.setupClientRpcServer(c.rpcServer)

	go c.rpcConnListener()
}

// setupClientRpcServer is used to populate a client RPC server with endpoints.
func (c *Client) setupClientRpcServer(server *rpc.Server) {
	// Register the endpoints
	server.Register(c.endpoints.ClientStats)
	server.Register(c.endpoints.CSI)
	server.Register(c.endpoints.FileSystem)
	server.Register(c.endpoints.Allocations)
	server.Register(c.endpoints.Agent)
}

// rpcConnListener is a long-lived function that listens for new connections
// being made on the connection pool and starts an RPC listener for each
// connection.
func (c *Client) rpcConnListener() {
	// Make a channel for new connections.
	conns := make(chan *yamux.Session, 4)
	c.connPool.SetConnListener(conns)

	for {
		select {
		case <-c.shutdownCh:
			return
		case session, ok := <-conns:
			if !ok {
				continue
			}

			go c.listenConn(session)
		}
	}
}

// listenConn is used to listen for connections being made from the server on
// a pre-existing connection. This should be called in a goroutine.
func (c *Client) listenConn(s *yamux.Session) {
	for {
		conn, err := s.Accept()
		if err != nil {
			if s.IsClosed() {
				return
			}

			c.rpcLogger.Error("failed to accept RPC conn", "error", err)
			continue
		}

		go c.handleConn(conn)
		metrics.IncrCounter([]string{"client", "rpc", "accept_conn"}, 1)
	}
}

// handleConn is used to determine if this is an RPC or streaming RPC
// connection and invoke the correct handler.
func (c *Client) handleConn(conn net.Conn) {
	// Read a single byte
	buf := make([]byte, 1)
	if _, err := conn.Read(buf); err != nil {
		if err != io.EOF {
			c.rpcLogger.Error("error reading byte", "error", err)
		}
		conn.Close()
		return
	}

	// Switch on the byte
	switch pool.RPCType(buf[0]) {
	case pool.RpcNomad:
		c.handleNomadConn(conn)

	case pool.RpcStreaming:
		c.handleStreamingConn(conn)

	default:
		c.rpcLogger.Error("unrecognized RPC byte", "byte", buf[0])
		conn.Close()
		return
	}
}

// handleNomadConn is used to handle a single Nomad RPC connection.
func (c *Client) handleNomadConn(conn net.Conn) {
	defer conn.Close()
	rpcCodec := pool.NewServerCodec(conn)
	for {
		select {
		case <-c.shutdownCh:
			return
		default:
		}

		if err := c.rpcServer.ServeRequest(rpcCodec); err != nil {
			if err != io.EOF && !strings.Contains(err.Error(), "closed") {
				c.rpcLogger.Error("error performing RPC", "error", err, "addr", conn.RemoteAddr())
				metrics.IncrCounter([]string{"client", "rpc", "request_error"}, 1)
			}
			return
		}
		metrics.IncrCounter([]string{"client", "rpc", "request"}, 1)
	}
}

// handleStreamingConn is used to handle a single streaming Nomad RPC
// connection.
func (c *Client) handleStreamingConn(conn net.Conn) {
	defer conn.Close()

	// Decode the header
	var header structs.StreamingRpcHeader
	decoder := codec.NewDecoder(conn, structs.MsgpackHandle)
	if err := decoder.Decode(&header); err != nil {
		if err != io.EOF && !strings.Contains(err.Error(), "closed") {
			c.rpcLogger.Error("error performing streaming RPC", "error", err, "addr", conn.RemoteAddr())
			metrics.IncrCounter([]string{"client", "streaming_rpc", "request_error"}, 1)
		}

		return
	}

	ack := structs.StreamingRpcAck{}
	handler, err := c.streamingRpcs.GetHandler(header.Method)
	if err != nil {
		c.rpcLogger.Error("streaming RPC error", "addr", conn.RemoteAddr(), "error", err)
		metrics.IncrCounter([]string{"client", "streaming_rpc", "request_error"}, 1)
		ack.Error = err.Error()
	}

	// Send the acknowledgement
	encoder := codec.NewEncoder(conn, structs.MsgpackHandle)
	if err := encoder.Encode(ack); err != nil {
		conn.Close()
		return
	}

	if ack.Error != "" {
		return
	}

	// Invoke the handler
	metrics.IncrCounter([]string{"client", "streaming_rpc", "request"}, 1)
	handler(conn)
}
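// For reference (summary added here, not in the original source): connections
// arriving over the pooled yamux sessions are demultiplexed by a single mode
// byte, mirroring what streamingRpcConn writes on the client side:
//
//	pool.RpcNomad     -> handleNomadConn     (msgpack net/rpc requests in a loop)
//	pool.RpcStreaming -> handleStreamingConn (header/ack, then a bridged stream)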
// resolveServer takes a server's address as a string and returns its resolved
// net.Addr or an error.
func resolveServer(s string) (net.Addr, error) {
	const defaultClientPort = "4647" // default client RPC port
	host, port, err := net.SplitHostPort(s)
	if err != nil {
		if strings.Contains(err.Error(), "missing port") {
			host = s
			port = defaultClientPort
		} else {
			return nil, err
		}
	}
	return net.ResolveTCPAddr("tcp", net.JoinHostPort(host, port))
}

// Ping is used to ping a particular server and returns whether it is healthy
// or a potential error.
func (c *Client) Ping(srv net.Addr) error {
	var reply struct{}
	err := c.connPool.RPC(c.Region(), srv, c.RPCMajorVersion(), "Status.Ping", struct{}{}, &reply)
	return err
}

// rpcRetryWatcher returns a channel that will be closed if an event happens
// such that we expect the next RPC to be successful.
func (c *Client) rpcRetryWatcher() <-chan struct{} {
	c.rpcRetryLock.Lock()
	defer c.rpcRetryLock.Unlock()

	if c.rpcRetryCh == nil {
		c.rpcRetryCh = make(chan struct{})
	}

	return c.rpcRetryCh
}

// fireRpcRetryWatcher causes any RPC retry loops to retry their RPCs because
// we believe they will be successful.
func (c *Client) fireRpcRetryWatcher() {
	c.rpcRetryLock.Lock()
	defer c.rpcRetryLock.Unlock()
	if c.rpcRetryCh != nil {
		close(c.rpcRetryCh)
		c.rpcRetryCh = nil
	}
}
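// Illustrative sketch (not part of the original file): a retry loop pairing
// rpcRetryWatcher with a timer, following the close-and-replace channel
// pattern above. The RPC method, args/reply, and backoff variable are
// assumptions for illustration:
//
//	for {
//		err := c.RPC("Node.GetNode", &args, &reply)
//		if err == nil {
//			return nil
//		}
//		select {
//		case <-c.rpcRetryWatcher():
//			// fireRpcRetryWatcher closed the channel; retry immediately.
//		case <-time.After(backoff):
//		case <-c.shutdownCh:
//			return err
//		}
//	}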