github.com/lazyledger/lazyledger-core@v0.35.0-dev.0.20210613111200-4c651f053571/rpc/jsonrpc/client/ws_client.go (about) 1 package client 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 mrand "math/rand" 8 "net" 9 "net/http" 10 "sync" 11 "time" 12 13 "github.com/gorilla/websocket" 14 metrics "github.com/rcrowley/go-metrics" 15 16 "github.com/lazyledger/lazyledger-core/libs/service" 17 tmsync "github.com/lazyledger/lazyledger-core/libs/sync" 18 types "github.com/lazyledger/lazyledger-core/rpc/jsonrpc/types" 19 ) 20 21 const ( 22 defaultMaxReconnectAttempts = 25 23 defaultWriteWait = 0 24 defaultReadWait = 0 25 defaultPingPeriod = 0 26 ) 27 28 // WSClient is a JSON-RPC client, which uses WebSocket for communication with 29 // the remote server. 30 // 31 // WSClient is safe for concurrent use by multiple goroutines. 32 type WSClient struct { // nolint: maligned 33 conn *websocket.Conn 34 35 Address string // IP:PORT or /path/to/socket 36 Endpoint string // /websocket/url/endpoint 37 Dialer func(string, string) (net.Conn, error) 38 39 // Single user facing channel to read RPCResponses from, closed only when the 40 // client is being stopped. 41 ResponsesCh chan types.RPCResponse 42 43 // Callback, which will be called each time after successful reconnect. 44 onReconnect func() 45 46 // internal channels 47 send chan types.RPCRequest // user requests 48 backlog chan types.RPCRequest // stores a single user request received during a conn failure 49 reconnectAfter chan error // reconnect requests 50 readRoutineQuit chan struct{} // a way for readRoutine to close writeRoutine 51 52 // Maximum reconnect attempts (0 or greater; default: 25). 53 maxReconnectAttempts int 54 55 // Support both ws and wss protocols 56 protocol string 57 58 wg sync.WaitGroup 59 60 mtx tmsync.RWMutex 61 sentLastPingAt time.Time 62 reconnecting bool 63 nextReqID int 64 // sentIDs map[types.JSONRPCIntID]bool // IDs of the requests currently in flight 65 66 // Time allowed to write a message to the server. 0 means block until operation succeeds. 67 writeWait time.Duration 68 69 // Time allowed to read the next message from the server. 0 means block until operation succeeds. 70 readWait time.Duration 71 72 // Send pings to server with this period. Must be less than readWait. If 0, no pings will be sent. 73 pingPeriod time.Duration 74 75 service.BaseService 76 77 // Time between sending a ping and receiving a pong. See 78 // https://godoc.org/github.com/rcrowley/go-metrics#Timer. 79 PingPongLatencyTimer metrics.Timer 80 } 81 82 // NewWS returns a new client. See the commentary on the func(*WSClient) 83 // functions for a detailed description of how to configure ping period and 84 // pong wait time. The endpoint argument must begin with a `/`. 85 // An error is returned on invalid remote. The function panics when remote is nil. 86 func NewWS(remoteAddr, endpoint string, options ...func(*WSClient)) (*WSClient, error) { 87 parsedURL, err := newParsedURL(remoteAddr) 88 if err != nil { 89 return nil, err 90 } 91 // default to ws protocol, unless wss is explicitly specified 92 if parsedURL.Scheme != protoWSS { 93 parsedURL.Scheme = protoWS 94 } 95 96 dialFn, err := makeHTTPDialer(remoteAddr) 97 if err != nil { 98 return nil, err 99 } 100 101 c := &WSClient{ 102 Address: parsedURL.GetTrimmedHostWithPath(), 103 Dialer: dialFn, 104 Endpoint: endpoint, 105 PingPongLatencyTimer: metrics.NewTimer(), 106 107 maxReconnectAttempts: defaultMaxReconnectAttempts, 108 readWait: defaultReadWait, 109 writeWait: defaultWriteWait, 110 pingPeriod: defaultPingPeriod, 111 protocol: parsedURL.Scheme, 112 113 // sentIDs: make(map[types.JSONRPCIntID]bool), 114 } 115 c.BaseService = *service.NewBaseService(nil, "WSClient", c) 116 for _, option := range options { 117 option(c) 118 } 119 return c, nil 120 } 121 122 // MaxReconnectAttempts sets the maximum number of reconnect attempts before returning an error. 123 // It should only be used in the constructor and is not Goroutine-safe. 124 func MaxReconnectAttempts(max int) func(*WSClient) { 125 return func(c *WSClient) { 126 c.maxReconnectAttempts = max 127 } 128 } 129 130 // ReadWait sets the amount of time to wait before a websocket read times out. 131 // It should only be used in the constructor and is not Goroutine-safe. 132 func ReadWait(readWait time.Duration) func(*WSClient) { 133 return func(c *WSClient) { 134 c.readWait = readWait 135 } 136 } 137 138 // WriteWait sets the amount of time to wait before a websocket write times out. 139 // It should only be used in the constructor and is not Goroutine-safe. 140 func WriteWait(writeWait time.Duration) func(*WSClient) { 141 return func(c *WSClient) { 142 c.writeWait = writeWait 143 } 144 } 145 146 // PingPeriod sets the duration for sending websocket pings. 147 // It should only be used in the constructor - not Goroutine-safe. 148 func PingPeriod(pingPeriod time.Duration) func(*WSClient) { 149 return func(c *WSClient) { 150 c.pingPeriod = pingPeriod 151 } 152 } 153 154 // OnReconnect sets the callback, which will be called every time after 155 // successful reconnect. 156 func OnReconnect(cb func()) func(*WSClient) { 157 return func(c *WSClient) { 158 c.onReconnect = cb 159 } 160 } 161 162 // String returns WS client full address. 163 func (c *WSClient) String() string { 164 return fmt.Sprintf("WSClient{%s (%s)}", c.Address, c.Endpoint) 165 } 166 167 // OnStart implements service.Service by dialing a server and creating read and 168 // write routines. 169 func (c *WSClient) OnStart() error { 170 err := c.dial() 171 if err != nil { 172 return err 173 } 174 175 c.ResponsesCh = make(chan types.RPCResponse) 176 177 c.send = make(chan types.RPCRequest) 178 // 1 additional error may come from the read/write 179 // goroutine depending on which failed first. 180 c.reconnectAfter = make(chan error, 1) 181 // capacity for 1 request. a user won't be able to send more because the send 182 // channel is unbuffered. 183 c.backlog = make(chan types.RPCRequest, 1) 184 185 c.startReadWriteRoutines() 186 go c.reconnectRoutine() 187 188 return nil 189 } 190 191 // Stop overrides service.Service#Stop. There is no other way to wait until Quit 192 // channel is closed. 193 func (c *WSClient) Stop() error { 194 if err := c.BaseService.Stop(); err != nil { 195 return err 196 } 197 // only close user-facing channels when we can't write to them 198 c.wg.Wait() 199 close(c.ResponsesCh) 200 201 return nil 202 } 203 204 // IsReconnecting returns true if the client is reconnecting right now. 205 func (c *WSClient) IsReconnecting() bool { 206 c.mtx.RLock() 207 defer c.mtx.RUnlock() 208 return c.reconnecting 209 } 210 211 // IsActive returns true if the client is running and not reconnecting. 212 func (c *WSClient) IsActive() bool { 213 return c.IsRunning() && !c.IsReconnecting() 214 } 215 216 // Send the given RPC request to the server. Results will be available on 217 // ResponsesCh, errors, if any, on ErrorsCh. Will block until send succeeds or 218 // ctx.Done is closed. 219 func (c *WSClient) Send(ctx context.Context, request types.RPCRequest) error { 220 select { 221 case c.send <- request: 222 c.Logger.Info("sent a request", "req", request) 223 // c.mtx.Lock() 224 // c.sentIDs[request.ID.(types.JSONRPCIntID)] = true 225 // c.mtx.Unlock() 226 return nil 227 case <-ctx.Done(): 228 return ctx.Err() 229 } 230 } 231 232 // Call enqueues a call request onto the Send queue. Requests are JSON encoded. 233 func (c *WSClient) Call(ctx context.Context, method string, params map[string]interface{}) error { 234 request, err := types.MapToRequest(c.nextRequestID(), method, params) 235 if err != nil { 236 return err 237 } 238 return c.Send(ctx, request) 239 } 240 241 // CallWithArrayParams enqueues a call request onto the Send queue. Params are 242 // in a form of array (e.g. []interface{}{"abcd"}). Requests are JSON encoded. 243 func (c *WSClient) CallWithArrayParams(ctx context.Context, method string, params []interface{}) error { 244 request, err := types.ArrayToRequest(c.nextRequestID(), method, params) 245 if err != nil { 246 return err 247 } 248 return c.Send(ctx, request) 249 } 250 251 // Private methods 252 253 func (c *WSClient) nextRequestID() types.JSONRPCIntID { 254 c.mtx.Lock() 255 id := c.nextReqID 256 c.nextReqID++ 257 c.mtx.Unlock() 258 return types.JSONRPCIntID(id) 259 } 260 261 func (c *WSClient) dial() error { 262 dialer := &websocket.Dialer{ 263 NetDial: c.Dialer, 264 Proxy: http.ProxyFromEnvironment, 265 } 266 rHeader := http.Header{} 267 conn, _, err := dialer.Dial(c.protocol+"://"+c.Address+c.Endpoint, rHeader) // nolint:bodyclose 268 if err != nil { 269 return err 270 } 271 c.conn = conn 272 return nil 273 } 274 275 // reconnect tries to redial up to maxReconnectAttempts with exponential 276 // backoff. 277 func (c *WSClient) reconnect() error { 278 attempt := 0 279 280 c.mtx.Lock() 281 c.reconnecting = true 282 c.mtx.Unlock() 283 defer func() { 284 c.mtx.Lock() 285 c.reconnecting = false 286 c.mtx.Unlock() 287 }() 288 289 for { 290 // nolint:gosec // G404: Use of weak random number generator 291 jitter := time.Duration(mrand.Float64() * float64(time.Second)) // 1s == (1e9 ns) 292 backoffDuration := jitter + ((1 << uint(attempt)) * time.Second) 293 294 c.Logger.Info("reconnecting", "attempt", attempt+1, "backoff_duration", backoffDuration) 295 time.Sleep(backoffDuration) 296 297 err := c.dial() 298 if err != nil { 299 c.Logger.Error("failed to redial", "err", err) 300 } else { 301 c.Logger.Info("reconnected") 302 if c.onReconnect != nil { 303 go c.onReconnect() 304 } 305 return nil 306 } 307 308 attempt++ 309 310 if attempt > c.maxReconnectAttempts { 311 return fmt.Errorf("reached maximum reconnect attempts: %w", err) 312 } 313 } 314 } 315 316 func (c *WSClient) startReadWriteRoutines() { 317 c.wg.Add(2) 318 c.readRoutineQuit = make(chan struct{}) 319 go c.readRoutine() 320 go c.writeRoutine() 321 } 322 323 func (c *WSClient) processBacklog() error { 324 select { 325 case request := <-c.backlog: 326 if c.writeWait > 0 { 327 if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil { 328 c.Logger.Error("failed to set write deadline", "err", err) 329 } 330 } 331 if err := c.conn.WriteJSON(request); err != nil { 332 c.Logger.Error("failed to resend request", "err", err) 333 c.reconnectAfter <- err 334 // requeue request 335 c.backlog <- request 336 return err 337 } 338 c.Logger.Info("resend a request", "req", request) 339 default: 340 } 341 return nil 342 } 343 344 func (c *WSClient) reconnectRoutine() { 345 for { 346 select { 347 case originalError := <-c.reconnectAfter: 348 // wait until writeRoutine and readRoutine finish 349 c.wg.Wait() 350 if err := c.reconnect(); err != nil { 351 c.Logger.Error("failed to reconnect", "err", err, "original_err", originalError) 352 if err = c.Stop(); err != nil { 353 c.Logger.Error("failed to stop conn", "error", err) 354 } 355 356 return 357 } 358 // drain reconnectAfter 359 LOOP: 360 for { 361 select { 362 case <-c.reconnectAfter: 363 default: 364 break LOOP 365 } 366 } 367 err := c.processBacklog() 368 if err == nil { 369 c.startReadWriteRoutines() 370 } 371 372 case <-c.Quit(): 373 return 374 } 375 } 376 } 377 378 // The client ensures that there is at most one writer to a connection by 379 // executing all writes from this goroutine. 380 func (c *WSClient) writeRoutine() { 381 var ticker *time.Ticker 382 if c.pingPeriod > 0 { 383 // ticker with a predefined period 384 ticker = time.NewTicker(c.pingPeriod) 385 } else { 386 // ticker that never fires 387 ticker = &time.Ticker{C: make(<-chan time.Time)} 388 } 389 390 defer func() { 391 ticker.Stop() 392 c.conn.Close() 393 // err != nil { 394 // ignore error; it will trigger in tests 395 // likely because it's closing an already closed connection 396 // } 397 c.wg.Done() 398 }() 399 400 for { 401 select { 402 case request := <-c.send: 403 if c.writeWait > 0 { 404 if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil { 405 c.Logger.Error("failed to set write deadline", "err", err) 406 } 407 } 408 if err := c.conn.WriteJSON(request); err != nil { 409 c.Logger.Error("failed to send request", "err", err) 410 c.reconnectAfter <- err 411 // add request to the backlog, so we don't lose it 412 c.backlog <- request 413 return 414 } 415 case <-ticker.C: 416 if c.writeWait > 0 { 417 if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil { 418 c.Logger.Error("failed to set write deadline", "err", err) 419 } 420 } 421 if err := c.conn.WriteMessage(websocket.PingMessage, []byte{}); err != nil { 422 c.Logger.Error("failed to write ping", "err", err) 423 c.reconnectAfter <- err 424 return 425 } 426 c.mtx.Lock() 427 c.sentLastPingAt = time.Now() 428 c.mtx.Unlock() 429 c.Logger.Debug("sent ping") 430 case <-c.readRoutineQuit: 431 return 432 case <-c.Quit(): 433 if err := c.conn.WriteMessage( 434 websocket.CloseMessage, 435 websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""), 436 ); err != nil { 437 c.Logger.Error("failed to write message", "err", err) 438 } 439 return 440 } 441 } 442 } 443 444 // The client ensures that there is at most one reader to a connection by 445 // executing all reads from this goroutine. 446 func (c *WSClient) readRoutine() { 447 defer func() { 448 c.conn.Close() 449 // err != nil { 450 // ignore error; it will trigger in tests 451 // likely because it's closing an already closed connection 452 // } 453 c.wg.Done() 454 }() 455 456 c.conn.SetPongHandler(func(string) error { 457 // gather latency stats 458 c.mtx.RLock() 459 t := c.sentLastPingAt 460 c.mtx.RUnlock() 461 c.PingPongLatencyTimer.UpdateSince(t) 462 463 c.Logger.Debug("got pong") 464 return nil 465 }) 466 467 for { 468 // reset deadline for every message type (control or data) 469 if c.readWait > 0 { 470 if err := c.conn.SetReadDeadline(time.Now().Add(c.readWait)); err != nil { 471 c.Logger.Error("failed to set read deadline", "err", err) 472 } 473 } 474 _, data, err := c.conn.ReadMessage() 475 if err != nil { 476 if !websocket.IsUnexpectedCloseError(err, websocket.CloseNormalClosure) { 477 return 478 } 479 480 c.Logger.Error("failed to read response", "err", err) 481 close(c.readRoutineQuit) 482 c.reconnectAfter <- err 483 return 484 } 485 486 var response types.RPCResponse 487 err = json.Unmarshal(data, &response) 488 if err != nil { 489 c.Logger.Error("failed to parse response", "err", err, "data", string(data)) 490 continue 491 } 492 493 if err = validateResponseID(response.ID); err != nil { 494 c.Logger.Error("error in response ID", "id", response.ID, "err", err) 495 continue 496 } 497 498 // TODO: events resulting from /subscribe do not work with -> 499 // because they are implemented as responses with the subscribe request's 500 // ID. According to the spec, they should be notifications (requests 501 // without IDs). 502 // https://github.com/tendermint/tendermint/issues/2949 503 // c.mtx.Lock() 504 // if _, ok := c.sentIDs[response.ID.(types.JSONRPCIntID)]; !ok { 505 // c.Logger.Error("unsolicited response ID", "id", response.ID, "expected", c.sentIDs) 506 // c.mtx.Unlock() 507 // continue 508 // } 509 // delete(c.sentIDs, response.ID.(types.JSONRPCIntID)) 510 // c.mtx.Unlock() 511 // Combine a non-blocking read on BaseService.Quit with a non-blocking write on ResponsesCh to avoid blocking 512 // c.wg.Wait() in c.Stop(). Note we rely on Quit being closed so that it sends unlimited Quit signals to stop 513 // both readRoutine and writeRoutine 514 515 c.Logger.Info("got response", "id", response.ID, "result", fmt.Sprintf("%X", response.Result)) 516 517 select { 518 case <-c.Quit(): 519 case c.ResponsesCh <- response: 520 } 521 } 522 } 523 524 // Predefined methods 525 526 // Subscribe to a query. Note the server must have a "subscribe" route 527 // defined. 528 func (c *WSClient) Subscribe(ctx context.Context, query string) error { 529 params := map[string]interface{}{"query": query} 530 return c.Call(ctx, "subscribe", params) 531 } 532 533 // Unsubscribe from a query. Note the server must have a "unsubscribe" route 534 // defined. 535 func (c *WSClient) Unsubscribe(ctx context.Context, query string) error { 536 params := map[string]interface{}{"query": query} 537 return c.Call(ctx, "unsubscribe", params) 538 } 539 540 // UnsubscribeAll from all. Note the server must have a "unsubscribe_all" route 541 // defined. 542 func (c *WSClient) UnsubscribeAll(ctx context.Context) error { 543 params := map[string]interface{}{} 544 return c.Call(ctx, "unsubscribe_all", params) 545 }