github.com/okex/exchain@v1.8.0/libs/tendermint/rpc/jsonrpc/client/ws_client.go (about) 1 package client 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "net" 8 "net/http" 9 "sync" 10 "time" 11 12 "github.com/gorilla/websocket" 13 "github.com/pkg/errors" 14 metrics "github.com/rcrowley/go-metrics" 15 16 amino "github.com/tendermint/go-amino" 17 18 tmrand "github.com/okex/exchain/libs/tendermint/libs/rand" 19 "github.com/okex/exchain/libs/tendermint/libs/service" 20 types "github.com/okex/exchain/libs/tendermint/rpc/jsonrpc/types" 21 ) 22 23 const ( 24 defaultMaxReconnectAttempts = 25 25 defaultWriteWait = 0 26 defaultReadWait = 0 27 defaultPingPeriod = 0 28 ) 29 30 // WSClient is a JSON-RPC client, which uses WebSocket for communication with 31 // the remote server. 32 // 33 // WSClient is safe for concurrent use by multiple goroutines. 34 type WSClient struct { // nolint: maligned 35 conn *websocket.Conn 36 cdc *amino.Codec 37 38 Address string // IP:PORT or /path/to/socket 39 Endpoint string // /websocket/url/endpoint 40 Dialer func(string, string) (net.Conn, error) 41 42 // Single user facing channel to read RPCResponses from, closed only when the 43 // client is being stopped. 44 ResponsesCh chan types.RPCResponse 45 46 // Callback, which will be called each time after successful reconnect. 47 onReconnect func() 48 49 // internal channels 50 send chan types.RPCRequest // user requests 51 backlog chan types.RPCRequest // stores a single user request received during a conn failure 52 reconnectAfter chan error // reconnect requests 53 readRoutineQuit chan struct{} // a way for readRoutine to close writeRoutine 54 55 // Maximum reconnect attempts (0 or greater; default: 25). 56 maxReconnectAttempts int 57 58 // Support both ws and wss protocols 59 protocol string 60 61 wg sync.WaitGroup 62 63 mtx sync.RWMutex 64 sentLastPingAt time.Time 65 reconnecting bool 66 nextReqID int 67 // sentIDs map[types.JSONRPCIntID]bool // IDs of the requests currently in flight 68 69 // Time allowed to write a message to the server. 0 means block until operation succeeds. 70 writeWait time.Duration 71 72 // Time allowed to read the next message from the server. 0 means block until operation succeeds. 73 readWait time.Duration 74 75 // Send pings to server with this period. Must be less than readWait. If 0, no pings will be sent. 76 pingPeriod time.Duration 77 78 service.BaseService 79 80 // Time between sending a ping and receiving a pong. See 81 // https://godoc.org/github.com/rcrowley/go-metrics#Timer. 82 PingPongLatencyTimer metrics.Timer 83 } 84 85 // NewWS returns a new client. See the commentary on the func(*WSClient) 86 // functions for a detailed description of how to configure ping period and 87 // pong wait time. The endpoint argument must begin with a `/`. 88 // An error is returned on invalid remote. The function panics when remote is nil. 89 func NewWS(remoteAddr, endpoint string, options ...func(*WSClient)) (*WSClient, error) { 90 parsedURL, err := newParsedURL(remoteAddr) 91 if err != nil { 92 return nil, err 93 } 94 // default to ws protocol, unless wss is explicitly specified 95 if parsedURL.Scheme != protoWSS { 96 parsedURL.Scheme = protoWS 97 } 98 99 dialFn, err := makeHTTPDialer(remoteAddr) 100 if err != nil { 101 return nil, err 102 } 103 104 c := &WSClient{ 105 cdc: amino.NewCodec(), 106 Address: parsedURL.GetTrimmedHostWithPath(), 107 Dialer: dialFn, 108 Endpoint: endpoint, 109 PingPongLatencyTimer: metrics.NewTimer(), 110 111 maxReconnectAttempts: defaultMaxReconnectAttempts, 112 readWait: defaultReadWait, 113 writeWait: defaultWriteWait, 114 pingPeriod: defaultPingPeriod, 115 protocol: parsedURL.Scheme, 116 117 // sentIDs: make(map[types.JSONRPCIntID]bool), 118 } 119 c.BaseService = *service.NewBaseService(nil, "WSClient", c) 120 for _, option := range options { 121 option(c) 122 } 123 return c, nil 124 } 125 126 // MaxReconnectAttempts sets the maximum number of reconnect attempts before returning an error. 127 // It should only be used in the constructor and is not Goroutine-safe. 128 func MaxReconnectAttempts(max int) func(*WSClient) { 129 return func(c *WSClient) { 130 c.maxReconnectAttempts = max 131 } 132 } 133 134 // ReadWait sets the amount of time to wait before a websocket read times out. 135 // It should only be used in the constructor and is not Goroutine-safe. 136 func ReadWait(readWait time.Duration) func(*WSClient) { 137 return func(c *WSClient) { 138 c.readWait = readWait 139 } 140 } 141 142 // WriteWait sets the amount of time to wait before a websocket write times out. 143 // It should only be used in the constructor and is not Goroutine-safe. 144 func WriteWait(writeWait time.Duration) func(*WSClient) { 145 return func(c *WSClient) { 146 c.writeWait = writeWait 147 } 148 } 149 150 // PingPeriod sets the duration for sending websocket pings. 151 // It should only be used in the constructor - not Goroutine-safe. 152 func PingPeriod(pingPeriod time.Duration) func(*WSClient) { 153 return func(c *WSClient) { 154 c.pingPeriod = pingPeriod 155 } 156 } 157 158 // OnReconnect sets the callback, which will be called every time after 159 // successful reconnect. 160 func OnReconnect(cb func()) func(*WSClient) { 161 return func(c *WSClient) { 162 c.onReconnect = cb 163 } 164 } 165 166 // String returns WS client full address. 167 func (c *WSClient) String() string { 168 return fmt.Sprintf("WSClient{%s (%s)}", c.Address, c.Endpoint) 169 } 170 171 // OnStart implements service.Service by dialing a server and creating read and 172 // write routines. 173 func (c *WSClient) OnStart() error { 174 err := c.dial() 175 if err != nil { 176 return err 177 } 178 179 c.ResponsesCh = make(chan types.RPCResponse) 180 181 c.send = make(chan types.RPCRequest) 182 // 1 additional error may come from the read/write 183 // goroutine depending on which failed first. 184 c.reconnectAfter = make(chan error, 1) 185 // capacity for 1 request. a user won't be able to send more because the send 186 // channel is unbuffered. 187 c.backlog = make(chan types.RPCRequest, 1) 188 189 c.startReadWriteRoutines() 190 go c.reconnectRoutine() 191 192 return nil 193 } 194 195 // Stop overrides service.Service#Stop. There is no other way to wait until Quit 196 // channel is closed. 197 func (c *WSClient) Stop() error { 198 if err := c.BaseService.Stop(); err != nil { 199 return err 200 } 201 // only close user-facing channels when we can't write to them 202 c.wg.Wait() 203 close(c.ResponsesCh) 204 205 return nil 206 } 207 208 // IsReconnecting returns true if the client is reconnecting right now. 209 func (c *WSClient) IsReconnecting() bool { 210 c.mtx.RLock() 211 defer c.mtx.RUnlock() 212 return c.reconnecting 213 } 214 215 // IsActive returns true if the client is running and not reconnecting. 216 func (c *WSClient) IsActive() bool { 217 return c.IsRunning() && !c.IsReconnecting() 218 } 219 220 // Send the given RPC request to the server. Results will be available on 221 // ResponsesCh, errors, if any, on ErrorsCh. Will block until send succeeds or 222 // ctx.Done is closed. 223 func (c *WSClient) Send(ctx context.Context, request types.RPCRequest) error { 224 select { 225 case c.send <- request: 226 c.Logger.Info("sent a request", "req", request) 227 // c.mtx.Lock() 228 // c.sentIDs[request.ID.(types.JSONRPCIntID)] = true 229 // c.mtx.Unlock() 230 return nil 231 case <-ctx.Done(): 232 return ctx.Err() 233 } 234 } 235 236 // Call enqueues a call request onto the Send queue. Requests are JSON encoded. 237 func (c *WSClient) Call(ctx context.Context, method string, params map[string]interface{}) error { 238 request, err := types.MapToRequest(c.cdc, c.nextRequestID(), method, params) 239 if err != nil { 240 return err 241 } 242 return c.Send(ctx, request) 243 } 244 245 // CallWithArrayParams enqueues a call request onto the Send queue. Params are 246 // in a form of array (e.g. []interface{}{"abcd"}). Requests are JSON encoded. 247 func (c *WSClient) CallWithArrayParams(ctx context.Context, method string, params []interface{}) error { 248 request, err := types.ArrayToRequest(c.cdc, c.nextRequestID(), method, params) 249 if err != nil { 250 return err 251 } 252 return c.Send(ctx, request) 253 } 254 255 func (c *WSClient) Codec() *amino.Codec { return c.cdc } 256 func (c *WSClient) SetCodec(cdc *amino.Codec) { c.cdc = cdc } 257 258 /////////////////////////////////////////////////////////////////////////////// 259 // Private methods 260 261 func (c *WSClient) nextRequestID() types.JSONRPCIntID { 262 c.mtx.Lock() 263 id := c.nextReqID 264 c.nextReqID++ 265 c.mtx.Unlock() 266 return types.JSONRPCIntID(id) 267 } 268 269 func (c *WSClient) dial() error { 270 dialer := &websocket.Dialer{ 271 NetDial: c.Dialer, 272 Proxy: http.ProxyFromEnvironment, 273 } 274 rHeader := http.Header{} 275 conn, _, err := dialer.Dial(c.protocol+"://"+c.Address+c.Endpoint, rHeader) // nolint:bodyclose 276 if err != nil { 277 return err 278 } 279 c.conn = conn 280 return nil 281 } 282 283 // reconnect tries to redial up to maxReconnectAttempts with exponential 284 // backoff. 285 func (c *WSClient) reconnect() error { 286 attempt := 0 287 288 c.mtx.Lock() 289 c.reconnecting = true 290 c.mtx.Unlock() 291 defer func() { 292 c.mtx.Lock() 293 c.reconnecting = false 294 c.mtx.Unlock() 295 }() 296 297 for { 298 jitterSeconds := time.Duration(tmrand.Float64() * float64(time.Second)) // 1s == (1e9 ns) 299 backoffDuration := jitterSeconds + ((1 << uint(attempt)) * time.Second) 300 301 c.Logger.Info("reconnecting", "attempt", attempt+1, "backoff_duration", backoffDuration) 302 time.Sleep(backoffDuration) 303 304 err := c.dial() 305 if err != nil { 306 c.Logger.Error("failed to redial", "err", err) 307 } else { 308 c.Logger.Info("reconnected") 309 if c.onReconnect != nil { 310 go c.onReconnect() 311 } 312 return nil 313 } 314 315 attempt++ 316 317 if attempt > c.maxReconnectAttempts { 318 return errors.Wrap(err, "reached maximum reconnect attempts") 319 } 320 } 321 } 322 323 func (c *WSClient) startReadWriteRoutines() { 324 c.wg.Add(2) 325 c.readRoutineQuit = make(chan struct{}) 326 go c.readRoutine() 327 go c.writeRoutine() 328 } 329 330 func (c *WSClient) processBacklog() error { 331 select { 332 case request := <-c.backlog: 333 if c.writeWait > 0 { 334 if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil { 335 c.Logger.Error("failed to set write deadline", "err", err) 336 } 337 } 338 if err := c.conn.WriteJSON(request); err != nil { 339 c.Logger.Error("failed to resend request", "err", err) 340 c.reconnectAfter <- err 341 // requeue request 342 c.backlog <- request 343 return err 344 } 345 c.Logger.Info("resend a request", "req", request) 346 default: 347 } 348 return nil 349 } 350 351 func (c *WSClient) reconnectRoutine() { 352 for { 353 select { 354 case originalError := <-c.reconnectAfter: 355 // wait until writeRoutine and readRoutine finish 356 c.wg.Wait() 357 if err := c.reconnect(); err != nil { 358 c.Logger.Error("failed to reconnect", "err", err, "original_err", originalError) 359 c.Stop() 360 return 361 } 362 // drain reconnectAfter 363 LOOP: 364 for { 365 select { 366 case <-c.reconnectAfter: 367 default: 368 break LOOP 369 } 370 } 371 err := c.processBacklog() 372 if err == nil { 373 c.startReadWriteRoutines() 374 } 375 376 case <-c.Quit(): 377 return 378 } 379 } 380 } 381 382 // The client ensures that there is at most one writer to a connection by 383 // executing all writes from this goroutine. 384 func (c *WSClient) writeRoutine() { 385 var ticker *time.Ticker 386 if c.pingPeriod > 0 { 387 // ticker with a predefined period 388 ticker = time.NewTicker(c.pingPeriod) 389 } else { 390 // ticker that never fires 391 ticker = &time.Ticker{C: make(<-chan time.Time)} 392 } 393 394 defer func() { 395 ticker.Stop() 396 c.conn.Close() 397 // err != nil { 398 // ignore error; it will trigger in tests 399 // likely because it's closing an already closed connection 400 // } 401 c.wg.Done() 402 }() 403 404 for { 405 select { 406 case request := <-c.send: 407 if c.writeWait > 0 { 408 if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil { 409 c.Logger.Error("failed to set write deadline", "err", err) 410 } 411 } 412 if err := c.conn.WriteJSON(request); err != nil { 413 c.Logger.Error("failed to send request", "err", err) 414 c.reconnectAfter <- err 415 // add request to the backlog, so we don't lose it 416 c.backlog <- request 417 return 418 } 419 case <-ticker.C: 420 if c.writeWait > 0 { 421 if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil { 422 c.Logger.Error("failed to set write deadline", "err", err) 423 } 424 } 425 if err := c.conn.WriteMessage(websocket.PingMessage, []byte{}); err != nil { 426 c.Logger.Error("failed to write ping", "err", err) 427 c.reconnectAfter <- err 428 return 429 } 430 c.mtx.Lock() 431 c.sentLastPingAt = time.Now() 432 c.mtx.Unlock() 433 c.Logger.Debug("sent ping") 434 case <-c.readRoutineQuit: 435 return 436 case <-c.Quit(): 437 if err := c.conn.WriteMessage( 438 websocket.CloseMessage, 439 websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""), 440 ); err != nil { 441 c.Logger.Error("failed to write message", "err", err) 442 } 443 return 444 } 445 } 446 } 447 448 // The client ensures that there is at most one reader to a connection by 449 // executing all reads from this goroutine. 450 func (c *WSClient) readRoutine() { 451 defer func() { 452 c.conn.Close() 453 // err != nil { 454 // ignore error; it will trigger in tests 455 // likely because it's closing an already closed connection 456 // } 457 c.wg.Done() 458 }() 459 460 c.conn.SetPongHandler(func(string) error { 461 // gather latency stats 462 c.mtx.RLock() 463 t := c.sentLastPingAt 464 c.mtx.RUnlock() 465 c.PingPongLatencyTimer.UpdateSince(t) 466 467 c.Logger.Debug("got pong") 468 return nil 469 }) 470 471 for { 472 // reset deadline for every message type (control or data) 473 if c.readWait > 0 { 474 if err := c.conn.SetReadDeadline(time.Now().Add(c.readWait)); err != nil { 475 c.Logger.Error("failed to set read deadline", "err", err) 476 } 477 } 478 _, data, err := c.conn.ReadMessage() 479 if err != nil { 480 if !websocket.IsUnexpectedCloseError(err, websocket.CloseNormalClosure) { 481 return 482 } 483 484 c.Logger.Error("failed to read response", "err", err) 485 close(c.readRoutineQuit) 486 c.reconnectAfter <- err 487 return 488 } 489 490 var response types.RPCResponse 491 err = json.Unmarshal(data, &response) 492 if err != nil { 493 c.Logger.Error("failed to parse response", "err", err, "data", string(data)) 494 continue 495 } 496 497 if err = validateResponseID(response.ID); err != nil { 498 c.Logger.Error("error in response ID", "id", response.ID, "err", err) 499 continue 500 } 501 502 // TODO: events resulting from /subscribe do not work with -> 503 // because they are implemented as responses with the subscribe request's 504 // ID. According to the spec, they should be notifications (requests 505 // without IDs). 506 // https://github.com/tendermint/tendermint/issues/2949 507 // c.mtx.Lock() 508 // if _, ok := c.sentIDs[response.ID.(types.JSONRPCIntID)]; !ok { 509 // c.Logger.Error("unsolicited response ID", "id", response.ID, "expected", c.sentIDs) 510 // c.mtx.Unlock() 511 // continue 512 // } 513 // delete(c.sentIDs, response.ID.(types.JSONRPCIntID)) 514 // c.mtx.Unlock() 515 // Combine a non-blocking read on BaseService.Quit with a non-blocking write on ResponsesCh to avoid blocking 516 // c.wg.Wait() in c.Stop(). Note we rely on Quit being closed so that it sends unlimited Quit signals to stop 517 // both readRoutine and writeRoutine 518 519 c.Logger.Info("got response", "id", response.ID, "result", fmt.Sprintf("%X", response.Result)) 520 521 select { 522 case <-c.Quit(): 523 case c.ResponsesCh <- response: 524 } 525 } 526 } 527 528 /////////////////////////////////////////////////////////////////////////////// 529 // Predefined methods 530 531 // Subscribe to a query. Note the server must have a "subscribe" route 532 // defined. 533 func (c *WSClient) Subscribe(ctx context.Context, query string) error { 534 params := map[string]interface{}{"query": query} 535 return c.Call(ctx, "subscribe", params) 536 } 537 538 // Unsubscribe from a query. Note the server must have a "unsubscribe" route 539 // defined. 540 func (c *WSClient) Unsubscribe(ctx context.Context, query string) error { 541 params := map[string]interface{}{"query": query} 542 return c.Call(ctx, "unsubscribe", params) 543 } 544 545 // UnsubscribeAll from all. Note the server must have a "unsubscribe_all" route 546 // defined. 547 func (c *WSClient) UnsubscribeAll(ctx context.Context) error { 548 params := map[string]interface{}{} 549 return c.Call(ctx, "unsubscribe_all", params) 550 }