github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/rpc/jsonrpc/client/ws_client.go (about)

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	mrand "math/rand"
     8  	"net"
     9  	"net/http"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/gorilla/websocket"
    14  
    15  	"github.com/ari-anchor/sei-tendermint/libs/log"
    16  	rpctypes "github.com/ari-anchor/sei-tendermint/rpc/jsonrpc/types"
    17  )
    18  
    19  // wsOptions carries optional settings for a websocket connection.
    20  type wsOptions struct {
    21  	MaxReconnectAttempts uint          // maximum attempts to reconnect
    22  	ReadWait             time.Duration // deadline for any read op
    23  	WriteWait            time.Duration // deadline for any write op
    24  	PingPeriod           time.Duration // frequency with which pings are sent
    25  }
    26  
    27  // defaultWSOptions are the default websocket connection settings.
    28  var defaultWSOptions = wsOptions{
    29  	MaxReconnectAttempts: 10, // first: 2 sec, last: 17 min.
    30  	WriteWait:            10 * time.Second,
    31  	ReadWait:             0,
    32  	PingPeriod:           0,
    33  }
    34  
    35  // WSClient is a JSON-RPC client, which uses WebSocket for communication with
    36  // the remote server.
    37  //
    38  // WSClient is safe for concurrent use by multiple goroutines.
    39  type WSClient struct { // nolint: maligned
    40  	Logger log.Logger
    41  	conn   *websocket.Conn
    42  
    43  	Address  string // IP:PORT or /path/to/socket
    44  	Endpoint string // /websocket/url/endpoint
    45  	Dialer   func(string, string) (net.Conn, error)
    46  
    47  	// Single user facing channel to read RPCResponses from, closed only when the
    48  	// client is being stopped.
    49  	ResponsesCh chan rpctypes.RPCResponse
    50  
    51  	// Callback, which will be called each time after successful reconnect.
    52  	onReconnect func()
    53  
    54  	// internal channels
    55  	send            chan rpctypes.RPCRequest // user requests
    56  	backlog         chan rpctypes.RPCRequest // stores a single user request received during a conn failure
    57  	reconnectAfter  chan error               // reconnect requests
    58  	readRoutineQuit chan struct{}            // a way for readRoutine to close writeRoutine
    59  
    60  	// Maximum reconnect attempts (0 or greater; default: 25).
    61  	maxReconnectAttempts uint
    62  
    63  	// Support both ws and wss protocols
    64  	protocol string
    65  
    66  	wg sync.WaitGroup
    67  
    68  	mtx          sync.RWMutex
    69  	reconnecting bool
    70  	nextReqID    int
    71  	// sentIDs        map[types.JSONRPCIntID]bool // IDs of the requests currently in flight
    72  
    73  	// Time allowed to write a message to the server. 0 means block until operation succeeds.
    74  	writeWait time.Duration
    75  
    76  	// Time allowed to read the next message from the server. 0 means block until operation succeeds.
    77  	readWait time.Duration
    78  
    79  	// Send pings to server with this period. Must be less than readWait. If 0, no pings will be sent.
    80  	pingPeriod time.Duration
    81  }
    82  
    83  // NewWS returns a new client with default options. The endpoint argument must
    84  // begin with a `/`. An error is returned on invalid remote.
    85  func NewWS(remoteAddr, endpoint string) (*WSClient, error) {
    86  	opts := defaultWSOptions
    87  	parsedURL, err := newParsedURL(remoteAddr)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  	// default to ws protocol, unless wss or https is specified
    92  	if parsedURL.Scheme == protoHTTPS {
    93  		parsedURL.Scheme = protoWSS
    94  	} else if parsedURL.Scheme != protoWSS {
    95  		parsedURL.Scheme = protoWS
    96  	}
    97  
    98  	dialFn, err := makeHTTPDialer(remoteAddr)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	c := &WSClient{
   104  		Logger:               log.NewNopLogger(),
   105  		Address:              parsedURL.GetTrimmedHostWithPath(),
   106  		Dialer:               dialFn,
   107  		Endpoint:             endpoint,
   108  		maxReconnectAttempts: opts.MaxReconnectAttempts,
   109  		readWait:             opts.ReadWait,
   110  		writeWait:            opts.WriteWait,
   111  		pingPeriod:           opts.PingPeriod,
   112  		protocol:             parsedURL.Scheme,
   113  
   114  		// sentIDs: make(map[types.JSONRPCIntID]bool),
   115  	}
   116  	return c, nil
   117  }
   118  
   119  // OnReconnect sets the callback, which will be called every time after
   120  // successful reconnect.
   121  // Could only be set before Start.
   122  func (c *WSClient) OnReconnect(cb func()) {
   123  	c.onReconnect = cb
   124  }
   125  
   126  // String returns WS client full address.
   127  func (c *WSClient) String() string {
   128  	return fmt.Sprintf("WSClient{%s (%s)}", c.Address, c.Endpoint)
   129  }
   130  
   131  // Start dials the specified service address and starts the I/O routines.  The
   132  // service routines run until ctx terminates. To wait for the client to exit
   133  // after ctx ends, call Stop.
   134  func (c *WSClient) Start(ctx context.Context) error {
   135  	if err := c.dial(); err != nil {
   136  		return err
   137  	}
   138  
   139  	c.ResponsesCh = make(chan rpctypes.RPCResponse)
   140  
   141  	c.send = make(chan rpctypes.RPCRequest)
   142  	// 1 additional error may come from the read/write
   143  	// goroutine depending on which failed first.
   144  	c.reconnectAfter = make(chan error, 1)
   145  	// capacity for 1 request. a user won't be able to send more because the send
   146  	// channel is unbuffered.
   147  	c.backlog = make(chan rpctypes.RPCRequest, 1)
   148  
   149  	c.startReadWriteRoutines(ctx)
   150  	go c.reconnectRoutine(ctx)
   151  
   152  	return nil
   153  }
   154  
   155  // Stop blocks until the client is shut down and returns nil.
   156  //
   157  // TODO(creachadair): This method exists for compatibility with the original
   158  // service plumbing. Give it a better name (e.g., Wait).
   159  func (c *WSClient) Stop() error {
   160  	// only close user-facing channels when we can't write to them
   161  	c.wg.Wait()
   162  	close(c.ResponsesCh)
   163  	return nil
   164  }
   165  
   166  // IsReconnecting returns true if the client is reconnecting right now.
   167  func (c *WSClient) IsReconnecting() bool {
   168  	c.mtx.RLock()
   169  	defer c.mtx.RUnlock()
   170  	return c.reconnecting
   171  }
   172  
   173  // Send the given RPC request to the server. Results will be available on
   174  // ResponsesCh, errors, if any, on ErrorsCh. Will block until send succeeds or
   175  // ctx.Done is closed.
   176  func (c *WSClient) Send(ctx context.Context, request rpctypes.RPCRequest) error {
   177  	select {
   178  	case c.send <- request:
   179  		c.Logger.Info("sent a request", "req", request)
   180  		// c.mtx.Lock()
   181  		// c.sentIDs[request.ID.(types.JSONRPCIntID)] = true
   182  		// c.mtx.Unlock()
   183  		return nil
   184  	case <-ctx.Done():
   185  		return ctx.Err()
   186  	}
   187  }
   188  
   189  // Call enqueues a call request onto the Send queue. Requests are JSON encoded.
   190  func (c *WSClient) Call(ctx context.Context, method string, params map[string]interface{}) error {
   191  	req := rpctypes.NewRequest(c.nextRequestID())
   192  	if err := req.SetMethodAndParams(method, params); err != nil {
   193  		return err
   194  	}
   195  	return c.Send(ctx, req)
   196  }
   197  
   198  // Private methods
   199  
   200  func (c *WSClient) nextRequestID() int {
   201  	c.mtx.Lock()
   202  	defer c.mtx.Unlock()
   203  	id := c.nextReqID
   204  	c.nextReqID++
   205  	return id
   206  }
   207  
   208  func (c *WSClient) dial() error {
   209  	dialer := &websocket.Dialer{
   210  		NetDial: c.Dialer,
   211  		Proxy:   http.ProxyFromEnvironment,
   212  	}
   213  	rHeader := http.Header{}
   214  	conn, _, err := dialer.Dial(c.protocol+"://"+c.Address+c.Endpoint, rHeader) // nolint:bodyclose
   215  	if err != nil {
   216  		return err
   217  	}
   218  	c.conn = conn
   219  	return nil
   220  }
   221  
   222  // reconnect tries to redial up to maxReconnectAttempts with exponential
   223  // backoff.
   224  func (c *WSClient) reconnect(ctx context.Context) error {
   225  	attempt := uint(0)
   226  
   227  	c.mtx.Lock()
   228  	c.reconnecting = true
   229  	c.mtx.Unlock()
   230  	defer func() {
   231  		c.mtx.Lock()
   232  		c.reconnecting = false
   233  		c.mtx.Unlock()
   234  	}()
   235  
   236  	timer := time.NewTimer(0)
   237  	defer timer.Stop()
   238  
   239  	for {
   240  		// nolint:gosec // G404: Use of weak random number generator
   241  		jitter := time.Duration(mrand.Float64() * float64(time.Second)) // 1s == (1e9 ns)
   242  		backoffDuration := jitter + ((1 << attempt) * time.Second)
   243  
   244  		c.Logger.Info("reconnecting", "attempt", attempt+1, "backoff_duration", backoffDuration)
   245  		timer.Reset(backoffDuration)
   246  		select {
   247  		case <-ctx.Done():
   248  			return nil
   249  		case <-timer.C:
   250  		}
   251  
   252  		err := c.dial()
   253  		if err != nil {
   254  			c.Logger.Error("failed to redial", "err", err)
   255  		} else {
   256  			c.Logger.Info("reconnected")
   257  			if c.onReconnect != nil {
   258  				go c.onReconnect()
   259  			}
   260  			return nil
   261  		}
   262  
   263  		attempt++
   264  
   265  		if attempt > c.maxReconnectAttempts {
   266  			return fmt.Errorf("reached maximum reconnect attempts: %w", err)
   267  		}
   268  	}
   269  }
   270  
   271  func (c *WSClient) startReadWriteRoutines(ctx context.Context) {
   272  	c.wg.Add(2)
   273  	c.readRoutineQuit = make(chan struct{})
   274  	go c.readRoutine(ctx)
   275  	go c.writeRoutine(ctx)
   276  }
   277  
   278  func (c *WSClient) processBacklog() error {
   279  	select {
   280  	case request := <-c.backlog:
   281  		if c.writeWait > 0 {
   282  			if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil {
   283  				c.Logger.Error("failed to set write deadline", "err", err)
   284  			}
   285  		}
   286  		if err := c.conn.WriteJSON(request); err != nil {
   287  			c.Logger.Error("failed to resend request", "err", err)
   288  			c.reconnectAfter <- err
   289  			// requeue request
   290  			c.backlog <- request
   291  			return err
   292  		}
   293  		c.Logger.Info("resend a request", "req", request)
   294  	default:
   295  	}
   296  	return nil
   297  }
   298  
   299  func (c *WSClient) reconnectRoutine(ctx context.Context) {
   300  	for {
   301  		select {
   302  		case <-ctx.Done():
   303  			return
   304  		case originalError := <-c.reconnectAfter:
   305  			// wait until writeRoutine and readRoutine finish
   306  			c.wg.Wait()
   307  			if err := c.reconnect(ctx); err != nil {
   308  				c.Logger.Error("failed to reconnect", "err", err, "original_err", originalError)
   309  				if err = c.Stop(); err != nil {
   310  					c.Logger.Error("failed to stop conn", "error", err)
   311  				}
   312  
   313  				return
   314  			}
   315  			// drain reconnectAfter
   316  		LOOP:
   317  			for {
   318  				select {
   319  				case <-ctx.Done():
   320  					return
   321  				case <-c.reconnectAfter:
   322  				default:
   323  					break LOOP
   324  				}
   325  			}
   326  			err := c.processBacklog()
   327  			if err == nil {
   328  				c.startReadWriteRoutines(ctx)
   329  			}
   330  		}
   331  	}
   332  }
   333  
   334  // The client ensures that there is at most one writer to a connection by
   335  // executing all writes from this goroutine.
   336  func (c *WSClient) writeRoutine(ctx context.Context) {
   337  	var ticker *time.Ticker
   338  	if c.pingPeriod > 0 {
   339  		// ticker with a predefined period
   340  		ticker = time.NewTicker(c.pingPeriod)
   341  	} else {
   342  		// ticker that never fires
   343  		ticker = &time.Ticker{C: make(<-chan time.Time)}
   344  	}
   345  
   346  	defer func() {
   347  		ticker.Stop()
   348  		c.conn.Close()
   349  		c.wg.Done()
   350  	}()
   351  
   352  	for {
   353  		select {
   354  		case request := <-c.send:
   355  			if c.writeWait > 0 {
   356  				if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil {
   357  					c.Logger.Error("failed to set write deadline", "err", err)
   358  				}
   359  			}
   360  			if err := c.conn.WriteJSON(request); err != nil {
   361  				c.Logger.Error("failed to send request", "err", err)
   362  				c.reconnectAfter <- err
   363  				// add request to the backlog, so we don't lose it
   364  				c.backlog <- request
   365  				return
   366  			}
   367  		case <-ticker.C:
   368  			if c.writeWait > 0 {
   369  				if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil {
   370  					c.Logger.Error("failed to set write deadline", "err", err)
   371  				}
   372  			}
   373  			if err := c.conn.WriteMessage(websocket.PingMessage, []byte{}); err != nil {
   374  				c.Logger.Error("failed to write ping", "err", err)
   375  				c.reconnectAfter <- err
   376  				return
   377  			}
   378  		case <-c.readRoutineQuit:
   379  			return
   380  		case <-ctx.Done():
   381  			if err := c.conn.WriteMessage(
   382  				websocket.CloseMessage,
   383  				websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""),
   384  			); err != nil {
   385  				c.Logger.Error("failed to write message", "err", err)
   386  			}
   387  			return
   388  		}
   389  	}
   390  }
   391  
   392  // The client ensures that there is at most one reader to a connection by
   393  // executing all reads from this goroutine.
   394  func (c *WSClient) readRoutine(ctx context.Context) {
   395  	defer func() {
   396  		c.conn.Close()
   397  		c.wg.Done()
   398  	}()
   399  
   400  	for {
   401  		// reset deadline for every message type (control or data)
   402  		if c.readWait > 0 {
   403  			if err := c.conn.SetReadDeadline(time.Now().Add(c.readWait)); err != nil {
   404  				c.Logger.Error("failed to set read deadline", "err", err)
   405  			}
   406  		}
   407  		_, data, err := c.conn.ReadMessage()
   408  		if err != nil {
   409  			if !websocket.IsUnexpectedCloseError(err, websocket.CloseNormalClosure) {
   410  				return
   411  			}
   412  
   413  			c.Logger.Error("failed to read response", "err", err)
   414  			close(c.readRoutineQuit)
   415  			c.reconnectAfter <- err
   416  			return
   417  		}
   418  
   419  		var response rpctypes.RPCResponse
   420  		err = json.Unmarshal(data, &response)
   421  		if err != nil {
   422  			c.Logger.Error("failed to parse response", "err", err, "data", string(data))
   423  			continue
   424  		}
   425  
   426  		// TODO: events resulting from /subscribe do not work with ->
   427  		// because they are implemented as responses with the subscribe request's
   428  		// ID. According to the spec, they should be notifications (requests
   429  		// without IDs).
   430  		// https://github.com/ari-anchor/sei-tendermint/issues/2949
   431  		//
   432  		// Combine a non-blocking read on BaseService.Quit with a non-blocking write on ResponsesCh to avoid blocking
   433  		// c.wg.Wait() in c.Stop(). Note we rely on Quit being closed so that it sends unlimited Quit signals to stop
   434  		// both readRoutine and writeRoutine
   435  
   436  		c.Logger.Info("got response", "id", response.ID, "result", response.Result)
   437  
   438  		select {
   439  		case <-ctx.Done():
   440  			return
   441  		case c.ResponsesCh <- response:
   442  		}
   443  	}
   444  }
   445  
   446  // Predefined methods
   447  
   448  // Subscribe to a query. Note the server must have a "subscribe" route
   449  // defined.
   450  func (c *WSClient) Subscribe(ctx context.Context, query string) error {
   451  	params := map[string]interface{}{"query": query}
   452  	return c.Call(ctx, "subscribe", params)
   453  }
   454  
   455  // Unsubscribe from a query. Note the server must have a "unsubscribe" route
   456  // defined.
   457  func (c *WSClient) Unsubscribe(ctx context.Context, query string) error {
   458  	params := map[string]interface{}{"query": query}
   459  	return c.Call(ctx, "unsubscribe", params)
   460  }
   461  
   462  // UnsubscribeAll from all. Note the server must have a "unsubscribe_all" route
   463  // defined.
   464  func (c *WSClient) UnsubscribeAll(ctx context.Context) error {
   465  	params := map[string]interface{}{}
   466  	return c.Call(ctx, "unsubscribe_all", params)
   467  }