github.com/adoriasoft/tendermint@v0.34.0-dev1.0.20200722151356-96d84601a75a/rpc/jsonrpc/client/ws_client.go (about)

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"net"
     8  	"net/http"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/gorilla/websocket"
    13  	metrics "github.com/rcrowley/go-metrics"
    14  
    15  	tmrand "github.com/tendermint/tendermint/libs/rand"
    16  	"github.com/tendermint/tendermint/libs/service"
    17  	tmsync "github.com/tendermint/tendermint/libs/sync"
    18  	types "github.com/tendermint/tendermint/rpc/jsonrpc/types"
    19  )
    20  
    21  const (
    22  	defaultMaxReconnectAttempts = 25
    23  	defaultWriteWait            = 0
    24  	defaultReadWait             = 0
    25  	defaultPingPeriod           = 0
    26  )
    27  
    28  // WSClient is a JSON-RPC client, which uses WebSocket for communication with
    29  // the remote server.
    30  //
    31  // WSClient is safe for concurrent use by multiple goroutines.
    32  type WSClient struct { // nolint: maligned
    33  	conn *websocket.Conn
    34  
    35  	Address  string // IP:PORT or /path/to/socket
    36  	Endpoint string // /websocket/url/endpoint
    37  	Dialer   func(string, string) (net.Conn, error)
    38  
    39  	// Single user facing channel to read RPCResponses from, closed only when the
    40  	// client is being stopped.
    41  	ResponsesCh chan types.RPCResponse
    42  
    43  	// Callback, which will be called each time after successful reconnect.
    44  	onReconnect func()
    45  
    46  	// internal channels
    47  	send            chan types.RPCRequest // user requests
    48  	backlog         chan types.RPCRequest // stores a single user request received during a conn failure
    49  	reconnectAfter  chan error            // reconnect requests
    50  	readRoutineQuit chan struct{}         // a way for readRoutine to close writeRoutine
    51  
    52  	// Maximum reconnect attempts (0 or greater; default: 25).
    53  	maxReconnectAttempts int
    54  
    55  	// Support both ws and wss protocols
    56  	protocol string
    57  
    58  	wg sync.WaitGroup
    59  
    60  	mtx            tmsync.RWMutex
    61  	sentLastPingAt time.Time
    62  	reconnecting   bool
    63  	nextReqID      int
    64  	// sentIDs        map[types.JSONRPCIntID]bool // IDs of the requests currently in flight
    65  
    66  	// Time allowed to write a message to the server. 0 means block until operation succeeds.
    67  	writeWait time.Duration
    68  
    69  	// Time allowed to read the next message from the server. 0 means block until operation succeeds.
    70  	readWait time.Duration
    71  
    72  	// Send pings to server with this period. Must be less than readWait. If 0, no pings will be sent.
    73  	pingPeriod time.Duration
    74  
    75  	service.BaseService
    76  
    77  	// Time between sending a ping and receiving a pong. See
    78  	// https://godoc.org/github.com/rcrowley/go-metrics#Timer.
    79  	PingPongLatencyTimer metrics.Timer
    80  }
    81  
    82  // NewWS returns a new client. See the commentary on the func(*WSClient)
    83  // functions for a detailed description of how to configure ping period and
    84  // pong wait time. The endpoint argument must begin with a `/`.
    85  // An error is returned on invalid remote. The function panics when remote is nil.
    86  func NewWS(remoteAddr, endpoint string, options ...func(*WSClient)) (*WSClient, error) {
    87  	parsedURL, err := newParsedURL(remoteAddr)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  	// default to ws protocol, unless wss is explicitly specified
    92  	if parsedURL.Scheme != protoWSS {
    93  		parsedURL.Scheme = protoWS
    94  	}
    95  
    96  	dialFn, err := makeHTTPDialer(remoteAddr)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  
   101  	c := &WSClient{
   102  		Address:              parsedURL.GetTrimmedHostWithPath(),
   103  		Dialer:               dialFn,
   104  		Endpoint:             endpoint,
   105  		PingPongLatencyTimer: metrics.NewTimer(),
   106  
   107  		maxReconnectAttempts: defaultMaxReconnectAttempts,
   108  		readWait:             defaultReadWait,
   109  		writeWait:            defaultWriteWait,
   110  		pingPeriod:           defaultPingPeriod,
   111  		protocol:             parsedURL.Scheme,
   112  
   113  		// sentIDs: make(map[types.JSONRPCIntID]bool),
   114  	}
   115  	c.BaseService = *service.NewBaseService(nil, "WSClient", c)
   116  	for _, option := range options {
   117  		option(c)
   118  	}
   119  	return c, nil
   120  }
   121  
   122  // MaxReconnectAttempts sets the maximum number of reconnect attempts before returning an error.
   123  // It should only be used in the constructor and is not Goroutine-safe.
   124  func MaxReconnectAttempts(max int) func(*WSClient) {
   125  	return func(c *WSClient) {
   126  		c.maxReconnectAttempts = max
   127  	}
   128  }
   129  
   130  // ReadWait sets the amount of time to wait before a websocket read times out.
   131  // It should only be used in the constructor and is not Goroutine-safe.
   132  func ReadWait(readWait time.Duration) func(*WSClient) {
   133  	return func(c *WSClient) {
   134  		c.readWait = readWait
   135  	}
   136  }
   137  
   138  // WriteWait sets the amount of time to wait before a websocket write times out.
   139  // It should only be used in the constructor and is not Goroutine-safe.
   140  func WriteWait(writeWait time.Duration) func(*WSClient) {
   141  	return func(c *WSClient) {
   142  		c.writeWait = writeWait
   143  	}
   144  }
   145  
   146  // PingPeriod sets the duration for sending websocket pings.
   147  // It should only be used in the constructor - not Goroutine-safe.
   148  func PingPeriod(pingPeriod time.Duration) func(*WSClient) {
   149  	return func(c *WSClient) {
   150  		c.pingPeriod = pingPeriod
   151  	}
   152  }
   153  
   154  // OnReconnect sets the callback, which will be called every time after
   155  // successful reconnect.
   156  func OnReconnect(cb func()) func(*WSClient) {
   157  	return func(c *WSClient) {
   158  		c.onReconnect = cb
   159  	}
   160  }
   161  
   162  // String returns WS client full address.
   163  func (c *WSClient) String() string {
   164  	return fmt.Sprintf("WSClient{%s (%s)}", c.Address, c.Endpoint)
   165  }
   166  
   167  // OnStart implements service.Service by dialing a server and creating read and
   168  // write routines.
   169  func (c *WSClient) OnStart() error {
   170  	err := c.dial()
   171  	if err != nil {
   172  		return err
   173  	}
   174  
   175  	c.ResponsesCh = make(chan types.RPCResponse)
   176  
   177  	c.send = make(chan types.RPCRequest)
   178  	// 1 additional error may come from the read/write
   179  	// goroutine depending on which failed first.
   180  	c.reconnectAfter = make(chan error, 1)
   181  	// capacity for 1 request. a user won't be able to send more because the send
   182  	// channel is unbuffered.
   183  	c.backlog = make(chan types.RPCRequest, 1)
   184  
   185  	c.startReadWriteRoutines()
   186  	go c.reconnectRoutine()
   187  
   188  	return nil
   189  }
   190  
   191  // Stop overrides service.Service#Stop. There is no other way to wait until Quit
   192  // channel is closed.
   193  func (c *WSClient) Stop() error {
   194  	if err := c.BaseService.Stop(); err != nil {
   195  		return err
   196  	}
   197  	// only close user-facing channels when we can't write to them
   198  	c.wg.Wait()
   199  	close(c.ResponsesCh)
   200  
   201  	return nil
   202  }
   203  
   204  // IsReconnecting returns true if the client is reconnecting right now.
   205  func (c *WSClient) IsReconnecting() bool {
   206  	c.mtx.RLock()
   207  	defer c.mtx.RUnlock()
   208  	return c.reconnecting
   209  }
   210  
   211  // IsActive returns true if the client is running and not reconnecting.
   212  func (c *WSClient) IsActive() bool {
   213  	return c.IsRunning() && !c.IsReconnecting()
   214  }
   215  
   216  // Send the given RPC request to the server. Results will be available on
   217  // ResponsesCh, errors, if any, on ErrorsCh. Will block until send succeeds or
   218  // ctx.Done is closed.
   219  func (c *WSClient) Send(ctx context.Context, request types.RPCRequest) error {
   220  	select {
   221  	case c.send <- request:
   222  		c.Logger.Info("sent a request", "req", request)
   223  		// c.mtx.Lock()
   224  		// c.sentIDs[request.ID.(types.JSONRPCIntID)] = true
   225  		// c.mtx.Unlock()
   226  		return nil
   227  	case <-ctx.Done():
   228  		return ctx.Err()
   229  	}
   230  }
   231  
   232  // Call enqueues a call request onto the Send queue. Requests are JSON encoded.
   233  func (c *WSClient) Call(ctx context.Context, method string, params map[string]interface{}) error {
   234  	request, err := types.MapToRequest(c.nextRequestID(), method, params)
   235  	if err != nil {
   236  		return err
   237  	}
   238  	return c.Send(ctx, request)
   239  }
   240  
   241  // CallWithArrayParams enqueues a call request onto the Send queue. Params are
   242  // in a form of array (e.g. []interface{}{"abcd"}). Requests are JSON encoded.
   243  func (c *WSClient) CallWithArrayParams(ctx context.Context, method string, params []interface{}) error {
   244  	request, err := types.ArrayToRequest(c.nextRequestID(), method, params)
   245  	if err != nil {
   246  		return err
   247  	}
   248  	return c.Send(ctx, request)
   249  }
   250  
   251  ///////////////////////////////////////////////////////////////////////////////
   252  // Private methods
   253  
   254  func (c *WSClient) nextRequestID() types.JSONRPCIntID {
   255  	c.mtx.Lock()
   256  	id := c.nextReqID
   257  	c.nextReqID++
   258  	c.mtx.Unlock()
   259  	return types.JSONRPCIntID(id)
   260  }
   261  
   262  func (c *WSClient) dial() error {
   263  	dialer := &websocket.Dialer{
   264  		NetDial: c.Dialer,
   265  		Proxy:   http.ProxyFromEnvironment,
   266  	}
   267  	rHeader := http.Header{}
   268  	conn, _, err := dialer.Dial(c.protocol+"://"+c.Address+c.Endpoint, rHeader) // nolint:bodyclose
   269  	if err != nil {
   270  		return err
   271  	}
   272  	c.conn = conn
   273  	return nil
   274  }
   275  
   276  // reconnect tries to redial up to maxReconnectAttempts with exponential
   277  // backoff.
   278  func (c *WSClient) reconnect() error {
   279  	attempt := 0
   280  
   281  	c.mtx.Lock()
   282  	c.reconnecting = true
   283  	c.mtx.Unlock()
   284  	defer func() {
   285  		c.mtx.Lock()
   286  		c.reconnecting = false
   287  		c.mtx.Unlock()
   288  	}()
   289  
   290  	for {
   291  		jitter := time.Duration(tmrand.Float64() * float64(time.Second)) // 1s == (1e9 ns)
   292  		backoffDuration := jitter + ((1 << uint(attempt)) * time.Second)
   293  
   294  		c.Logger.Info("reconnecting", "attempt", attempt+1, "backoff_duration", backoffDuration)
   295  		time.Sleep(backoffDuration)
   296  
   297  		err := c.dial()
   298  		if err != nil {
   299  			c.Logger.Error("failed to redial", "err", err)
   300  		} else {
   301  			c.Logger.Info("reconnected")
   302  			if c.onReconnect != nil {
   303  				go c.onReconnect()
   304  			}
   305  			return nil
   306  		}
   307  
   308  		attempt++
   309  
   310  		if attempt > c.maxReconnectAttempts {
   311  			return fmt.Errorf("reached maximum reconnect attempts: %w", err)
   312  		}
   313  	}
   314  }
   315  
   316  func (c *WSClient) startReadWriteRoutines() {
   317  	c.wg.Add(2)
   318  	c.readRoutineQuit = make(chan struct{})
   319  	go c.readRoutine()
   320  	go c.writeRoutine()
   321  }
   322  
   323  func (c *WSClient) processBacklog() error {
   324  	select {
   325  	case request := <-c.backlog:
   326  		if c.writeWait > 0 {
   327  			if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil {
   328  				c.Logger.Error("failed to set write deadline", "err", err)
   329  			}
   330  		}
   331  		if err := c.conn.WriteJSON(request); err != nil {
   332  			c.Logger.Error("failed to resend request", "err", err)
   333  			c.reconnectAfter <- err
   334  			// requeue request
   335  			c.backlog <- request
   336  			return err
   337  		}
   338  		c.Logger.Info("resend a request", "req", request)
   339  	default:
   340  	}
   341  	return nil
   342  }
   343  
   344  func (c *WSClient) reconnectRoutine() {
   345  	for {
   346  		select {
   347  		case originalError := <-c.reconnectAfter:
   348  			// wait until writeRoutine and readRoutine finish
   349  			c.wg.Wait()
   350  			if err := c.reconnect(); err != nil {
   351  				c.Logger.Error("failed to reconnect", "err", err, "original_err", originalError)
   352  				c.Stop()
   353  				return
   354  			}
   355  			// drain reconnectAfter
   356  		LOOP:
   357  			for {
   358  				select {
   359  				case <-c.reconnectAfter:
   360  				default:
   361  					break LOOP
   362  				}
   363  			}
   364  			err := c.processBacklog()
   365  			if err == nil {
   366  				c.startReadWriteRoutines()
   367  			}
   368  
   369  		case <-c.Quit():
   370  			return
   371  		}
   372  	}
   373  }
   374  
   375  // The client ensures that there is at most one writer to a connection by
   376  // executing all writes from this goroutine.
   377  func (c *WSClient) writeRoutine() {
   378  	var ticker *time.Ticker
   379  	if c.pingPeriod > 0 {
   380  		// ticker with a predefined period
   381  		ticker = time.NewTicker(c.pingPeriod)
   382  	} else {
   383  		// ticker that never fires
   384  		ticker = &time.Ticker{C: make(<-chan time.Time)}
   385  	}
   386  
   387  	defer func() {
   388  		ticker.Stop()
   389  		c.conn.Close()
   390  		// err != nil {
   391  		// ignore error; it will trigger in tests
   392  		// likely because it's closing an already closed connection
   393  		// }
   394  		c.wg.Done()
   395  	}()
   396  
   397  	for {
   398  		select {
   399  		case request := <-c.send:
   400  			if c.writeWait > 0 {
   401  				if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil {
   402  					c.Logger.Error("failed to set write deadline", "err", err)
   403  				}
   404  			}
   405  			if err := c.conn.WriteJSON(request); err != nil {
   406  				c.Logger.Error("failed to send request", "err", err)
   407  				c.reconnectAfter <- err
   408  				// add request to the backlog, so we don't lose it
   409  				c.backlog <- request
   410  				return
   411  			}
   412  		case <-ticker.C:
   413  			if c.writeWait > 0 {
   414  				if err := c.conn.SetWriteDeadline(time.Now().Add(c.writeWait)); err != nil {
   415  					c.Logger.Error("failed to set write deadline", "err", err)
   416  				}
   417  			}
   418  			if err := c.conn.WriteMessage(websocket.PingMessage, []byte{}); err != nil {
   419  				c.Logger.Error("failed to write ping", "err", err)
   420  				c.reconnectAfter <- err
   421  				return
   422  			}
   423  			c.mtx.Lock()
   424  			c.sentLastPingAt = time.Now()
   425  			c.mtx.Unlock()
   426  			c.Logger.Debug("sent ping")
   427  		case <-c.readRoutineQuit:
   428  			return
   429  		case <-c.Quit():
   430  			if err := c.conn.WriteMessage(
   431  				websocket.CloseMessage,
   432  				websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""),
   433  			); err != nil {
   434  				c.Logger.Error("failed to write message", "err", err)
   435  			}
   436  			return
   437  		}
   438  	}
   439  }
   440  
   441  // The client ensures that there is at most one reader to a connection by
   442  // executing all reads from this goroutine.
   443  func (c *WSClient) readRoutine() {
   444  	defer func() {
   445  		c.conn.Close()
   446  		// err != nil {
   447  		// ignore error; it will trigger in tests
   448  		// likely because it's closing an already closed connection
   449  		// }
   450  		c.wg.Done()
   451  	}()
   452  
   453  	c.conn.SetPongHandler(func(string) error {
   454  		// gather latency stats
   455  		c.mtx.RLock()
   456  		t := c.sentLastPingAt
   457  		c.mtx.RUnlock()
   458  		c.PingPongLatencyTimer.UpdateSince(t)
   459  
   460  		c.Logger.Debug("got pong")
   461  		return nil
   462  	})
   463  
   464  	for {
   465  		// reset deadline for every message type (control or data)
   466  		if c.readWait > 0 {
   467  			if err := c.conn.SetReadDeadline(time.Now().Add(c.readWait)); err != nil {
   468  				c.Logger.Error("failed to set read deadline", "err", err)
   469  			}
   470  		}
   471  		_, data, err := c.conn.ReadMessage()
   472  		if err != nil {
   473  			if !websocket.IsUnexpectedCloseError(err, websocket.CloseNormalClosure) {
   474  				return
   475  			}
   476  
   477  			c.Logger.Error("failed to read response", "err", err)
   478  			close(c.readRoutineQuit)
   479  			c.reconnectAfter <- err
   480  			return
   481  		}
   482  
   483  		var response types.RPCResponse
   484  		err = json.Unmarshal(data, &response)
   485  		if err != nil {
   486  			c.Logger.Error("failed to parse response", "err", err, "data", string(data))
   487  			continue
   488  		}
   489  
   490  		if err = validateResponseID(response.ID); err != nil {
   491  			c.Logger.Error("error in response ID", "id", response.ID, "err", err)
   492  			continue
   493  		}
   494  
   495  		// TODO: events resulting from /subscribe do not work with ->
   496  		// because they are implemented as responses with the subscribe request's
   497  		// ID. According to the spec, they should be notifications (requests
   498  		// without IDs).
   499  		// https://github.com/tendermint/tendermint/issues/2949
   500  		// c.mtx.Lock()
   501  		// if _, ok := c.sentIDs[response.ID.(types.JSONRPCIntID)]; !ok {
   502  		// 	c.Logger.Error("unsolicited response ID", "id", response.ID, "expected", c.sentIDs)
   503  		// 	c.mtx.Unlock()
   504  		// 	continue
   505  		// }
   506  		// delete(c.sentIDs, response.ID.(types.JSONRPCIntID))
   507  		// c.mtx.Unlock()
   508  		// Combine a non-blocking read on BaseService.Quit with a non-blocking write on ResponsesCh to avoid blocking
   509  		// c.wg.Wait() in c.Stop(). Note we rely on Quit being closed so that it sends unlimited Quit signals to stop
   510  		// both readRoutine and writeRoutine
   511  
   512  		c.Logger.Info("got response", "id", response.ID, "result", fmt.Sprintf("%X", response.Result))
   513  
   514  		select {
   515  		case <-c.Quit():
   516  		case c.ResponsesCh <- response:
   517  		}
   518  	}
   519  }
   520  
   521  ///////////////////////////////////////////////////////////////////////////////
   522  // Predefined methods
   523  
   524  // Subscribe to a query. Note the server must have a "subscribe" route
   525  // defined.
   526  func (c *WSClient) Subscribe(ctx context.Context, query string) error {
   527  	params := map[string]interface{}{"query": query}
   528  	return c.Call(ctx, "subscribe", params)
   529  }
   530  
   531  // Unsubscribe from a query. Note the server must have a "unsubscribe" route
   532  // defined.
   533  func (c *WSClient) Unsubscribe(ctx context.Context, query string) error {
   534  	params := map[string]interface{}{"query": query}
   535  	return c.Call(ctx, "unsubscribe", params)
   536  }
   537  
   538  // UnsubscribeAll from all. Note the server must have a "unsubscribe_all" route
   539  // defined.
   540  func (c *WSClient) UnsubscribeAll(ctx context.Context) error {
   541  	params := map[string]interface{}{}
   542  	return c.Call(ctx, "unsubscribe_all", params)
   543  }