github.com/matrixorigin/matrixone@v1.2.0/pkg/common/morpc/backend.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package morpc
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"net"
    22  	"runtime"
    23  	"sync"
    24  	"sync/atomic"
    25  	"time"
    26  
    27  	"github.com/fagongzi/goetty/v2"
    28  	"github.com/google/uuid"
    29  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    30  	"github.com/matrixorigin/matrixone/pkg/common/moprobe"
    31  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    32  	"github.com/matrixorigin/matrixone/pkg/logutil"
    33  	"github.com/matrixorigin/matrixone/pkg/util/errutil"
    34  	"go.uber.org/zap"
    35  )
    36  
var (
	// Values stored in remoteBackend.stateMu.state. A backend starts in
	// stateRunning (the zero value) and is moved to stateStopped by Close.
	stateRunning = int32(0)
	stateStopped = int32(1)

	// Shared error values. backendClosed is reported once the backend has been
	// stopped; messageSkipped is reported to a future whose request was
	// rejected by the configured send filter.
	backendClosed  = moerr.NewBackendClosedNoCtx()
	messageSkipped = moerr.NewInvalidStateNoCtx("request is skipped")
)
    44  
    45  // WithBackendLogger set the backend logger
    46  func WithBackendLogger(logger *zap.Logger) BackendOption {
    47  	return func(rb *remoteBackend) {
    48  		rb.logger = logger
    49  	}
    50  }
    51  
    52  // WithBackendBufferSize set the buffer size of the wait send chan.
    53  // Default is 1024.
    54  func WithBackendBufferSize(size int) BackendOption {
    55  	return func(rb *remoteBackend) {
    56  		rb.options.bufferSize = size
    57  	}
    58  }
    59  
    60  // WithBackendBusyBufferSize if len(writeC) >= size, backend is busy.
    61  // Default is 3/4 buffer size.
    62  func WithBackendBusyBufferSize(size int) BackendOption {
    63  	return func(rb *remoteBackend) {
    64  		rb.options.busySize = size
    65  	}
    66  }
    67  
    68  // WithBackendFilter set send filter func. Input ready to send futures, output
    69  // is really need to be send futures.
    70  func WithBackendFilter(filter func(Message, string) bool) BackendOption {
    71  	return func(rb *remoteBackend) {
    72  		rb.options.filter = filter
    73  	}
    74  }
    75  
    76  // WithBackendBatchSendSize set the maximum number of messages to be sent together
    77  // at each batch. Default is 8.
    78  func WithBackendBatchSendSize(size int) BackendOption {
    79  	return func(rb *remoteBackend) {
    80  		rb.options.batchSendSize = size
    81  	}
    82  }
    83  
    84  // WithBackendConnectTimeout set the timeout for connect to remote. Default 10s.
    85  func WithBackendConnectTimeout(timeout time.Duration) BackendOption {
    86  	return func(rb *remoteBackend) {
    87  		rb.options.connectTimeout = timeout
    88  	}
    89  }
    90  
    91  // WithBackendHasPayloadResponse has payload response means read a response that hold
    92  // a slice of data in the read buffer to avoid data copy.
    93  func WithBackendHasPayloadResponse() BackendOption {
    94  	return func(rb *remoteBackend) {
    95  		rb.options.hasPayloadResponse = true
    96  	}
    97  }
    98  
    99  // WithBackendStreamBufferSize set buffer size for stream receive message chan
   100  func WithBackendStreamBufferSize(value int) BackendOption {
   101  	return func(rb *remoteBackend) {
   102  		rb.options.streamBufferSize = value
   103  	}
   104  }
   105  
   106  // WithBackendGoettyOptions set goetty connection options. e.g. set read/write buffer
   107  // size, adjust net.Conn attribute etc.
   108  func WithBackendGoettyOptions(options ...goetty.Option) BackendOption {
   109  	return func(rb *remoteBackend) {
   110  		rb.options.goettyOptions = options
   111  	}
   112  }
   113  
   114  // WithBackendReadTimeout set read timeout for read loop.
   115  func WithBackendReadTimeout(value time.Duration) BackendOption {
   116  	return func(rb *remoteBackend) {
   117  		rb.options.readTimeout = value
   118  	}
   119  }
   120  
   121  // WithBackendMetrics setup backend metrics
   122  func WithBackendMetrics(metrics *metrics) BackendOption {
   123  	return func(rb *remoteBackend) {
   124  		rb.metrics = metrics
   125  	}
   126  }
   127  
   128  // WithBackendFreeOrphansResponse setup free orphans response func
   129  func WithBackendFreeOrphansResponse(value func(Message)) BackendOption {
   130  	return func(rb *remoteBackend) {
   131  		rb.options.freeResponse = value
   132  	}
   133  }
   134  
   135  // WithDisconnectAfterRead used for testing. Close the connection
   136  // after read N messages.
   137  func WithDisconnectAfterRead(n int) BackendOption {
   138  	return func(rb *remoteBackend) {
   139  		rb.options.disconnectAfterRead = n
   140  	}
   141  }
   142  
// remoteBackend is the goetty connection based backend created by
// NewRemoteBackend.
//
// Requests are queued on writeC by doSend and drained by the write loop, which
// runs on stopper; responses are read by the read loop, which runs on
// readStopper. stopWriteC is closed to stop the write loop, and resetConnC
// (capacity 1) carries the read loop's request to re-establish the
// connection. ctx is cancelled by Close and aborts connection attempts.
// pingTimer and lastPingTime are used by the write loop to emit periodic
// pings.
type remoteBackend struct {
	remote       string
	metrics      *metrics
	logger       *zap.Logger
	codec        Codec
	conn         goetty.IOSession
	writeC       chan *Future
	stopWriteC   chan struct{}
	resetConnC   chan struct{}
	stopper      *stopper.Stopper
	readStopper  *stopper.Stopper
	closeOnce    sync.Once
	ctx          context.Context
	cancel       context.CancelFunc
	cancelOnce   sync.Once
	pingTimer    *time.Timer
	lastPingTime time.Time

	// options holds the values set through BackendOption functions; unset
	// values are defaulted in adjust.
	options struct {
		hasPayloadResponse  bool
		goettyOptions       []goetty.Option
		connectTimeout      time.Duration
		bufferSize          int
		busySize            int
		batchSendSize       int
		streamBufferSize    int
		disconnectAfterRead int
		filter              func(msg Message, backendAddr string) bool
		readTimeout         time.Duration
		freeResponse        func(Message)
	}

	// stateMu guards the lifecycle state (stateRunning/stateStopped), whether
	// a read loop is currently active, and the exclusive-use flag toggled by
	// Lock and Unlock.
	stateMu struct {
		sync.RWMutex
		state          int32
		readLoopActive bool
		locked         bool
	}

	// mu guards the futures awaiting a response and the active streams, both
	// keyed by the id produced by nextID.
	mu struct {
		sync.RWMutex
		futures       map[uint64]*Future
		activeStreams map[uint64]*stream
	}

	// atomic groups the fields that are accessed without holding a mutex: the
	// id counter and the last activity time.
	atomic struct {
		id             uint64
		lastActiveTime atomic.Value //time.Time
	}

	// pool recycles stream and Future objects.
	pool struct {
		streams *sync.Pool
		futures *sync.Pool
	}
}
   198  
   199  // NewRemoteBackend create a goetty connection based backend. This backend will start 2
   200  // goroutine, one for read and one for write. If there is a network error in the underlying
   201  // goetty connection, it will automatically retry until the Future times out.
   202  func NewRemoteBackend(
   203  	remote string,
   204  	codec Codec,
   205  	options ...BackendOption) (Backend, error) {
   206  	rb := &remoteBackend{
   207  		stopper:     stopper.NewStopper(fmt.Sprintf("backend-write-%s", remote)),
   208  		readStopper: stopper.NewStopper(fmt.Sprintf("backend-read-%s", remote)),
   209  		remote:      remote,
   210  		codec:       codec,
   211  		resetConnC:  make(chan struct{}, 1),
   212  		stopWriteC:  make(chan struct{}),
   213  	}
   214  
   215  	for _, opt := range options {
   216  		opt(rb)
   217  	}
   218  	rb.adjust()
   219  	rb.metrics.createCounter.Inc()
   220  
   221  	rb.ctx, rb.cancel = context.WithCancel(context.Background())
   222  	rb.pool.futures = &sync.Pool{
   223  		New: func() interface{} {
   224  			return newFuture(rb.releaseFuture)
   225  		},
   226  	}
   227  	rb.pool.streams = &sync.Pool{
   228  		New: func() any {
   229  			return newStream(
   230  				rb,
   231  				make(chan Message, rb.options.streamBufferSize),
   232  				rb.newFuture,
   233  				rb.doSend,
   234  				rb.removeActiveStream,
   235  				rb.active)
   236  		},
   237  	}
   238  	rb.writeC = make(chan *Future, rb.options.bufferSize)
   239  	rb.mu.futures = make(map[uint64]*Future, rb.options.bufferSize)
   240  	rb.mu.activeStreams = make(map[uint64]*stream, rb.options.bufferSize)
   241  	if rb.options.hasPayloadResponse {
   242  		rb.options.goettyOptions = append(rb.options.goettyOptions,
   243  			goetty.WithSessionDisableAutoResetInBuffer())
   244  	}
   245  	rb.conn = goetty.NewIOSession(rb.options.goettyOptions...)
   246  
   247  	if err := rb.resetConn(); err != nil {
   248  		rb.logger.Error("connect to remote failed")
   249  		return nil, err
   250  	}
   251  	rb.activeReadLoop(false)
   252  
   253  	if err := rb.stopper.RunTask(rb.writeLoop); err != nil {
   254  		return nil, err
   255  	}
   256  
   257  	rb.active()
   258  	return rb, nil
   259  }
   260  
   261  func (rb *remoteBackend) adjust() {
   262  	if rb.options.bufferSize == 0 {
   263  		rb.options.bufferSize = 1024
   264  	}
   265  	if rb.options.busySize == 0 {
   266  		rb.options.busySize = rb.options.bufferSize * 3 / 4
   267  		if rb.options.busySize == 0 {
   268  			rb.options.busySize = 1
   269  		}
   270  	}
   271  	if rb.options.batchSendSize == 0 {
   272  		rb.options.batchSendSize = 8
   273  	}
   274  	if rb.options.connectTimeout == 0 {
   275  		rb.options.connectTimeout = time.Second * 5
   276  	}
   277  	if rb.options.streamBufferSize == 0 {
   278  		rb.options.streamBufferSize = 16
   279  	}
   280  	if rb.options.filter == nil {
   281  		rb.options.filter = func(Message, string) bool {
   282  			return true
   283  		}
   284  	}
   285  
   286  	uid, _ := uuid.NewV7()
   287  	rb.logger = logutil.Adjust(rb.logger).With(zap.String("remote", rb.remote),
   288  		zap.String("backend-id", uid.String()))
   289  	rb.options.goettyOptions = append(rb.options.goettyOptions,
   290  		goetty.WithSessionCodec(rb.codec),
   291  		goetty.WithSessionLogger(rb.logger))
   292  }
   293  
   294  func (rb *remoteBackend) Send(ctx context.Context, request Message) (*Future, error) {
   295  	if ctx == nil {
   296  		panic("remoteBackend Send nil context")
   297  	}
   298  	return rb.send(ctx, request, false)
   299  }
   300  
   301  func (rb *remoteBackend) SendInternal(ctx context.Context, request Message) (*Future, error) {
   302  	if ctx == nil {
   303  		panic("remoteBackend SendInternal nil context")
   304  	}
   305  	return rb.send(ctx, request, true)
   306  }
   307  
   308  func (rb *remoteBackend) send(ctx context.Context, request Message, internal bool) (*Future, error) {
   309  	f := rb.getFuture(ctx, request, internal)
   310  	if err := rb.doSend(f); err != nil {
   311  		f.Close()
   312  		return nil, err
   313  	}
   314  	rb.active()
   315  	return f, nil
   316  }
   317  
   318  func (rb *remoteBackend) getFuture(ctx context.Context, request Message, internal bool) *Future {
   319  	request.SetID(rb.nextID())
   320  	f := rb.newFuture()
   321  	f.init(RPCMessage{Ctx: ctx, Message: request, internal: internal})
   322  	rb.addFuture(f)
   323  	return f
   324  }
   325  
   326  func (rb *remoteBackend) NewStream(unlockAfterClose bool) (Stream, error) {
   327  	rb.stateMu.RLock()
   328  	defer rb.stateMu.RUnlock()
   329  
   330  	if rb.stateMu.state == stateStopped {
   331  		return nil, backendClosed
   332  	}
   333  
   334  	rb.mu.Lock()
   335  	defer rb.mu.Unlock()
   336  
   337  	st := rb.acquireStream()
   338  	st.init(rb.nextID(), unlockAfterClose)
   339  	rb.mu.activeStreams[st.ID()] = st
   340  	rb.active()
   341  	return st, nil
   342  }
   343  
   344  func (rb *remoteBackend) doSend(f *Future) error {
   345  	rb.metrics.sendCounter.Inc()
   346  
   347  	if err := rb.codec.Valid(f.send.Message); err != nil {
   348  		return err
   349  	}
   350  
   351  	for {
   352  		rb.stateMu.RLock()
   353  		if rb.stateMu.state == stateStopped {
   354  			rb.stateMu.RUnlock()
   355  			return backendClosed
   356  		}
   357  
   358  		// The close method need acquire the write lock, so we cannot block at here.
   359  		// The write loop may reset the backend's network link and may not be able to
   360  		// process writeC for a long time, causing the writeC buffer to reach its limit.
   361  		select {
   362  		case rb.writeC <- f:
   363  			rb.metrics.sendingQueueSizeGauge.Set(float64(len(rb.writeC)))
   364  			rb.stateMu.RUnlock()
   365  			return nil
   366  		case <-f.send.Ctx.Done():
   367  			rb.stateMu.RUnlock()
   368  			return f.send.Ctx.Err()
   369  		default:
   370  			rb.stateMu.RUnlock()
   371  		}
   372  	}
   373  }
   374  
   375  func (rb *remoteBackend) Close() {
   376  	rb.metrics.closeCounter.Inc()
   377  	rb.cancelOnce.Do(func() {
   378  		rb.cancel()
   379  	})
   380  	rb.stateMu.Lock()
   381  	if rb.stateMu.state == stateStopped {
   382  		rb.stateMu.Unlock()
   383  		return
   384  	}
   385  	rb.stateMu.state = stateStopped
   386  	rb.stopWriteLoop()
   387  	rb.stateMu.Unlock()
   388  
   389  	rb.stopper.Stop()
   390  	rb.doClose()
   391  	rb.inactive()
   392  }
   393  
   394  func (rb *remoteBackend) Busy() bool {
   395  	return len(rb.writeC) >= rb.options.busySize
   396  }
   397  
   398  func (rb *remoteBackend) LastActiveTime() time.Time {
   399  	return rb.atomic.lastActiveTime.Load().(time.Time)
   400  }
   401  
   402  func (rb *remoteBackend) Lock() {
   403  	rb.stateMu.Lock()
   404  	defer rb.stateMu.Unlock()
   405  	if rb.stateMu.locked {
   406  		panic("backend is already locked")
   407  	}
   408  	rb.stateMu.locked = true
   409  }
   410  
   411  func (rb *remoteBackend) Unlock() {
   412  	rb.stateMu.Lock()
   413  	defer rb.stateMu.Unlock()
   414  	if !rb.stateMu.locked {
   415  		panic("backend is not locked")
   416  	}
   417  	rb.stateMu.locked = false
   418  }
   419  
   420  func (rb *remoteBackend) Locked() bool {
   421  	rb.stateMu.RLock()
   422  	defer rb.stateMu.RUnlock()
   423  	return rb.stateMu.locked
   424  }
   425  
   426  func (rb *remoteBackend) active() {
   427  	now := time.Now()
   428  	rb.atomic.lastActiveTime.Store(now)
   429  }
   430  
   431  func (rb *remoteBackend) inactive() {
   432  	rb.atomic.lastActiveTime.Store(time.Time{})
   433  }
   434  
// writeLoop is the task run on rb.stopper. It repeatedly fetches a batch of
// futures, writes each of them to the connection, flushes once per batch and
// reports the outcome to every future through messageSent. It returns once
// fetch reports that the backend is being stopped.
func (rb *remoteBackend) writeLoop(ctx context.Context) {
	rb.logger.Debug("write loop started")
	// Deferred functions run in reverse order of registration: the panic
	// handler below runs first, then the write queue is failed and closed, and
	// this one runs last to tear down the connection and the read loop.
	defer func() {
		rb.pingTimer.Stop()
		// Disconnect before stopping the read stopper, then close the session
		// for good.
		rb.closeConn(false)
		rb.readStopper.Stop()
		rb.closeConn(true)
		rb.logger.Debug("write loop stopped")
	}()

	// Fail everything still queued, then close the queue.
	defer func() {
		rb.makeAllWritesDoneWithClosed()
		close(rb.writeC)
	}()

	// fatal if panic
	defer func() {
		if err := recover(); err != nil {
			rb.logger.Fatal("write loop failed",
				zap.Any("err", err))
		}
	}()

	rb.pingTimer = time.NewTimer(rb.getPingTimeout())
	messages := make([]*Future, 0, rb.options.batchSendSize)
	stopped := false
	lastScheduleTime := time.Now()
	for {
		messages, stopped = rb.fetch(messages, rb.options.batchSendSize)
		// The interval covers the time spent in fetch since the previous
		// iteration finished. The warning is only emitted when a read timeout
		// is configured.
		interval := time.Since(lastScheduleTime)
		if rb.options.readTimeout > 0 && interval > time.Second*5 {
			getLogger().Warn("system is busy, write loop schedule interval is too large",
				zap.Duration("interval", interval),
				zap.Time("last-ping-trigger-time", rb.lastPingTime),
				zap.Duration("ping-interval", rb.getPingTimeout()))
		}
		if len(messages) > 0 {
			rb.metrics.sendingBatchSizeGauge.Set(float64(len(messages)))
			start := time.Now()

			// writeTimeout accumulates the per-message timeouts and is used
			// for the single flush of the batch.
			writeTimeout := time.Duration(0)
			// written reuses the backing array of messages. This is safe
			// because it never holds more elements than have already been
			// visited by the loop below.
			written := messages[:0]
			for _, f := range messages {
				rb.metrics.writeLatencyDurationHistogram.Observe(start.Sub(f.send.createAt).Seconds())

				id := f.getSendMessageID()
				// Once stopped, nothing is written any more; every fetched
				// future is failed with backendClosed.
				if stopped {
					f.messageSent(backendClosed)
					continue
				}

				// doWrite returns 0 when it has already reported the outcome
				// to the future itself, so only futures with a positive
				// timeout still wait for the flush result.
				if v := rb.doWrite(id, f); v > 0 {
					writeTimeout += v
					written = append(written, f)
				}
			}

			if len(written) > 0 {
				rb.metrics.outputBytesCounter.Add(float64(rb.conn.OutBuf().Readable()))
				// The flush result applies to every message of the batch.
				if err := rb.conn.Flush(writeTimeout); err != nil {
					for _, f := range written {
						id := f.getSendMessageID()
						rb.logger.Error("write request failed",
							zap.Uint64("request-id", id),
							zap.Error(err))
						f.messageSent(err)
					}
				} else {
					for _, f := range written {
						f.messageSent(nil)
					}
				}
			}

			rb.metrics.writeDurationHistogram.Observe(time.Since(start).Seconds())
		}
		if stopped {
			return
		}
		lastScheduleTime = time.Now()
	}
}
   517  
   518  func (rb *remoteBackend) doWrite(id uint64, f *Future) time.Duration {
   519  	if !rb.options.filter(f.send.Message, rb.remote) {
   520  		f.messageSent(messageSkipped)
   521  		return 0
   522  	}
   523  	// already timeout in future, and future will get a ctx timeout
   524  	if f.send.Timeout() {
   525  		f.messageSent(f.send.Ctx.Err())
   526  		return 0
   527  	}
   528  
   529  	v, err := f.send.GetTimeoutFromContext()
   530  	if err != nil {
   531  		f.messageSent(err)
   532  		return 0
   533  	}
   534  
   535  	// For PayloadMessage, the internal Codec will write the Payload directly to the underlying socket
   536  	// instead of copying it to the buffer, so the write deadline of the underlying conn needs to be reset
   537  	// here, otherwise an old deadline will be out causing io/timeout.
   538  	conn := rb.conn.RawConn()
   539  	if _, ok := f.send.Message.(PayloadMessage); ok && conn != nil {
   540  		conn.SetWriteDeadline(time.Now().Add(v))
   541  	}
   542  	if ce := rb.logger.Check(zap.DebugLevel, "write request"); ce != nil {
   543  		ce.Write(zap.Uint64("request-id", id),
   544  			zap.String("request", f.send.Message.DebugString()))
   545  	}
   546  	if err := rb.conn.Write(f.send, goetty.WriteOptions{}); err != nil {
   547  		rb.logger.Error("write request failed",
   548  			zap.Uint64("request-id", id), zap.Error(err))
   549  		f.messageSent(err)
   550  		return 0
   551  	}
   552  	return v
   553  }
   554  
   555  func (rb *remoteBackend) readLoop(ctx context.Context) {
   556  	rb.logger.Debug("read loop started")
   557  	defer rb.logger.Error("read loop stopped")
   558  
   559  	wg := &sync.WaitGroup{}
   560  	var cb func()
   561  	if rb.options.hasPayloadResponse {
   562  		cb = wg.Done
   563  	}
   564  
   565  	// fatal if panic
   566  	defer func() {
   567  		if err := recover(); err != nil {
   568  			rb.logger.Fatal("read loop failed",
   569  				zap.Any("err", err))
   570  		}
   571  	}()
   572  
   573  	n := 0
   574  	for {
   575  		select {
   576  		case <-ctx.Done():
   577  			rb.clean()
   578  			return
   579  		default:
   580  			msg, err := rb.conn.Read(goetty.ReadOptions{Timeout: rb.options.readTimeout})
   581  			n++
   582  			if err != nil || rb.options.disconnectAfterRead == n {
   583  				rb.logger.Error("read from backend failed", zap.Error(err))
   584  				rb.inactiveReadLoop()
   585  				rb.cancelActiveStreams()
   586  				rb.scheduleResetConn()
   587  				return
   588  			}
   589  			rb.metrics.receiveCounter.Inc()
   590  
   591  			rb.active()
   592  
   593  			if rb.options.hasPayloadResponse {
   594  				wg.Add(1)
   595  			}
   596  			resp := msg.(RPCMessage).Message
   597  			rb.metrics.inputBytesCounter.Add(float64(resp.Size()))
   598  			rb.requestDone(ctx, resp.GetID(), msg.(RPCMessage), nil, cb)
   599  			if rb.options.hasPayloadResponse {
   600  				wg.Wait()
   601  			}
   602  		}
   603  	}
   604  }
   605  
   606  func (rb *remoteBackend) fetch(messages []*Future, maxFetchCount int) ([]*Future, bool) {
   607  	defer func() {
   608  		rb.metrics.sendingQueueSizeGauge.Set(float64(len(rb.writeC)))
   609  	}()
   610  
   611  	n := len(messages)
   612  	for i := 0; i < n; i++ {
   613  		messages[i] = nil
   614  	}
   615  	messages = messages[:0]
   616  
   617  	doHeartbeat := func() {
   618  		rb.lastPingTime = time.Now()
   619  		f := rb.getFuture(context.TODO(), &flagOnlyMessage{flag: flagPing}, true)
   620  		// no need wait response, close immediately
   621  		f.Close()
   622  		messages = append(messages, f)
   623  		rb.pingTimer.Reset(rb.getPingTimeout())
   624  	}
   625  	handleHeartbeat := func() {
   626  		select {
   627  		case <-rb.pingTimer.C:
   628  			doHeartbeat()
   629  		default:
   630  		}
   631  	}
   632  
   633  	select {
   634  	case <-rb.pingTimer.C:
   635  		doHeartbeat()
   636  	case f := <-rb.writeC:
   637  		handleHeartbeat()
   638  		messages = append(messages, f)
   639  	case <-rb.resetConnC:
   640  		// If the connect needs to be reset, then all futures in the waiting response state will never
   641  		// get the response and need to be notified of an error immediately.
   642  		rb.makeAllWaitingFutureFailed()
   643  		rb.handleResetConn()
   644  	case <-rb.stopWriteC:
   645  		return rb.fetchN(messages, math.MaxInt), true
   646  	}
   647  
   648  	return rb.fetchN(messages, maxFetchCount), false
   649  }
   650  
   651  func (rb *remoteBackend) fetchN(messages []*Future, max int) []*Future {
   652  	if len(messages) >= max {
   653  		return messages
   654  	}
   655  	n := max - len(messages)
   656  	for i := 0; i < n; i++ {
   657  		select {
   658  		case f := <-rb.writeC:
   659  			messages = append(messages, f)
   660  		default:
   661  			return messages
   662  		}
   663  	}
   664  	return messages
   665  }
   666  
   667  func (rb *remoteBackend) makeAllWritesDoneWithClosed() {
   668  	for {
   669  		select {
   670  		case m := <-rb.writeC:
   671  			m.messageSent(backendClosed)
   672  		default:
   673  			return
   674  		}
   675  	}
   676  }
   677  
   678  func (rb *remoteBackend) makeAllWaitingFutureFailed() {
   679  	var ids []uint64
   680  	var waitings []*Future
   681  	func() {
   682  		rb.mu.Lock()
   683  		defer rb.mu.Unlock()
   684  		ids = make([]uint64, 0, len(rb.mu.futures))
   685  		waitings = make([]*Future, 0, len(rb.mu.futures))
   686  		for id, f := range rb.mu.futures {
   687  			if f.waiting.Load() {
   688  				waitings = append(waitings, f)
   689  				ids = append(ids, id)
   690  			}
   691  		}
   692  	}()
   693  
   694  	for i, f := range waitings {
   695  		f.error(ids[i], backendClosed, nil)
   696  	}
   697  }
   698  
   699  func (rb *remoteBackend) handleResetConn() {
   700  	if err := rb.resetConn(); err != nil {
   701  		rb.logger.Error("fail to reset backend connection", zap.Error(err))
   702  		rb.inactive()
   703  	}
   704  }
   705  
   706  func (rb *remoteBackend) doClose() {
   707  	rb.closeOnce.Do(func() {
   708  		close(rb.resetConnC)
   709  		rb.closeConn(false)
   710  		// TODO: re create when reconnect
   711  		rb.conn = nil
   712  	})
   713  }
   714  
   715  func (rb *remoteBackend) clean() {
   716  	rb.mu.Lock()
   717  	defer rb.mu.Unlock()
   718  
   719  	for id := range rb.mu.futures {
   720  		delete(rb.mu.futures, id)
   721  	}
   722  }
   723  
   724  func (rb *remoteBackend) acquireStream() *stream {
   725  	return rb.pool.streams.Get().(*stream)
   726  }
   727  
   728  func (rb *remoteBackend) cancelActiveStreams() {
   729  	rb.mu.Lock()
   730  	defer rb.mu.Unlock()
   731  
   732  	for _, st := range rb.mu.activeStreams {
   733  		st.done(context.TODO(), RPCMessage{}, true)
   734  	}
   735  }
   736  
   737  func (rb *remoteBackend) removeActiveStream(s *stream) {
   738  	rb.mu.Lock()
   739  	defer rb.mu.Unlock()
   740  
   741  	delete(rb.mu.activeStreams, s.id)
   742  	delete(rb.mu.futures, s.id)
   743  	if s.unlockAfterClose {
   744  		rb.Unlock()
   745  	}
   746  	if len(s.c) > 0 {
   747  		panic("BUG: stream channel is not empty")
   748  	}
   749  	rb.pool.streams.Put(s)
   750  }
   751  
// stopWriteLoop signals the write loop to stop by closing stopWriteC. The
// channel is closed unconditionally, so this must be called at most once.
func (rb *remoteBackend) stopWriteLoop() {
	close(rb.stopWriteC)
}
   755  
   756  func (rb *remoteBackend) requestDone(
   757  	ctx context.Context,
   758  	id uint64,
   759  	msg RPCMessage,
   760  	err error,
   761  	cb func()) {
   762  	start := time.Now()
   763  	defer func() {
   764  		rb.metrics.doneDurationHistogram.Observe(time.Since(start).Seconds())
   765  	}()
   766  
   767  	response := msg.Message
   768  	if msg.Cancel != nil {
   769  		defer msg.Cancel()
   770  	}
   771  	if ce := rb.logger.Check(zap.DebugLevel, "read response"); ce != nil {
   772  		debugStr := ""
   773  		if response != nil {
   774  			debugStr = response.DebugString()
   775  		}
   776  		ce.Write(zap.Uint64("request-id", id),
   777  			zap.String("response", debugStr))
   778  	}
   779  
   780  	rb.mu.Lock()
   781  	if f, ok := rb.mu.futures[id]; ok {
   782  		delete(rb.mu.futures, id)
   783  		rb.mu.Unlock()
   784  		if err == nil {
   785  			f.done(response, cb)
   786  		} else {
   787  			errutil.ReportError(ctx, err)
   788  			f.error(id, err, cb)
   789  		}
   790  	} else if st, ok := rb.mu.activeStreams[id]; ok {
   791  		rb.mu.Unlock()
   792  		if response != nil {
   793  			st.done(ctx, msg, false)
   794  		}
   795  	} else {
   796  		// future has been removed, e.g. it has timed out.
   797  		rb.mu.Unlock()
   798  		if cb != nil {
   799  			cb()
   800  		}
   801  
   802  		if !msg.internal &&
   803  			response != nil &&
   804  			rb.options.freeResponse != nil {
   805  			rb.options.freeResponse(response)
   806  		}
   807  	}
   808  }
   809  
   810  func (rb *remoteBackend) addFuture(f *Future) {
   811  	rb.mu.Lock()
   812  	defer rb.mu.Unlock()
   813  
   814  	f.ref()
   815  	rb.mu.futures[f.getSendMessageID()] = f
   816  }
   817  
   818  func (rb *remoteBackend) releaseFuture(f *Future) {
   819  	rb.mu.Lock()
   820  	defer rb.mu.Unlock()
   821  
   822  	delete(rb.mu.futures, f.getSendMessageID())
   823  	f.reset()
   824  	rb.pool.futures.Put(f)
   825  }
   826  
   827  func (rb *remoteBackend) running() bool {
   828  	rb.stateMu.RLock()
   829  	defer rb.stateMu.RUnlock()
   830  	return rb.runningLocked()
   831  }
   832  
// resetConn disconnects the current connection and connects to the remote
// again, retrying after timeout errors.
//
// It returns nil once connected (after making sure a read loop is active),
// backendClosed when the backend is no longer running or its context has been
// cancelled, a "cannot connect" error for a non-timeout failure, and an RPC
// timeout error when the overall time spent exceeds the connect timeout.
func (rb *remoteBackend) resetConn() error {
	start := time.Now()
	defer func() {
		rb.metrics.connectDurationHistogram.Observe(time.Since(start).Seconds())
	}()

	// wait is the pause between two connection attempts; it grows by half
	// after every failed attempt. The pause is slept in steps of sleep so that
	// cancellation and the overall timeout are checked regularly.
	wait := time.Second
	sleep := time.Millisecond * 200
	for {
		if !rb.running() {
			return backendClosed
		}
		select {
		case <-rb.ctx.Done():
			return backendClosed
		default:
		}

		rb.logger.Debug("start connect to remote")
		rb.closeConn(false)
		rb.metrics.connectCounter.Inc()
		err := rb.conn.Connect(rb.remote, rb.options.connectTimeout)
		if err == nil {
			rb.logger.Debug("connect to remote succeed")
			rb.activeReadLoop(false)
			return nil
		}

		rb.metrics.connectFailedCounter.Inc()

		// only retry when the failure is a network timeout
		canRetry := false
		if ne, ok := err.(net.Error); ok && ne.Timeout() {
			canRetry = true
		}
		rb.logger.Error("init remote connection failed, retry later",
			zap.Bool("can-retry", canRetry),
			zap.Error(err))

		if !canRetry {
			return moerr.NewBackendCannotConnectNoCtx(err)
		}
		// Pause before the next attempt. The elapsed time is measured from
		// the start of resetConn, so the connect timeout bounds all attempts
		// together.
		duration := time.Duration(0)
		for {
			time.Sleep(sleep)
			duration += sleep
			if time.Since(start) > rb.options.connectTimeout {
				return moerr.NewRPCTimeoutNoCtx()
			}
			select {
			case <-rb.ctx.Done():
				return backendClosed
			default:
			}
			if duration >= wait {
				break
			}
		}
		wait += wait / 2

		// reconnect failed, notify all future failed
		rb.notifyAllWaitWritesFailed(moerr.NewBackendCannotConnectNoCtx())
	}
}
   897  
   898  func (rb *remoteBackend) notifyAllWaitWritesFailed(err error) {
   899  	for {
   900  		select {
   901  		case f := <-rb.writeC:
   902  			f.messageSent(err)
   903  		default:
   904  			return
   905  		}
   906  	}
   907  }
   908  
   909  func (rb *remoteBackend) activeReadLoop(locked bool) {
   910  	if !locked {
   911  		rb.stateMu.Lock()
   912  		defer rb.stateMu.Unlock()
   913  	}
   914  
   915  	if rb.stateMu.readLoopActive {
   916  		return
   917  	}
   918  
   919  	if err := rb.readStopper.RunTask(rb.readLoop); err != nil {
   920  		rb.logger.Error("active read loop failed", zap.Error(err))
   921  		return
   922  	}
   923  	rb.stateMu.readLoopActive = true
   924  }
   925  
   926  func (rb *remoteBackend) inactiveReadLoop() {
   927  	rb.stateMu.Lock()
   928  	defer rb.stateMu.Unlock()
   929  
   930  	rb.stateMu.readLoopActive = false
   931  }
   932  
   933  func (rb *remoteBackend) runningLocked() bool {
   934  	return rb.stateMu.state == stateRunning
   935  }
   936  
   937  func (rb *remoteBackend) scheduleResetConn() {
   938  	rb.stateMu.RLock()
   939  	defer rb.stateMu.RUnlock()
   940  
   941  	if !rb.runningLocked() {
   942  		return
   943  	}
   944  
   945  	select {
   946  	case rb.resetConnC <- struct{}{}:
   947  		rb.logger.Debug("schedule reset remote connection")
   948  	default:
   949  	}
   950  }
   951  
   952  func (rb *remoteBackend) closeConn(close bool) {
   953  	fn := rb.conn.Disconnect
   954  	if close {
   955  		fn = rb.conn.Close
   956  	}
   957  
   958  	if err := fn(); err != nil {
   959  		rb.logger.Error("close remote conn failed", zap.Error(err))
   960  	}
   961  }
   962  
   963  func (rb *remoteBackend) newFuture() *Future {
   964  	return rb.pool.futures.Get().(*Future)
   965  }
   966  
// nextID atomically increments the backend's id counter and returns the new
// value. The ids are used for both requests and streams.
func (rb *remoteBackend) nextID() uint64 {
	return atomic.AddUint64(&rb.atomic.id, 1)
}
   970  
   971  func (rb *remoteBackend) getPingTimeout() time.Duration {
   972  	if rb.options.readTimeout > 0 {
   973  		return rb.options.readTimeout / 5
   974  	}
   975  	return time.Duration(math.MaxInt64)
   976  }
   977  
// goettyBasedBackendFactory creates remote backends that all share the same
// codec and the same base set of backend options.
type goettyBasedBackendFactory struct {
	codec   Codec
	options []BackendOption
}
   982  
   983  func NewGoettyBasedBackendFactory(codec Codec, options ...BackendOption) BackendFactory {
   984  	return &goettyBasedBackendFactory{
   985  		codec:   codec,
   986  		options: options,
   987  	}
   988  }
   989  
   990  func (bf *goettyBasedBackendFactory) Create(
   991  	remote string,
   992  	extraOptions ...BackendOption) (Backend, error) {
   993  	opts := append(bf.options, extraOptions...)
   994  	return NewRemoteBackend(remote, bf.codec, opts...)
   995  }
   996  
// stream is the pooled stream object handed out by remoteBackend.NewStream.
//
// The fields above the "reset fields" marker are set once in newStream: the
// owning backend, the channel on which received messages are buffered, the
// callbacks into the backend, and a cancelable context that is cancelled in
// destroy. unlockAfterClose and the fields below the marker are set again by
// init every time the stream is reused.
type stream struct {
	rb               *remoteBackend
	c                chan Message
	sendFunc         func(*Future) error
	activeFunc       func()
	unregisterFunc   func(*stream)
	newFutureFunc    func() *Future
	unlockAfterClose bool
	ctx              context.Context
	cancel           context.CancelFunc

	// reset fields
	id                   uint64
	sequence             uint32
	lastReceivedSequence uint32
	mu                   struct {
		sync.RWMutex
		closed bool
	}
}
  1017  
  1018  func newStream(
  1019  	rb *remoteBackend,
  1020  	c chan Message,
  1021  	acquireFutureFunc func() *Future,
  1022  	sendFunc func(*Future) error,
  1023  	unregisterFunc func(*stream),
  1024  	activeFunc func()) *stream {
  1025  	ctx, cancel := context.WithCancel(context.Background())
  1026  	s := &stream{
  1027  		rb:             rb,
  1028  		c:              c,
  1029  		ctx:            ctx,
  1030  		cancel:         cancel,
  1031  		sendFunc:       sendFunc,
  1032  		unregisterFunc: unregisterFunc,
  1033  		activeFunc:     activeFunc,
  1034  		newFutureFunc:  acquireFutureFunc,
  1035  	}
  1036  	s.setFinalizer()
  1037  	return s
  1038  }
  1039  
  1040  func (s *stream) init(id uint64, unlockAfterClose bool) {
  1041  	s.id = id
  1042  	s.unlockAfterClose = unlockAfterClose
  1043  	s.sequence = 0
  1044  	s.lastReceivedSequence = 0
  1045  	s.mu.closed = false
  1046  	for {
  1047  		select {
  1048  		case <-s.c:
  1049  		default:
  1050  			return
  1051  		}
  1052  	}
  1053  }
  1054  
  1055  func (s *stream) setFinalizer() {
  1056  	runtime.SetFinalizer(s, func(s *stream) {
  1057  		s.destroy()
  1058  	})
  1059  }
  1060  
// destroy releases the stream's resources: it closes the message channel and
// then cancels the stream context. It is installed as the stream's finalizer
// by setFinalizer.
func (s *stream) destroy() {
	close(s.c)
	s.cancel()
}
  1065  
  1066  func (s *stream) Send(ctx context.Context, request Message) error {
  1067  	if s.id != request.GetID() {
  1068  		panic("request.id != stream.id")
  1069  	}
  1070  	if _, ok := ctx.Deadline(); !ok {
  1071  		panic("deadline not set in context")
  1072  	}
  1073  	s.activeFunc()
  1074  
  1075  	f := s.newFutureFunc()
  1076  	f.ref()
  1077  	defer f.Close()
  1078  
  1079  	s.mu.RLock()
  1080  	if s.mu.closed {
  1081  		s.mu.RUnlock()
  1082  		s.rb.logger.Warn("stream is closed on send", zap.Uint64("stream-id", s.id))
  1083  		return moerr.NewStreamClosedNoCtx()
  1084  	}
  1085  
  1086  	err := s.doSendLocked(ctx, f, request)
  1087  	// unlock before future.close to avoid deadlock with future.Close
  1088  	// 1. current goroutine:        stream.RLock
  1089  	// 2. backend read goroutine:   cancelActiveStream -> backend.Lock
  1090  	// 3. backend read goroutine:   cancelActiveStream -> stream.Lock : deadlock here
  1091  	// 4. current goroutine:        f.Close -> backend.Lock           : deadlock here
  1092  	s.mu.RUnlock()
  1093  
  1094  	if err != nil {
  1095  		return err
  1096  	}
  1097  	// stream only wait send completed
  1098  	return f.waitSendCompleted()
  1099  }
  1100  
  1101  func (s *stream) doSendLocked(
  1102  	ctx context.Context,
  1103  	f *Future,
  1104  	request Message) error {
  1105  	s.sequence++
  1106  	f.init(RPCMessage{
  1107  		Ctx:            ctx,
  1108  		Message:        request,
  1109  		stream:         true,
  1110  		streamSequence: s.sequence,
  1111  	})
  1112  
  1113  	return s.sendFunc(f)
  1114  }
  1115  
  1116  func (s *stream) Receive() (chan Message, error) {
  1117  	s.mu.RLock()
  1118  	defer s.mu.RUnlock()
  1119  	if s.mu.closed {
  1120  		s.rb.logger.Warn("stream is closed on receive", zap.Uint64("stream-id", s.id))
  1121  		return nil, moerr.NewStreamClosedNoCtx()
  1122  	}
  1123  	return s.c, nil
  1124  }
  1125  
  1126  func (s *stream) Close(closeConn bool) error {
  1127  	if closeConn {
  1128  		s.rb.logger.Info("stream call closed on client", zap.Uint64("stream-id", s.id))
  1129  		s.rb.Close()
  1130  	}
  1131  	s.mu.Lock()
  1132  	defer s.mu.Unlock()
  1133  
  1134  	if s.mu.closed {
  1135  		return nil
  1136  	}
  1137  
  1138  	s.cleanCLocked()
  1139  	s.mu.closed = true
  1140  	s.unregisterFunc(s)
  1141  	return nil
  1142  }
  1143  
// ID returns the id of the stream.
func (s *stream) ID() uint64 {
	return s.id
}
  1147  
  1148  func (s *stream) done(
  1149  	ctx context.Context,
  1150  	message RPCMessage,
  1151  	clean bool) {
  1152  	s.mu.RLock()
  1153  	defer s.mu.RUnlock()
  1154  
  1155  	if s.mu.closed {
  1156  		return
  1157  	}
  1158  
  1159  	if clean {
  1160  		s.cleanCLocked()
  1161  	}
  1162  	response := message.Message
  1163  	if message.Cancel != nil {
  1164  		defer message.Cancel()
  1165  	}
  1166  	if response != nil && !message.stream {
  1167  		panic("BUG")
  1168  	}
  1169  	if response != nil &&
  1170  		message.streamSequence != s.lastReceivedSequence+1 {
  1171  		s.rb.logger.Warn("sequence out of order", zap.Uint32("new", message.streamSequence),
  1172  			zap.Uint32("last", s.lastReceivedSequence))
  1173  		response = nil
  1174  	}
  1175  
  1176  	s.lastReceivedSequence = message.streamSequence
  1177  	moprobe.WithRegion(ctx, moprobe.RPCStreamReceive, func() {
  1178  		select {
  1179  		case s.c <- response:
  1180  		case <-ctx.Done():
  1181  		}
  1182  	})
  1183  }
  1184  
  1185  func (s *stream) cleanCLocked() {
  1186  	for {
  1187  		select {
  1188  		case <-s.c:
  1189  		default:
  1190  			return
  1191  		}
  1192  	}
  1193  }