github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/raft_transport.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"bytes"
	"context"
	"fmt"
	"net"
	"sort"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"go.etcd.io/etcd/raft/raftpb"
	"google.golang.org/grpc"
)

const (
	// Outgoing messages are queued per-node on a channel of this size.
	//
	// TODO(peter): The normal send buffer size is larger than we would like. It
	// is a temporary patch for the issue discussed in #8630 where
	// Store.HandleRaftRequest can block applying a preemptive snapshot for a
	// long enough period of time that grpc flow control kicks in and messages
	// are dropped on the sending side.
	raftSendBufferSize = 10000

	// When no message has been queued for this duration, the corresponding
	// instance of processQueue will shut down.
	//
	// TODO(tamird): make culling of outbound streams more evented, so that we
	// need not rely on this timeout to shut things down.
	raftIdleTimeout = time.Minute
)

// RaftMessageResponseStream is the subset of the
// MultiRaft_RaftMessageServer interface that is needed for sending responses.
type RaftMessageResponseStream interface {
	Context() context.Context
	Send(*RaftMessageResponse) error
}

// lockedRaftMessageResponseStream is an implementation of
// RaftMessageResponseStream which provides support for concurrent calls to
// Send. Note that the default implementation of grpc.Stream for server
// responses (grpc.serverStream) is not safe for concurrent calls to Send.
type lockedRaftMessageResponseStream struct {
	wrapped MultiRaft_RaftMessageBatchServer
	sendMu  syncutil.Mutex
}

func (s *lockedRaftMessageResponseStream) Context() context.Context {
	return s.wrapped.Context()
}

func (s *lockedRaftMessageResponseStream) Send(resp *RaftMessageResponse) error {
	s.sendMu.Lock()
	defer s.sendMu.Unlock()
	return s.wrapped.Send(resp)
}

func (s *lockedRaftMessageResponseStream) Recv() (*RaftMessageRequestBatch, error) {
	// No need for lock. gRPC.Stream.RecvMsg is safe for concurrent use.
	return s.wrapped.Recv()
}

// SnapshotResponseStream is the subset of the
// MultiRaft_RaftSnapshotServer interface that is needed for sending responses.
type SnapshotResponseStream interface {
	Context() context.Context
	Send(*SnapshotResponse) error
	Recv() (*SnapshotRequest, error)
}

// RaftMessageHandler is the interface that must be implemented by
// arguments to RaftTransport.Listen.
type RaftMessageHandler interface {
	// HandleRaftRequest is called for each incoming Raft message. The request is
	// always processed asynchronously and the response is sent over respStream.
	// If an error is encountered during asynchronous processing, it will be
	// streamed back to the sender of the message as a RaftMessageResponse.
	HandleRaftRequest(ctx context.Context, req *RaftMessageRequest,
		respStream RaftMessageResponseStream) *roachpb.Error

	// HandleRaftResponse is called for each raft response. Note that
	// not all messages receive a response. An error is returned if and only if
	// the underlying Raft connection should be closed.
	HandleRaftResponse(context.Context, *RaftMessageResponse) error

	// HandleSnapshot is called for each new incoming snapshot stream, after
	// parsing the initial SnapshotRequest_Header on the stream.
	HandleSnapshot(header *SnapshotRequest_Header, respStream SnapshotResponseStream) error
}
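
// noopRaftMessageHandler is a hypothetical, minimal implementation of the
// RaftMessageHandler contract above, shown only for illustration (it is not
// used by the transport; the production handler in this package is the
// Store). It accepts and drops every message, which might be useful as a
// sketch for tests.
type noopRaftMessageHandler struct{}

func (noopRaftMessageHandler) HandleRaftRequest(
	ctx context.Context, req *RaftMessageRequest, respStream RaftMessageResponseStream,
) *roachpb.Error {
	// Returning nil means no error response is streamed back to the sender.
	return nil
}

func (noopRaftMessageHandler) HandleRaftResponse(
	ctx context.Context, resp *RaftMessageResponse,
) error {
	// Returning nil keeps the underlying Raft connection open.
	return nil
}

func (noopRaftMessageHandler) HandleSnapshot(
	header *SnapshotRequest_Header, respStream SnapshotResponseStream,
) error {
	return errors.New("snapshots not supported by this sketch handler")
}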

type raftTransportStats struct {
	nodeID        roachpb.NodeID
	queue         int
	queueMax      int32
	clientSent    int64
	clientRecv    int64
	clientDropped int64
	serverSent    int64
	serverRecv    int64
}

type raftTransportStatsSlice []*raftTransportStats

func (s raftTransportStatsSlice) Len() int           { return len(s) }
func (s raftTransportStatsSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s raftTransportStatsSlice) Less(i, j int) bool { return s[i].nodeID < s[j].nodeID }

// RaftTransport handles the rpc messages for raft.
//
// The raft transport is asynchronous with respect to the caller, and
// internally multiplexes outbound messages. Internally, each message is
// queued on a per-destination queue before being asynchronously delivered.
//
// Callers are required to register a RaftMessageHandler via Listen before
// being able to dispatch messages via SendAsync, and the handler's
// HandleRaftResponse method will be invoked asynchronously in the event that
// the recipient of any message closes its inbound RPC stream. This callback
// is asynchronous with respect to the outbound message which caused the
// remote to hang up; all that is known is which remote hung up.
type RaftTransport struct {
	log.AmbientContext
	st *cluster.Settings

	stopper *stop.Stopper

	queues   [rpc.NumConnectionClasses]syncutil.IntMap // map[roachpb.NodeID]*chan *RaftMessageRequest
	stats    [rpc.NumConnectionClasses]syncutil.IntMap // map[roachpb.NodeID]*raftTransportStats
	dialer   *nodedialer.Dialer
	handlers syncutil.IntMap // map[roachpb.StoreID]*RaftMessageHandler
}

// NewDummyRaftTransport returns a dummy raft transport for use in tests which
// need a non-nil raft transport that need not function.
func NewDummyRaftTransport(st *cluster.Settings) *RaftTransport {
	resolver := func(roachpb.NodeID) (net.Addr, error) {
		return nil, errors.New("dummy resolver")
	}
	return NewRaftTransport(log.AmbientContext{Tracer: st.Tracer}, st,
		nodedialer.New(nil, resolver), nil, nil)
}
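
// exampleDummyTransportUsage is a hypothetical sketch (not called by this
// package) of wiring a handler into the dummy transport for a test. The
// noopRaftMessageHandler above and store ID 1 are illustrative assumptions.
func exampleDummyTransportUsage() *RaftTransport {
	st := cluster.MakeTestingClusterSettings()
	transport := NewDummyRaftTransport(st)
	// Incoming messages addressed to store 1 would be routed to the handler;
	// outbound dials all fail via the dummy resolver.
	transport.Listen(roachpb.StoreID(1), noopRaftMessageHandler{})
	return transport
}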
   165  
   166  // NewRaftTransport creates a new RaftTransport.
   167  func NewRaftTransport(
   168  	ambient log.AmbientContext,
   169  	st *cluster.Settings,
   170  	dialer *nodedialer.Dialer,
   171  	grpcServer *grpc.Server,
   172  	stopper *stop.Stopper,
   173  ) *RaftTransport {
   174  	t := &RaftTransport{
   175  		AmbientContext: ambient,
   176  		st:             st,
   177  
   178  		stopper: stopper,
   179  		dialer:  dialer,
   180  	}
   181  
   182  	if grpcServer != nil {
   183  		RegisterMultiRaftServer(grpcServer, t)
   184  	}
   185  	// statsMap is used to associate a queue with its raftTransportStats.
   186  	statsMap := make(map[roachpb.NodeID]*raftTransportStats)
   187  	clearStatsMap := func() {
   188  		for k := range statsMap {
   189  			delete(statsMap, k)
   190  		}
   191  	}
   192  	if t.stopper != nil && log.V(1) {
   193  		ctx := t.AnnotateCtx(context.Background())
   194  		t.stopper.RunWorker(ctx, func(ctx context.Context) {
   195  			ticker := time.NewTicker(10 * time.Second)
   196  			defer ticker.Stop()
   197  			lastStats := make(map[roachpb.NodeID]raftTransportStats)
   198  			lastTime := timeutil.Now()
   199  			var stats raftTransportStatsSlice
   200  			for {
   201  				select {
   202  				case <-ticker.C:
   203  					stats = stats[:0]
   204  					getStats := func(k int64, v unsafe.Pointer) bool {
   205  						s := (*raftTransportStats)(v)
   206  						// Clear the queue length stat. Note that this field is only
   207  						// mutated by this goroutine.
   208  						s.queue = 0
   209  						stats = append(stats, s)
   210  						statsMap[roachpb.NodeID(k)] = s
   211  						return true
   212  					}
   213  					setQueueLength := func(k int64, v unsafe.Pointer) bool {
   214  						ch := *(*chan *RaftMessageRequest)(v)
   215  						if s, ok := statsMap[roachpb.NodeID(k)]; ok {
   216  							s.queue += len(ch)
   217  						}
   218  						return true
   219  					}
   220  					for c := range t.stats {
   221  						clearStatsMap()
   222  						t.stats[c].Range(getStats)
   223  						t.queues[c].Range(setQueueLength)
   224  					}
   225  					clearStatsMap() // no need to hold on to references to stats
   226  
   227  					now := timeutil.Now()
   228  					elapsed := now.Sub(lastTime).Seconds()
   229  					sort.Sort(stats)
   230  
   231  					var buf bytes.Buffer
   232  					// NB: The header is 80 characters which should display in a single
   233  					// line on most terminals.
   234  					fmt.Fprintf(&buf,
   235  						"         qlen   qmax   qdropped client-sent client-recv server-sent server-recv\n")
   236  					for _, s := range stats {
   237  						last := lastStats[s.nodeID]
   238  						cur := raftTransportStats{
   239  							nodeID:        s.nodeID,
   240  							queue:         s.queue,
   241  							queueMax:      atomic.LoadInt32(&s.queueMax),
   242  							clientDropped: atomic.LoadInt64(&s.clientDropped),
   243  							clientSent:    atomic.LoadInt64(&s.clientSent),
   244  							clientRecv:    atomic.LoadInt64(&s.clientRecv),
   245  							serverSent:    atomic.LoadInt64(&s.serverSent),
   246  							serverRecv:    atomic.LoadInt64(&s.serverRecv),
   247  						}
   248  						fmt.Fprintf(&buf, "  %3d: %6d %6d %10d %11.1f %11.1f %11.1f %11.1f\n",
   249  							cur.nodeID, cur.queue, cur.queueMax, cur.clientDropped,
   250  							float64(cur.clientSent-last.clientSent)/elapsed,
   251  							float64(cur.clientRecv-last.clientRecv)/elapsed,
   252  							float64(cur.serverSent-last.serverSent)/elapsed,
   253  							float64(cur.serverRecv-last.serverRecv)/elapsed)
   254  						lastStats[s.nodeID] = cur
   255  					}
   256  					lastTime = now
   257  					log.Infof(ctx, "stats:\n%s", buf.String())
   258  				case <-t.stopper.ShouldStop():
   259  					return
   260  				}
   261  			}
   262  		})
   263  	}
   264  
   265  	return t
   266  }

func (t *RaftTransport) queuedMessageCount() int64 {
	var n int64
	addLength := func(k int64, v unsafe.Pointer) bool {
		ch := *(*chan *RaftMessageRequest)(v)
		n += int64(len(ch))
		return true
	}
	for class := range t.queues {
		t.queues[class].Range(addLength)
	}
	return n
}

func (t *RaftTransport) getHandler(storeID roachpb.StoreID) (RaftMessageHandler, bool) {
	if value, ok := t.handlers.Load(int64(storeID)); ok {
		return *(*RaftMessageHandler)(value), true
	}
	return nil, false
}

// handleRaftRequest proxies a request to the listening server interface.
func (t *RaftTransport) handleRaftRequest(
	ctx context.Context, req *RaftMessageRequest, respStream RaftMessageResponseStream,
) *roachpb.Error {
	handler, ok := t.getHandler(req.ToReplica.StoreID)
	if !ok {
		log.Warningf(ctx, "unable to accept Raft message from %+v: no handler registered for %+v",
			req.FromReplica, req.ToReplica)
		return roachpb.NewError(roachpb.NewStoreNotFoundError(req.ToReplica.StoreID))
	}

	return handler.HandleRaftRequest(ctx, req, respStream)
}

// newRaftMessageResponse constructs a RaftMessageResponse from the
// given request and error.
func newRaftMessageResponse(req *RaftMessageRequest, pErr *roachpb.Error) *RaftMessageResponse {
	resp := &RaftMessageResponse{
		RangeID: req.RangeID,
		// From and To are reversed in the response.
		ToReplica:   req.FromReplica,
		FromReplica: req.ToReplica,
	}
	if pErr != nil {
		resp.Union.SetValue(pErr)
	}
	return resp
}
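
// For example (hypothetical replica IDs), an error response to a request
// from replica (n1,s1):1 to replica (n2,s2):2 is addressed back to the
// original sender:
//
//	resp := newRaftMessageResponse(req, roachpb.NewError(err))
//	// resp.ToReplica == req.FromReplica   ((n1,s1):1)
//	// resp.FromReplica == req.ToReplica   ((n2,s2):2)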

func (t *RaftTransport) getStats(
	nodeID roachpb.NodeID, class rpc.ConnectionClass,
) *raftTransportStats {
	statsMap := &t.stats[class]
	value, ok := statsMap.Load(int64(nodeID))
	if !ok {
		stats := &raftTransportStats{nodeID: nodeID}
		value, _ = statsMap.LoadOrStore(int64(nodeID), unsafe.Pointer(stats))
	}
	return (*raftTransportStats)(value)
}

// RaftMessageBatch proxies the incoming requests to the listening server interface.
func (t *RaftTransport) RaftMessageBatch(stream MultiRaft_RaftMessageBatchServer) error {
	errCh := make(chan error, 1)

	// Node stopping error is caught below in the select.
	if err := t.stopper.RunTask(
		stream.Context(), "storage.RaftTransport: processing batch",
		func(ctx context.Context) {
			t.stopper.RunWorker(ctx, func(ctx context.Context) {
				errCh <- func() error {
					var stats *raftTransportStats
					stream := &lockedRaftMessageResponseStream{wrapped: stream}
					for {
						batch, err := stream.Recv()
						if err != nil {
							return err
						}
						if len(batch.Requests) == 0 {
							continue
						}

						// This code always uses the DefaultClass. Class is primarily a
						// client construct and the server has no way to determine which
						// class an inbound connection holds on the client side. Because of
						// this we associate all server receives and sends with the
						// DefaultClass. This data is exclusively used to print a debug
						// log message periodically. Using this policy may lead to a
						// DefaultClass log line showing a high rate of server recv but
						// a low rate of client sends if most of the traffic is due to
						// system ranges.
						//
						// TODO(ajwerner): consider providing transport metadata to inform
						// the server of the connection class or keep shared stats for all
						// connections with a host.
						if stats == nil {
							stats = t.getStats(batch.Requests[0].FromReplica.NodeID, rpc.DefaultClass)
						}

						for i := range batch.Requests {
							req := &batch.Requests[i]
							atomic.AddInt64(&stats.serverRecv, 1)
							if pErr := t.handleRaftRequest(ctx, req, stream); pErr != nil {
								atomic.AddInt64(&stats.serverSent, 1)
								if err := stream.Send(newRaftMessageResponse(req, pErr)); err != nil {
									return err
								}
							}
						}
					}
				}()
			})
		}); err != nil {
		return err
	}

	select {
	case err := <-errCh:
		return err
	case <-t.stopper.ShouldQuiesce():
		return nil
	}
}

// RaftSnapshot handles incoming streaming snapshot requests.
func (t *RaftTransport) RaftSnapshot(stream MultiRaft_RaftSnapshotServer) error {
	errCh := make(chan error, 1)
	if err := t.stopper.RunAsyncTask(
		stream.Context(), "storage.RaftTransport: processing snapshot",
		func(ctx context.Context) {
			errCh <- func() error {
				req, err := stream.Recv()
				if err != nil {
					return err
				}
				if req.Header == nil {
					return stream.Send(&SnapshotResponse{
						Status:  SnapshotResponse_ERROR,
						Message: "client error: no header in first snapshot request message"})
				}
				rmr := req.Header.RaftMessageRequest
				handler, ok := t.getHandler(rmr.ToReplica.StoreID)
				if !ok {
					log.Warningf(ctx, "unable to accept Raft message from %+v: no handler registered for %+v",
						rmr.FromReplica, rmr.ToReplica)
					return roachpb.NewStoreNotFoundError(rmr.ToReplica.StoreID)
				}
				return handler.HandleSnapshot(req.Header, stream)
			}()
		}); err != nil {
		return err
	}
	select {
	case <-t.stopper.ShouldStop():
		return nil
	case err := <-errCh:
		return err
	}
}

// Listen registers a RaftMessageHandler to receive proxied messages.
func (t *RaftTransport) Listen(storeID roachpb.StoreID, handler RaftMessageHandler) {
	t.handlers.Store(int64(storeID), unsafe.Pointer(&handler))
}

// Stop unregisters a RaftMessageHandler.
func (t *RaftTransport) Stop(storeID roachpb.StoreID) {
	t.handlers.Delete(int64(storeID))
}
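
// A hypothetical registration lifecycle, assuming store implements
// RaftMessageHandler (the names here are illustrative):
//
//	transport.Listen(store.StoreID(), store)
//	defer transport.Stop(store.StoreID())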

// processQueue opens a Raft client stream and sends messages from the
// designated queue (ch) via that stream, exiting when an error is received or
// when it idles out. All messages remaining in the queue at that point are
// lost and a new instance of processQueue will be started by the next message
// to be sent.
func (t *RaftTransport) processQueue(
	nodeID roachpb.NodeID,
	ch chan *RaftMessageRequest,
	stats *raftTransportStats,
	stream MultiRaft_RaftMessageBatchClient,
	class rpc.ConnectionClass,
) error {
	errCh := make(chan error, 1)

	// Starting workers in a task prevents data races during shutdown.
	if err := t.stopper.RunTask(
		stream.Context(), "storage.RaftTransport: processing queue",
		func(ctx context.Context) {
			t.stopper.RunWorker(ctx, func(ctx context.Context) {
				errCh <- func() error {
					for {
						resp, err := stream.Recv()
						if err != nil {
							return err
						}
						atomic.AddInt64(&stats.clientRecv, 1)
						handler, ok := t.getHandler(resp.ToReplica.StoreID)
						if !ok {
							log.Warningf(ctx, "no handler found for store %s in response %s",
								resp.ToReplica.StoreID, resp)
							continue
						}
						if err := handler.HandleRaftResponse(ctx, resp); err != nil {
							return err
						}
					}
				}()
			})
		}); err != nil {
		return err
	}

	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	batch := &RaftMessageRequestBatch{}
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			batch.Requests = append(batch.Requests, *req)
			req.release()
			// Pull off as many queued requests as possible.
			//
			// TODO(peter): Think about limiting the size of the batch we send.
			for done := false; !done; {
				select {
				case req = <-ch:
					batch.Requests = append(batch.Requests, *req)
					req.release()
				default:
					done = true
				}
			}

			err := stream.Send(batch)
			batch.Requests = batch.Requests[:0]

			atomic.AddInt64(&stats.clientSent, 1)
			if err != nil {
				return err
			}
		}
	}
}
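
// drainPending distills the drain step of the batching pattern used in
// processQueue above: after the first blocking receive, everything already
// queued is pulled off opportunistically without blocking. This helper is a
// hypothetical illustration and is not called by the transport itself.
func drainPending(ch chan *RaftMessageRequest, batch *RaftMessageRequestBatch) {
	for {
		select {
		case req := <-ch:
			// Copy the request into the batch and return it to the pool.
			batch.Requests = append(batch.Requests, *req)
			req.release()
		default:
			// Nothing else is queued right now; the caller sends what it has.
			return
		}
	}
}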

// getQueue returns the queue for the specified node ID and a boolean
// indicating whether the queue already existed (true) or was created (false).
// If multiple goroutines race to create the queue, LoadOrStore guarantees
// that they all observe the same channel.
func (t *RaftTransport) getQueue(
	nodeID roachpb.NodeID, class rpc.ConnectionClass,
) (chan *RaftMessageRequest, bool) {
	queuesMap := &t.queues[class]
	value, ok := queuesMap.Load(int64(nodeID))
	if !ok {
		ch := make(chan *RaftMessageRequest, raftSendBufferSize)
		value, ok = queuesMap.LoadOrStore(int64(nodeID), unsafe.Pointer(&ch))
	}
	return *(*chan *RaftMessageRequest)(value), ok
}

// SendAsync sends a message to the recipient specified in the request. It
// returns false if the message was dropped, for example because the outgoing
// queue is full or the circuit breaker to the destination is tripped. The
// returned bool may be a false positive but will never be a false negative;
// if sent is true the message may or may not actually be sent but if it's
// false the message definitely was not sent. It is not safe to continue
// using the reference to the provided request.
func (t *RaftTransport) SendAsync(req *RaftMessageRequest, class rpc.ConnectionClass) (sent bool) {
	toNodeID := req.ToReplica.NodeID
	stats := t.getStats(toNodeID, class)
	defer func() {
		if !sent {
			atomic.AddInt64(&stats.clientDropped, 1)
		}
	}()

	if req.RangeID == 0 && len(req.Heartbeats) == 0 && len(req.HeartbeatResps) == 0 {
		// Coalesced heartbeats are addressed to range 0; everything else
		// needs an explicit range ID.
		panic("only messages with coalesced heartbeats or heartbeat responses may be sent to range ID 0")
	}
	if req.Message.Type == raftpb.MsgSnap {
		panic("snapshots must be sent using SendSnapshot")
	}

	if !t.dialer.GetCircuitBreaker(toNodeID, class).Ready() {
		return false
	}

	ch, existingQueue := t.getQueue(toNodeID, class)
	if !existingQueue {
		// Note that startProcessNewQueue is in charge of deleting the queue.
		ctx := t.AnnotateCtx(context.Background())
		if !t.startProcessNewQueue(ctx, toNodeID, class, stats) {
			return false
		}
	}

	select {
	case ch <- req:
		l := int32(len(ch))
		if v := atomic.LoadInt32(&stats.queueMax); v < l {
			atomic.CompareAndSwapInt32(&stats.queueMax, v, l)
		}
		return true
	default:
		req.release()
		return false
	}
}
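
// exampleSendAsync is a hypothetical sketch (not called by this package) of
// how a caller hands a message to the transport. Using rpc.DefaultClass here
// is an assumption for illustration; real callers choose the class per range.
func exampleSendAsync(t *RaftTransport, req *RaftMessageRequest) {
	if !t.SendAsync(req, rpc.DefaultClass) {
		// The message was dropped (queue full, breaker tripped, or the worker
		// could not be started). Raft retries through its own timers, so there
		// is nothing more to do here. Either way, SendAsync took ownership of
		// req, which must not be reused.
	}
}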

// startProcessNewQueue connects to the node and launches a worker goroutine
// that processes the queue for the given nodeID (which must exist) until
// the underlying connection is closed or an error occurs. This method
// takes on the responsibility of deleting the queue when the worker shuts down.
// The class parameter dictates the ConnectionClass which should be used to dial
// the remote node. Traffic for system ranges and heartbeats will receive a
// different class than that of user data ranges.
//
// Returns whether the worker was started (the queue is deleted either way).
func (t *RaftTransport) startProcessNewQueue(
	ctx context.Context,
	toNodeID roachpb.NodeID,
	class rpc.ConnectionClass,
	stats *raftTransportStats,
) (started bool) {
	cleanup := func(ch chan *RaftMessageRequest) {
		// Account for the remainder of `ch` which was never sent.
		// NB: the deferred queue deletion in worker below runs before this
		// cleanup (deferred functions execute in LIFO order), so within a
		// short amount of time nobody should be writing into the channel
		// any more. We might miss a message or two here, but that's
		// OK (there's nobody who can safely close the channel the
		// way the code is written).
		for {
			select {
			case <-ch:
				atomic.AddInt64(&stats.clientDropped, 1)
			default:
				return
			}
		}
	}
	worker := func(ctx context.Context) {
		ch, existingQueue := t.getQueue(toNodeID, class)
		if !existingQueue {
			log.Fatalf(ctx, "queue for n%d does not exist", toNodeID)
		}
		defer cleanup(ch)
		defer t.queues[class].Delete(int64(toNodeID))
		conn, err := t.dialer.Dial(ctx, toNodeID, class)
		if err != nil {
			// DialNode already logs sufficiently, so just return.
			return
		}
		client := NewMultiRaftClient(conn)
		batchCtx, cancel := context.WithCancel(ctx)
		defer cancel()

		stream, err := client.RaftMessageBatch(batchCtx) // closed via cancellation
		if err != nil {
			log.Warningf(ctx, "creating batch client for node %d failed: %+v", toNodeID, err)
			return
		}

		if err := t.processQueue(toNodeID, ch, stats, stream, class); err != nil {
			log.Warningf(ctx, "while processing outgoing Raft queue to node %d: %s", toNodeID, err)
		}
	}
	// Starting workers in a task prevents data races during shutdown.
	workerTask := func(ctx context.Context) {
		t.stopper.RunWorker(ctx, worker)
	}
	err := t.stopper.RunTask(ctx, "storage.RaftTransport: sending messages", workerTask)
	if err != nil {
		t.queues[class].Delete(int64(toNodeID))
		return false
	}
	return true
}

// SendSnapshot streams the given outgoing snapshot. The caller is responsible
// for closing the OutgoingSnapshot.
func (t *RaftTransport) SendSnapshot(
	ctx context.Context,
	raftCfg *base.RaftConfig,
	storePool *StorePool,
	header SnapshotRequest_Header,
	snap *OutgoingSnapshot,
	newBatch func() storage.Batch,
	sent func(),
) error {
	var stream MultiRaft_RaftSnapshotClient
	nodeID := header.RaftMessageRequest.ToReplica.NodeID

	conn, err := t.dialer.Dial(ctx, nodeID, rpc.DefaultClass)
	if err != nil {
		return err
	}

	client := NewMultiRaftClient(conn)
	stream, err = client.RaftSnapshot(ctx)
	if err != nil {
		return err
	}

	defer func() {
		if err := stream.CloseSend(); err != nil {
			log.Warningf(ctx, "failed to close snapshot stream: %+v", err)
		}
	}()
	return sendSnapshot(ctx, raftCfg, t.st, stream, storePool, header, snap, newBatch, sent)
}
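
// A hypothetical call site (the sender constructs header and snap from the
// replica's state; transport, eng, and the callbacks are illustrative names):
//
//	err := transport.SendSnapshot(
//		ctx, raftCfg, storePool, header, snap,
//		func() storage.Batch { return eng.NewBatch() },
//		func() { /* record that the snapshot was sent */ },
//	)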