github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/raft_transport.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"bytes"
	"context"
	"fmt"
	"net"
	"sort"
	"sync/atomic"
	"time"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"go.etcd.io/etcd/raft/raftpb"
	"google.golang.org/grpc"
)

const (
	// Outgoing messages are queued per-node on a channel of this size.
	//
	// TODO(peter): The normal send buffer size is larger than we would like. It
	// is a temporary patch for the issue discussed in #8630 where
	// Store.HandleRaftRequest can block applying a preemptive snapshot for a
	// long enough period of time that grpc flow control kicks in and messages
	// are dropped on the sending side.
	raftSendBufferSize = 10000

	// When no message has been queued for this duration, the corresponding
	// instance of processQueue will shut down.
	//
	// TODO(tamird): make culling of outbound streams more evented, so that we
	// need not rely on this timeout to shut things down.
	raftIdleTimeout = time.Minute
)

// RaftMessageResponseStream is the subset of the
// MultiRaft_RaftMessageServer interface that is needed for sending responses.
type RaftMessageResponseStream interface {
	Context() context.Context
	Send(*RaftMessageResponse) error
}

// lockedRaftMessageResponseStream is an implementation of
// RaftMessageResponseStream which provides support for concurrent calls to
// Send. Note that the default implementation of grpc.Stream for server
// responses (grpc.serverStream) is not safe for concurrent calls to Send.
type lockedRaftMessageResponseStream struct {
	wrapped MultiRaft_RaftMessageBatchServer
	sendMu  syncutil.Mutex
}

func (s *lockedRaftMessageResponseStream) Context() context.Context {
	return s.wrapped.Context()
}

func (s *lockedRaftMessageResponseStream) Send(resp *RaftMessageResponse) error {
	s.sendMu.Lock()
	defer s.sendMu.Unlock()
	return s.wrapped.Send(resp)
}

func (s *lockedRaftMessageResponseStream) Recv() (*RaftMessageRequestBatch, error) {
	// No need for a lock here: only one goroutine calls Recv, and gRPC allows
	// one goroutine to call RecvMsg while another calls SendMsg (RecvMsg is
	// not, however, safe for concurrent calls with itself).
	return s.wrapped.Recv()
}
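
// Illustrative sketch, not part of the original file: the locked wrapper
// matters because RaftMessageBatch hands the same response stream to
// requests that are processed asynchronously, so two handlers may respond
// at once. With the plain grpc.serverStream that would be a data race; here
// the sends are merely serialized (grpcStream, respA, and respB are
// hypothetical):
//
//	stream := &lockedRaftMessageResponseStream{wrapped: grpcStream}
//	go func() { _ = stream.Send(respA) }()
//	go func() { _ = stream.Send(respB) }() // safe: both serialize on sendMu
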
// SnapshotResponseStream is the subset of the
// MultiRaft_RaftSnapshotServer interface that is needed for sending responses.
type SnapshotResponseStream interface {
	Context() context.Context
	Send(*SnapshotResponse) error
	Recv() (*SnapshotRequest, error)
}

// RaftMessageHandler is the interface that must be implemented by
// arguments to RaftTransport.Listen.
type RaftMessageHandler interface {
	// HandleRaftRequest is called for each incoming Raft message. The request is
	// always processed asynchronously and the response is sent over respStream.
	// If an error is encountered during asynchronous processing, it will be
	// streamed back to the sender of the message as a RaftMessageResponse.
	HandleRaftRequest(ctx context.Context, req *RaftMessageRequest,
		respStream RaftMessageResponseStream) *roachpb.Error

	// HandleRaftResponse is called for each raft response. Note that
	// not all messages receive a response. An error is returned if and only if
	// the underlying Raft connection should be closed.
	HandleRaftResponse(context.Context, *RaftMessageResponse) error

	// HandleSnapshot is called for each new incoming snapshot stream, after
	// parsing the initial SnapshotRequest_Header on the stream.
	HandleSnapshot(header *SnapshotRequest_Header, respStream SnapshotResponseStream) error
}

type raftTransportStats struct {
	nodeID        roachpb.NodeID
	queue         int
	queueMax      int32
	clientSent    int64
	clientRecv    int64
	clientDropped int64
	serverSent    int64
	serverRecv    int64
}

type raftTransportStatsSlice []*raftTransportStats

func (s raftTransportStatsSlice) Len() int           { return len(s) }
func (s raftTransportStatsSlice) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
func (s raftTransportStatsSlice) Less(i, j int) bool { return s[i].nodeID < s[j].nodeID }

// RaftTransport handles the rpc messages for raft.
//
// The raft transport is asynchronous with respect to the caller, and
// internally multiplexes outbound messages. Internally, each message is
// queued on a per-destination queue before being asynchronously delivered.
//
// Callers are required to construct a RaftSender before being able to
// dispatch messages, and must provide an error handler which will be invoked
// asynchronously in the event that the recipient of any message closes its
// inbound RPC stream. This callback is asynchronous with respect to the
// outbound message which caused the remote to hang up; all that is known is
// which remote hung up.
type RaftTransport struct {
	log.AmbientContext
	st *cluster.Settings

	stopper *stop.Stopper

	queues   [rpc.NumConnectionClasses]syncutil.IntMap // map[roachpb.NodeID]*chan *RaftMessageRequest
	stats    [rpc.NumConnectionClasses]syncutil.IntMap // map[roachpb.NodeID]*raftTransportStats
	dialer   *nodedialer.Dialer
	handlers syncutil.IntMap // map[roachpb.StoreID]*RaftMessageHandler
}

// NewDummyRaftTransport returns a dummy raft transport for use in tests which
// need a non-nil raft transport that need not function.
func NewDummyRaftTransport(st *cluster.Settings) *RaftTransport {
	resolver := func(roachpb.NodeID) (net.Addr, error) {
		return nil, errors.New("dummy resolver")
	}
	return NewRaftTransport(log.AmbientContext{Tracer: st.Tracer}, st,
		nodedialer.New(nil, resolver), nil, nil)
}
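
// Illustrative sketch, not part of the original file: outside of tests, the
// transport is wired up roughly as follows. rpcCtx, g (a *gossip.Gossip),
// grpcServer, and stopper are assumed to already exist in the caller:
//
//	dialer := nodedialer.New(rpcCtx, gossip.AddressResolver(g))
//	transport := NewRaftTransport(
//		log.AmbientContext{Tracer: st.Tracer}, st, dialer, grpcServer, stopper,
//	)
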
// NewRaftTransport creates a new RaftTransport.
func NewRaftTransport(
	ambient log.AmbientContext,
	st *cluster.Settings,
	dialer *nodedialer.Dialer,
	grpcServer *grpc.Server,
	stopper *stop.Stopper,
) *RaftTransport {
	t := &RaftTransport{
		AmbientContext: ambient,
		st:             st,

		stopper: stopper,
		dialer:  dialer,
	}

	if grpcServer != nil {
		RegisterMultiRaftServer(grpcServer, t)
	}
	// statsMap is used to associate a queue with its raftTransportStats.
	statsMap := make(map[roachpb.NodeID]*raftTransportStats)
	clearStatsMap := func() {
		for k := range statsMap {
			delete(statsMap, k)
		}
	}
	if t.stopper != nil && log.V(1) {
		ctx := t.AnnotateCtx(context.Background())
		t.stopper.RunWorker(ctx, func(ctx context.Context) {
			ticker := time.NewTicker(10 * time.Second)
			defer ticker.Stop()
			lastStats := make(map[roachpb.NodeID]raftTransportStats)
			lastTime := timeutil.Now()
			var stats raftTransportStatsSlice
			for {
				select {
				case <-ticker.C:
					stats = stats[:0]
					getStats := func(k int64, v unsafe.Pointer) bool {
						s := (*raftTransportStats)(v)
						// Clear the queue length stat. Note that this field is only
						// mutated by this goroutine.
						s.queue = 0
						stats = append(stats, s)
						statsMap[roachpb.NodeID(k)] = s
						return true
					}
					setQueueLength := func(k int64, v unsafe.Pointer) bool {
						ch := *(*chan *RaftMessageRequest)(v)
						if s, ok := statsMap[roachpb.NodeID(k)]; ok {
							s.queue += len(ch)
						}
						return true
					}
					for c := range t.stats {
						clearStatsMap()
						t.stats[c].Range(getStats)
						t.queues[c].Range(setQueueLength)
					}
					clearStatsMap() // no need to hold on to references to stats

					now := timeutil.Now()
					elapsed := now.Sub(lastTime).Seconds()
					sort.Sort(stats)
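
					// Illustrative only, not in the original file: given the
					// row format below, one rendered line of the table might
					// read (all values made up):
					//
					//	  1:      0      2          0       120.3       120.1         0.0         0.0
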
					var buf bytes.Buffer
					// NB: The header is 80 characters which should display in a single
					// line on most terminals.
					fmt.Fprintf(&buf,
						"        qlen   qmax   qdropped client-sent client-recv server-sent server-recv\n")
					for _, s := range stats {
						last := lastStats[s.nodeID]
						cur := raftTransportStats{
							nodeID:        s.nodeID,
							queue:         s.queue,
							queueMax:      atomic.LoadInt32(&s.queueMax),
							clientDropped: atomic.LoadInt64(&s.clientDropped),
							clientSent:    atomic.LoadInt64(&s.clientSent),
							clientRecv:    atomic.LoadInt64(&s.clientRecv),
							serverSent:    atomic.LoadInt64(&s.serverSent),
							serverRecv:    atomic.LoadInt64(&s.serverRecv),
						}
						fmt.Fprintf(&buf, " %3d: %6d %6d %10d %11.1f %11.1f %11.1f %11.1f\n",
							cur.nodeID, cur.queue, cur.queueMax, cur.clientDropped,
							float64(cur.clientSent-last.clientSent)/elapsed,
							float64(cur.clientRecv-last.clientRecv)/elapsed,
							float64(cur.serverSent-last.serverSent)/elapsed,
							float64(cur.serverRecv-last.serverRecv)/elapsed)
						lastStats[s.nodeID] = cur
					}
					lastTime = now
					log.Infof(ctx, "stats:\n%s", buf.String())
				case <-t.stopper.ShouldStop():
					return
				}
			}
		})
	}

	return t
}

func (t *RaftTransport) queuedMessageCount() int64 {
	var n int64
	addLength := func(k int64, v unsafe.Pointer) bool {
		ch := *(*chan *RaftMessageRequest)(v)
		n += int64(len(ch))
		return true
	}
	for class := range t.queues {
		t.queues[class].Range(addLength)
	}
	return n
}

func (t *RaftTransport) getHandler(storeID roachpb.StoreID) (RaftMessageHandler, bool) {
	if value, ok := t.handlers.Load(int64(storeID)); ok {
		return *(*RaftMessageHandler)(value), true
	}
	return nil, false
}

// handleRaftRequest proxies a request to the listening server interface.
func (t *RaftTransport) handleRaftRequest(
	ctx context.Context, req *RaftMessageRequest, respStream RaftMessageResponseStream,
) *roachpb.Error {
	handler, ok := t.getHandler(req.ToReplica.StoreID)
	if !ok {
		log.Warningf(ctx, "unable to accept Raft message from %+v: no handler registered for %+v",
			req.FromReplica, req.ToReplica)
		return roachpb.NewError(roachpb.NewStoreNotFoundError(req.ToReplica.StoreID))
	}

	return handler.HandleRaftRequest(ctx, req, respStream)
}

// newRaftMessageResponse constructs a RaftMessageResponse from the
// given request and error.
func newRaftMessageResponse(req *RaftMessageRequest, pErr *roachpb.Error) *RaftMessageResponse {
	resp := &RaftMessageResponse{
		RangeID: req.RangeID,
		// From and To are reversed in the response.
		ToReplica:   req.FromReplica,
		FromReplica: req.ToReplica,
	}
	if pErr != nil {
		resp.Union.SetValue(pErr)
	}
	return resp
}

func (t *RaftTransport) getStats(
	nodeID roachpb.NodeID, class rpc.ConnectionClass,
) *raftTransportStats {
	statsMap := &t.stats[class]
	value, ok := statsMap.Load(int64(nodeID))
	if !ok {
		stats := &raftTransportStats{nodeID: nodeID}
		value, _ = statsMap.LoadOrStore(int64(nodeID), unsafe.Pointer(stats))
	}
	return (*raftTransportStats)(value)
}
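
// Illustrative note, not part of the original file: getStats above (and
// getQueue below) share a lock-free get-or-create idiom. If two goroutines
// race past the Load, both allocate, but LoadOrStore picks a single winner
// and both callers observe the same pointer (m, key, and newEntry are
// stand-ins):
//
//	value, ok := m.Load(key)
//	if !ok {
//		// The loser's freshly allocated entry is simply garbage collected.
//		value, _ = m.LoadOrStore(key, unsafe.Pointer(newEntry()))
//	}
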
// RaftMessageBatch proxies the incoming requests to the listening server interface.
func (t *RaftTransport) RaftMessageBatch(stream MultiRaft_RaftMessageBatchServer) error {
	errCh := make(chan error, 1)

	// The node-stopping error is caught below in the select.
	if err := t.stopper.RunTask(
		stream.Context(), "storage.RaftTransport: processing batch",
		func(ctx context.Context) {
			t.stopper.RunWorker(ctx, func(ctx context.Context) {
				errCh <- func() error {
					var stats *raftTransportStats
					stream := &lockedRaftMessageResponseStream{wrapped: stream}
					for {
						batch, err := stream.Recv()
						if err != nil {
							return err
						}
						if len(batch.Requests) == 0 {
							continue
						}

						// This code always uses the DefaultClass. Class is primarily a
						// client construct and the server has no way to determine which
						// class an inbound connection holds on the client side. Because of
						// this we associate all server receives and sends with the
						// DefaultClass. This data is exclusively used to print a debug
						// log message periodically. Using this policy may lead to a
						// DefaultClass log line showing a high rate of server recv but
						// a low rate of client sends if most of the traffic is due to
						// system ranges.
						//
						// TODO(ajwerner): consider providing transport metadata to inform
						// the server of the connection class, or keep shared stats for all
						// connections with a host.
						if stats == nil {
							stats = t.getStats(batch.Requests[0].FromReplica.NodeID, rpc.DefaultClass)
						}

						for i := range batch.Requests {
							req := &batch.Requests[i]
							atomic.AddInt64(&stats.serverRecv, 1)
							if pErr := t.handleRaftRequest(ctx, req, stream); pErr != nil {
								atomic.AddInt64(&stats.serverSent, 1)
								if err := stream.Send(newRaftMessageResponse(req, pErr)); err != nil {
									return err
								}
							}
						}
					}
				}()
			})
		}); err != nil {
		return err
	}

	select {
	case err := <-errCh:
		return err
	case <-t.stopper.ShouldQuiesce():
		return nil
	}
}

// RaftSnapshot handles incoming streaming snapshot requests.
func (t *RaftTransport) RaftSnapshot(stream MultiRaft_RaftSnapshotServer) error {
	errCh := make(chan error, 1)
	if err := t.stopper.RunAsyncTask(
		stream.Context(), "storage.RaftTransport: processing snapshot",
		func(ctx context.Context) {
			errCh <- func() error {
				req, err := stream.Recv()
				if err != nil {
					return err
				}
				if req.Header == nil {
					return stream.Send(&SnapshotResponse{
						Status:  SnapshotResponse_ERROR,
						Message: "client error: no header in first snapshot request message"})
				}
				rmr := req.Header.RaftMessageRequest
				handler, ok := t.getHandler(rmr.ToReplica.StoreID)
				if !ok {
					log.Warningf(ctx, "unable to accept Raft message from %+v: no handler registered for %+v",
						rmr.FromReplica, rmr.ToReplica)
					return roachpb.NewStoreNotFoundError(rmr.ToReplica.StoreID)
				}
				return handler.HandleSnapshot(req.Header, stream)
			}()
		}); err != nil {
		return err
	}
	select {
	case <-t.stopper.ShouldStop():
		return nil
	case err := <-errCh:
		return err
	}
}

// Listen registers a RaftMessageHandler to receive proxied messages.
func (t *RaftTransport) Listen(storeID roachpb.StoreID, handler RaftMessageHandler) {
	t.handlers.Store(int64(storeID), unsafe.Pointer(&handler))
}

// Stop unregisters a RaftMessageHandler.
func (t *RaftTransport) Stop(storeID roachpb.StoreID) {
	t.handlers.Delete(int64(storeID))
}
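
// Illustrative sketch, not part of the original file: a store registers for
// traffic at startup and unregisters on shutdown. storeID and store (which
// is assumed to implement RaftMessageHandler) come from the caller:
//
//	transport.Listen(storeID, store)
//	defer transport.Stop(storeID)
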
// processQueue opens a Raft client stream and sends messages from the
// designated queue (ch) via that stream, exiting when an error is received or
// when it idles out. All messages remaining in the queue at that point are
// lost and a new instance of processQueue will be started by the next message
// to be sent.
func (t *RaftTransport) processQueue(
	nodeID roachpb.NodeID,
	ch chan *RaftMessageRequest,
	stats *raftTransportStats,
	stream MultiRaft_RaftMessageBatchClient,
	class rpc.ConnectionClass,
) error {
	errCh := make(chan error, 1)

	// Starting workers in a task prevents data races during shutdown.
	if err := t.stopper.RunTask(
		stream.Context(), "storage.RaftTransport: processing queue",
		func(ctx context.Context) {
			t.stopper.RunWorker(ctx, func(ctx context.Context) {
				errCh <- func() error {
					for {
						resp, err := stream.Recv()
						if err != nil {
							return err
						}
						atomic.AddInt64(&stats.clientRecv, 1)
						handler, ok := t.getHandler(resp.ToReplica.StoreID)
						if !ok {
							log.Warningf(ctx, "no handler found for store %s in response %s",
								resp.ToReplica.StoreID, resp)
							continue
						}
						if err := handler.HandleRaftResponse(ctx, resp); err != nil {
							return err
						}
					}
				}()
			})
		}); err != nil {
		return err
	}

	var raftIdleTimer timeutil.Timer
	defer raftIdleTimer.Stop()
	batch := &RaftMessageRequestBatch{}
	for {
		raftIdleTimer.Reset(raftIdleTimeout)
		select {
		case <-t.stopper.ShouldStop():
			return nil
		case <-raftIdleTimer.C:
			raftIdleTimer.Read = true
			return nil
		case err := <-errCh:
			return err
		case req := <-ch:
			batch.Requests = append(batch.Requests, *req)
			req.release()
			// Pull off as many queued requests as possible.
			//
			// TODO(peter): Think about limiting the size of the batch we send.
			for done := false; !done; {
				select {
				case req = <-ch:
					batch.Requests = append(batch.Requests, *req)
					req.release()
				default:
					done = true
				}
			}

			err := stream.Send(batch)
			batch.Requests = batch.Requests[:0]

			atomic.AddInt64(&stats.clientSent, 1)
			if err != nil {
				return err
			}
		}
	}
}

// getQueue returns the queue for the specified node ID and a boolean
// indicating whether the queue already exists (true) or was created (false).
func (t *RaftTransport) getQueue(
	nodeID roachpb.NodeID, class rpc.ConnectionClass,
) (chan *RaftMessageRequest, bool) {
	queuesMap := &t.queues[class]
	value, ok := queuesMap.Load(int64(nodeID))
	if !ok {
		ch := make(chan *RaftMessageRequest, raftSendBufferSize)
		value, ok = queuesMap.LoadOrStore(int64(nodeID), unsafe.Pointer(&ch))
	}
	return *(*chan *RaftMessageRequest)(value), ok
}
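
// Design note (illustrative, not part of the original file): processQueue
// couples two mechanisms. The opportunistic drain means a single gRPC Send
// carries every request that accumulated while the previous Send was in
// flight, so stream writes scale with throughput rather than message count.
// The idle timer bounds how long an unused stream and its goroutines live,
// and Raft's own retransmission makes discarding the residual queue safe.
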
// SendAsync sends a message to the recipient specified in the request. It
// returns false if the outgoing queue is full. The returned bool may be a false
// positive but will never be a false negative; if sent is true the message may
// or may not actually be sent but if it's false the message definitely was not
// sent. It is not safe to continue using the reference to the provided request.
func (t *RaftTransport) SendAsync(req *RaftMessageRequest, class rpc.ConnectionClass) (sent bool) {
	toNodeID := req.ToReplica.NodeID
	stats := t.getStats(toNodeID, class)
	defer func() {
		if !sent {
			atomic.AddInt64(&stats.clientDropped, 1)
		}
	}()

	if req.RangeID == 0 && len(req.Heartbeats) == 0 && len(req.HeartbeatResps) == 0 {
		// Coalesced heartbeats are addressed to range 0; everything else
		// needs an explicit range ID.
		panic("only messages with coalesced heartbeats or heartbeat responses may be sent to range ID 0")
	}
	if req.Message.Type == raftpb.MsgSnap {
		panic("snapshots must be sent using SendSnapshot")
	}

	if !t.dialer.GetCircuitBreaker(toNodeID, class).Ready() {
		return false
	}

	ch, existingQueue := t.getQueue(toNodeID, class)
	if !existingQueue {
		// Note that startProcessNewQueue is in charge of deleting the queue.
		ctx := t.AnnotateCtx(context.Background())
		if !t.startProcessNewQueue(ctx, toNodeID, class, stats) {
			return false
		}
	}

	select {
	case ch <- req:
		l := int32(len(ch))
		if v := atomic.LoadInt32(&stats.queueMax); v < l {
			atomic.CompareAndSwapInt32(&stats.queueMax, v, l)
		}
		return true
	default:
		req.release()
		return false
	}
}
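
// Illustrative sketch, not part of the original file: a typical caller,
// assuming req was obtained from the package's request pool:
//
//	if !t.SendAsync(req, rpc.DefaultClass) {
//		// Dropped: circuit breaker open, worker failed to start, or the
//		// queue was full. Either way the transport owns req now; do not
//		// reuse it. Raft tolerates the loss and will retransmit.
//	}
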
// startProcessNewQueue connects to the node and launches a worker goroutine
// that processes the queue for the given nodeID (which must exist) until
// the underlying connection is closed or an error occurs. This method
// takes on the responsibility of deleting the queue when the worker shuts down.
// The class parameter dictates the ConnectionClass which should be used to dial
// the remote node. Traffic for system ranges and heartbeats will receive a
// different class than that of user data ranges.
//
// Returns whether the worker was started (the queue is deleted either way).
func (t *RaftTransport) startProcessNewQueue(
	ctx context.Context,
	toNodeID roachpb.NodeID,
	class rpc.ConnectionClass,
	stats *raftTransportStats,
) (started bool) {
	cleanup := func(ch chan *RaftMessageRequest) {
		// Account for the remainder of `ch` which was never sent.
		// NB: we deleted the queue above, so within a short amount
		// of time nobody should be writing into the channel any
		// more. We might miss a message or two here, but that's
		// OK (there's nobody who can safely close the channel the
		// way the code is written).
		for {
			select {
			case <-ch:
				atomic.AddInt64(&stats.clientDropped, 1)
			default:
				return
			}
		}
	}
	worker := func(ctx context.Context) {
		ch, existingQueue := t.getQueue(toNodeID, class)
		if !existingQueue {
			log.Fatalf(ctx, "queue for n%d does not exist", toNodeID)
		}
		defer cleanup(ch)
		defer t.queues[class].Delete(int64(toNodeID))
		conn, err := t.dialer.Dial(ctx, toNodeID, class)
		if err != nil {
			// DialNode already logs sufficiently, so just return.
			return
		}
		client := NewMultiRaftClient(conn)
		batchCtx, cancel := context.WithCancel(ctx)
		defer cancel()

		stream, err := client.RaftMessageBatch(batchCtx) // closed via cancellation
		if err != nil {
			log.Warningf(ctx, "creating batch client for node %d failed: %+v", toNodeID, err)
			return
		}

		if err := t.processQueue(toNodeID, ch, stats, stream, class); err != nil {
			log.Warningf(ctx, "while processing outgoing Raft queue to node %d: %s", toNodeID, err)
		}
	}
	// Starting workers in a task prevents data races during shutdown.
	workerTask := func(ctx context.Context) {
		t.stopper.RunWorker(ctx, worker)
	}
	err := t.stopper.RunTask(ctx, "storage.RaftTransport: sending messages", workerTask)
	if err != nil {
		t.queues[class].Delete(int64(toNodeID))
		return false
	}
	return true
}

// SendSnapshot streams the given outgoing snapshot. The caller is responsible
// for closing the OutgoingSnapshot.
func (t *RaftTransport) SendSnapshot(
	ctx context.Context,
	raftCfg *base.RaftConfig,
	storePool *StorePool,
	header SnapshotRequest_Header,
	snap *OutgoingSnapshot,
	newBatch func() storage.Batch,
	sent func(),
) error {
	var stream MultiRaft_RaftSnapshotClient
	nodeID := header.RaftMessageRequest.ToReplica.NodeID

	conn, err := t.dialer.Dial(ctx, nodeID, rpc.DefaultClass)
	if err != nil {
		return err
	}

	client := NewMultiRaftClient(conn)
	stream, err = client.RaftSnapshot(ctx)
	if err != nil {
		return err
	}

	defer func() {
		if err := stream.CloseSend(); err != nil {
			log.Warningf(ctx, "failed to close snapshot stream: %+v", err)
		}
	}()
	return sendSnapshot(ctx, raftCfg, t.st, stream, storePool, header, snap, newBatch, sent)
}
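
// Illustrative sketch, not part of the original file: snapshots bypass the
// per-node queues entirely. A caller ends up doing roughly the following,
// where header, snap, eng, and the callbacks are assumed to be prepared
// elsewhere:
//
//	defer snap.Close() // the caller, not SendSnapshot, closes the snapshot
//	if err := t.SendSnapshot(ctx, raftCfg, storePool, header, snap,
//		func() storage.Batch { return eng.NewBatch() },
//		func() { /* record that the snapshot hit the wire */ },
//	); err != nil {
//		return err
//	}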