github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/conn/node.go (about) 1 /* 2 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package conn 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "fmt" 23 "math/rand" 24 "strings" 25 "sync" 26 "sync/atomic" 27 "time" 28 29 "github.com/dgraph-io/badger/y" 30 "github.com/dgraph-io/dgo/protos/api" 31 "github.com/dgraph-io/dgraph/protos/pb" 32 "github.com/dgraph-io/dgraph/raftwal" 33 "github.com/dgraph-io/dgraph/x" 34 "github.com/golang/glog" 35 "github.com/pkg/errors" 36 "go.etcd.io/etcd/raft" 37 "go.etcd.io/etcd/raft/raftpb" 38 otrace "go.opencensus.io/trace" 39 "golang.org/x/net/context" 40 ) 41 42 var ( 43 // ErrNoNode is returned when no node has been set up. 44 ErrNoNode = errors.Errorf("No node has been set up yet") 45 ) 46 47 // Node represents a node participating in the RAFT protocol. 48 type Node struct { 49 x.SafeMutex 50 51 joinLock sync.Mutex 52 53 // Used to keep track of lin read requests. 54 requestCh chan linReadReq 55 56 // SafeMutex is for fields which can be changed after init. 57 _confState *raftpb.ConfState 58 _raft raft.Node 59 60 // Fields which are never changed after init. 61 Cfg *raft.Config 62 MyAddr string 63 Id uint64 64 peers map[uint64]string 65 confChanges map[uint64]chan error 66 messages chan sendmsg 67 RaftContext *pb.RaftContext 68 Store *raftwal.DiskStorage 69 Rand *rand.Rand 70 71 Proposals proposals 72 // applied is used to keep track of the applied RAFT proposals. 73 // The stages are proposed -> committed (accepted by cluster) -> 74 // applied (to PL) -> synced (to BadgerDB). 75 Applied y.WaterMark 76 77 heartbeatsOut int64 78 heartbeatsIn int64 79 } 80 81 // NewNode returns a new Node instance. 82 func NewNode(rc *pb.RaftContext, store *raftwal.DiskStorage) *Node { 83 snap, err := store.Snapshot() 84 x.Check(err) 85 86 n := &Node{ 87 Id: rc.Id, 88 MyAddr: rc.Addr, 89 Store: store, 90 Cfg: &raft.Config{ 91 ID: rc.Id, 92 ElectionTick: 20, // 2s if we call Tick() every 100 ms. 93 HeartbeatTick: 1, // 100ms if we call Tick() every 100 ms. 94 Storage: store, 95 MaxInflightMsgs: 256, 96 MaxSizePerMsg: 256 << 10, // 256 KB should allow more batching. 97 MaxCommittedSizePerReady: 64 << 20, // Avoid loading entire Raft log into memory. 98 // We don't need lease based reads. They cause issues because they 99 // require CheckQuorum to be true, and that causes a lot of issues 100 // for us during cluster bootstrapping and later. A seemingly 101 // healthy cluster would just cause leader to step down due to 102 // "inactive" quorum, and then disallow anyone from becoming leader. 103 // So, let's stick to default options. Let's achieve correctness, 104 // then we achieve performance. Plus, for the Dgraph alphas, we'll 105 // be soon relying only on Timestamps for blocking reads and 106 // achieving linearizability, than checking quorums (Zero would 107 // still check quorums). 108 ReadOnlyOption: raft.ReadOnlySafe, 109 // When a disconnected node joins back, it forces a leader change, 110 // as it starts with a higher term, as described in Raft thesis (not 111 // the paper) in section 9.6. This setting can avoid that by only 112 // increasing the term, if the node has a good chance of becoming 113 // the leader. 114 PreVote: true, 115 116 // We can explicitly set Applied to the first index in the Raft log, 117 // so it does not derive it separately, thus avoiding a crash when 118 // the Applied is set to below snapshot index by Raft. 119 // In case this is a new Raft log, first would be 1, and therefore 120 // Applied would be zero, hence meeting the condition by the library 121 // that Applied should only be set during a restart. 122 // 123 // Update: Set the Applied to the latest snapshot, because it seems 124 // like somehow the first index can be out of sync with the latest 125 // snapshot. 126 Applied: snap.Metadata.Index, 127 128 Logger: &x.ToGlog{}, 129 }, 130 // processConfChange etc are not throttled so some extra delta, so that we don't 131 // block tick when applyCh is full 132 Applied: y.WaterMark{Name: fmt.Sprintf("Applied watermark")}, 133 RaftContext: rc, 134 Rand: rand.New(&lockedSource{src: rand.NewSource(time.Now().UnixNano())}), 135 confChanges: make(map[uint64]chan error), 136 messages: make(chan sendmsg, 100), 137 peers: make(map[uint64]string), 138 requestCh: make(chan linReadReq, 100), 139 } 140 n.Applied.Init(nil) 141 // This should match up to the Applied index set above. 142 n.Applied.SetDoneUntil(n.Cfg.Applied) 143 glog.Infof("Setting raft.Config to: %+v\n", n.Cfg) 144 return n 145 } 146 147 // ReportRaftComms periodically prints the state of the node (heartbeats in and out). 148 func (n *Node) ReportRaftComms() { 149 if !glog.V(3) { 150 return 151 } 152 ticker := time.NewTicker(time.Second) 153 defer ticker.Stop() 154 155 for range ticker.C { 156 out := atomic.SwapInt64(&n.heartbeatsOut, 0) 157 in := atomic.SwapInt64(&n.heartbeatsIn, 0) 158 glog.Infof("RaftComm: [%#x] Heartbeats out: %d, in: %d", n.Id, out, in) 159 } 160 } 161 162 // SetRaft would set the provided raft.Node to this node. 163 // It would check fail if the node is already set. 164 func (n *Node) SetRaft(r raft.Node) { 165 n.Lock() 166 defer n.Unlock() 167 x.AssertTrue(n._raft == nil) 168 n._raft = r 169 } 170 171 // Raft would return back the raft.Node stored in the node. 172 func (n *Node) Raft() raft.Node { 173 n.RLock() 174 defer n.RUnlock() 175 return n._raft 176 } 177 178 // SetConfState would store the latest ConfState generated by ApplyConfChange. 179 func (n *Node) SetConfState(cs *raftpb.ConfState) { 180 glog.Infof("Setting conf state to %+v\n", cs) 181 n.Lock() 182 defer n.Unlock() 183 n._confState = cs 184 } 185 186 // DoneConfChange marks a configuration change as done and sends the given error to the 187 // config channel. 188 func (n *Node) DoneConfChange(id uint64, err error) { 189 n.Lock() 190 defer n.Unlock() 191 ch, has := n.confChanges[id] 192 if !has { 193 return 194 } 195 delete(n.confChanges, id) 196 ch <- err 197 } 198 199 func (n *Node) storeConfChange(che chan error) uint64 { 200 n.Lock() 201 defer n.Unlock() 202 id := rand.Uint64() 203 _, has := n.confChanges[id] 204 for has { 205 id = rand.Uint64() 206 _, has = n.confChanges[id] 207 } 208 n.confChanges[id] = che 209 return id 210 } 211 212 // ConfState would return the latest ConfState stored in node. 213 func (n *Node) ConfState() *raftpb.ConfState { 214 n.RLock() 215 defer n.RUnlock() 216 return n._confState 217 } 218 219 // Peer returns the address of the peer with the given id. 220 func (n *Node) Peer(pid uint64) (string, bool) { 221 n.RLock() 222 defer n.RUnlock() 223 addr, ok := n.peers[pid] 224 return addr, ok 225 } 226 227 // SetPeer sets the address of the peer with the given id. The address must not be empty. 228 func (n *Node) SetPeer(pid uint64, addr string) { 229 x.AssertTruef(addr != "", "SetPeer for peer %d has empty addr.", pid) 230 n.Lock() 231 defer n.Unlock() 232 n.peers[pid] = addr 233 } 234 235 // Send sends the given RAFT message from this node. 236 func (n *Node) Send(msg raftpb.Message) { 237 x.AssertTruef(n.Id != msg.To, "Sending message to itself") 238 data, err := msg.Marshal() 239 x.Check(err) 240 241 if glog.V(2) { 242 switch msg.Type { 243 case raftpb.MsgHeartbeat, raftpb.MsgHeartbeatResp: 244 atomic.AddInt64(&n.heartbeatsOut, 1) 245 case raftpb.MsgReadIndex, raftpb.MsgReadIndexResp: 246 case raftpb.MsgApp, raftpb.MsgAppResp: 247 case raftpb.MsgProp: 248 default: 249 glog.Infof("RaftComm: [%#x] Sending message of type %s to %#x", msg.From, msg.Type, msg.To) 250 } 251 } 252 // As long as leadership is stable, any attempted Propose() calls should be reflected in the 253 // next raft.Ready.Messages. Leaders will send MsgApps to the followers; followers will send 254 // MsgProp to the leader. It is up to the transport layer to get those messages to their 255 // destination. If a MsgApp gets dropped by the transport layer, it will get retried by raft 256 // (i.e. it will appear in a future Ready.Messages), but MsgProp will only be sent once. During 257 // leadership transitions, proposals may get dropped even if the network is reliable. 258 // 259 // We can't do a select default here. The messages must be sent to the channel, otherwise we 260 // should block until the channel can accept these messages. BatchAndSendMessages would take 261 // care of dropping messages which can't be sent due to network issues to the corresponding 262 // node. But, we shouldn't take the liberty to do that here. It would take us more time to 263 // repropose these dropped messages anyway, than to block here a bit waiting for the messages 264 // channel to clear out. 265 n.messages <- sendmsg{to: msg.To, data: data} 266 } 267 268 // Snapshot returns the current snapshot. 269 func (n *Node) Snapshot() (raftpb.Snapshot, error) { 270 if n == nil || n.Store == nil { 271 return raftpb.Snapshot{}, errors.New("Uninitialized node or raft store") 272 } 273 return n.Store.Snapshot() 274 } 275 276 // SaveToStorage saves the hard state, entries, and snapshot to persistent storage, in that order. 277 func (n *Node) SaveToStorage(h raftpb.HardState, es []raftpb.Entry, s raftpb.Snapshot) { 278 for { 279 if err := n.Store.Save(h, es, s); err != nil { 280 glog.Errorf("While trying to save Raft update: %v. Retrying...", err) 281 } else { 282 return 283 } 284 } 285 } 286 287 // PastLife returns the index of the snapshot before the restart (if any) and whether there was 288 // a previous state that should be recovered after a restart. 289 func (n *Node) PastLife() (uint64, bool, error) { 290 var ( 291 sp raftpb.Snapshot 292 idx uint64 293 restart bool 294 rerr error 295 ) 296 sp, rerr = n.Store.Snapshot() 297 if rerr != nil { 298 return 0, false, rerr 299 } 300 if !raft.IsEmptySnap(sp) { 301 glog.Infof("Found Snapshot.Metadata: %+v\n", sp.Metadata) 302 restart = true 303 idx = sp.Metadata.Index 304 } 305 306 var hd raftpb.HardState 307 hd, rerr = n.Store.HardState() 308 if rerr != nil { 309 return 0, false, rerr 310 } 311 if !raft.IsEmptyHardState(hd) { 312 glog.Infof("Found hardstate: %+v\n", hd) 313 restart = true 314 } 315 316 var num int 317 num, rerr = n.Store.NumEntries() 318 if rerr != nil { 319 return 0, false, rerr 320 } 321 glog.Infof("Group %d found %d entries\n", n.RaftContext.Group, num) 322 // We'll always have at least one entry. 323 if num > 1 { 324 restart = true 325 } 326 return idx, restart, nil 327 } 328 329 const ( 330 messageBatchSoftLimit = 10e6 331 ) 332 333 type stream struct { 334 msgCh chan []byte 335 alive int32 336 } 337 338 // BatchAndSendMessages sends messages in batches. 339 func (n *Node) BatchAndSendMessages() { 340 batches := make(map[uint64]*bytes.Buffer) 341 streams := make(map[uint64]*stream) 342 343 for { 344 totalSize := 0 345 sm := <-n.messages 346 slurp_loop: 347 for { 348 var buf *bytes.Buffer 349 if b, ok := batches[sm.to]; !ok { 350 buf = new(bytes.Buffer) 351 batches[sm.to] = buf 352 } else { 353 buf = b 354 } 355 totalSize += 4 + len(sm.data) 356 x.Check(binary.Write(buf, binary.LittleEndian, uint32(len(sm.data)))) 357 x.Check2(buf.Write(sm.data)) 358 359 if totalSize > messageBatchSoftLimit { 360 // We limit the batch size, but we aren't pushing back on 361 // n.messages, because the loop below spawns a goroutine 362 // to do its dirty work. This is good because right now 363 // (*node).send fails(!) if the channel is full. 364 break 365 } 366 367 select { 368 case sm = <-n.messages: 369 default: 370 break slurp_loop 371 } 372 } 373 374 for to, buf := range batches { 375 if buf.Len() == 0 { 376 continue 377 } 378 s, ok := streams[to] 379 if !ok || atomic.LoadInt32(&s.alive) <= 0 { 380 s = &stream{ 381 msgCh: make(chan []byte, 100), 382 alive: 1, 383 } 384 go n.streamMessages(to, s) 385 streams[to] = s 386 } 387 data := make([]byte, buf.Len()) 388 copy(data, buf.Bytes()) 389 buf.Reset() 390 391 select { 392 case s.msgCh <- data: 393 default: 394 } 395 } 396 } 397 } 398 399 func (n *Node) streamMessages(to uint64, s *stream) { 400 defer atomic.StoreInt32(&s.alive, 0) 401 402 // Exit after this deadline. Let BatchAndSendMessages create another goroutine, if needed. 403 // Let's set the deadline to 10s because if we increase it, then it takes longer to recover from 404 // a partition and get a new leader. 405 deadline := time.Now().Add(10 * time.Second) 406 ticker := time.NewTicker(time.Second) 407 defer ticker.Stop() 408 409 var logged int 410 for range ticker.C { // Don't do this in an busy-wait loop, use a ticker. 411 if err := n.doSendMessage(to, s.msgCh); err != nil { 412 // Update lastLog so we print error only a few times if we are not able to connect. 413 // Otherwise, the log is polluted with repeated errors. 414 if logged == 0 { 415 glog.Warningf("Unable to send message to peer: %#x. Error: %v", to, err) 416 logged++ 417 } 418 } 419 if time.Now().After(deadline) { 420 return 421 } 422 } 423 } 424 425 func (n *Node) doSendMessage(to uint64, msgCh chan []byte) error { 426 addr, has := n.Peer(to) 427 if !has { 428 return errors.Errorf("Do not have address of peer %#x", to) 429 } 430 pool, err := GetPools().Get(addr) 431 if err != nil { 432 return err 433 } 434 435 c := pb.NewRaftClient(pool.Get()) 436 ctx, span := otrace.StartSpan(context.Background(), 437 fmt.Sprintf("RaftMessage-%d-to-%d", n.Id, to)) 438 defer span.End() 439 440 mc, err := c.RaftMessage(ctx) 441 if err != nil { 442 return err 443 } 444 445 var packets, lastPackets uint64 446 slurp := func(batch *pb.RaftBatch) { 447 for { 448 if len(batch.Payload.Data) > messageBatchSoftLimit { 449 return 450 } 451 select { 452 case data := <-msgCh: 453 batch.Payload.Data = append(batch.Payload.Data, data...) 454 packets++ 455 default: 456 return 457 } 458 } 459 } 460 461 ctx = mc.Context() 462 ticker := time.NewTicker(3 * time.Minute) 463 defer ticker.Stop() 464 465 for { 466 select { 467 case data := <-msgCh: 468 batch := &pb.RaftBatch{ 469 Context: n.RaftContext, 470 Payload: &api.Payload{Data: data}, 471 } 472 packets++ 473 slurp(batch) // Pick up more entries from msgCh, if present. 474 span.Annotatef(nil, "[Packets: %d] Sending data of length: %d.", 475 packets, len(batch.Payload.Data)) 476 if err := mc.Send(batch); err != nil { 477 span.Annotatef(nil, "Error while mc.Send: %v", err) 478 switch { 479 case strings.Contains(err.Error(), "TransientFailure"): 480 glog.Warningf("Reporting node: %d addr: %s as unreachable.", to, pool.Addr) 481 n.Raft().ReportUnreachable(to) 482 pool.SetUnhealthy() 483 default: 484 } 485 // We don't need to do anything if we receive any error while sending message. 486 // RAFT would automatically retry. 487 return err 488 } 489 case <-ticker.C: 490 if lastPackets == packets { 491 span.Annotatef(nil, 492 "No activity for a while [Packets == %d]. Closing connection.", packets) 493 return mc.CloseSend() 494 } 495 lastPackets = packets 496 case <-ctx.Done(): 497 return ctx.Err() 498 } 499 } 500 } 501 502 // Connect connects the node and makes its peerPool refer to the constructed pool and address 503 // (possibly updating ourselves from the old address.) (Unless pid is ourselves, in which 504 // case this does nothing.) 505 func (n *Node) Connect(pid uint64, addr string) { 506 if pid == n.Id { 507 return 508 } 509 if paddr, ok := n.Peer(pid); ok && paddr == addr { 510 // Already connected. 511 return 512 } 513 // Here's what we do. Right now peerPool maps peer node id's to addr values. If 514 // a *pool can be created, good, but if not, we still create a peerPoolEntry with 515 // a nil *pool. 516 if addr == n.MyAddr { 517 // TODO: Note this fact in more general peer health info somehow. 518 glog.Infof("Peer %d claims same host as me\n", pid) 519 n.SetPeer(pid, addr) 520 return 521 } 522 GetPools().Connect(addr) 523 n.SetPeer(pid, addr) 524 } 525 526 // DeletePeer deletes the record of the peer with the given id. 527 func (n *Node) DeletePeer(pid uint64) { 528 if pid == n.Id { 529 return 530 } 531 n.Lock() 532 defer n.Unlock() 533 delete(n.peers, pid) 534 } 535 536 var errInternalRetry = errors.New("Retry proposal again") 537 538 func (n *Node) proposeConfChange(ctx context.Context, pb raftpb.ConfChange) error { 539 cctx, cancel := context.WithTimeout(ctx, 3*time.Second) 540 defer cancel() 541 542 ch := make(chan error, 1) 543 id := n.storeConfChange(ch) 544 // TODO: Delete id from the map. 545 pb.ID = id 546 if err := n.Raft().ProposeConfChange(cctx, pb); err != nil { 547 if cctx.Err() != nil { 548 return errInternalRetry 549 } 550 glog.Warningf("Error while proposing conf change: %v", err) 551 return err 552 } 553 select { 554 case err := <-ch: 555 return err 556 case <-ctx.Done(): 557 return ctx.Err() 558 case <-cctx.Done(): 559 return errInternalRetry 560 } 561 } 562 563 func (n *Node) addToCluster(ctx context.Context, pid uint64) error { 564 addr, ok := n.Peer(pid) 565 x.AssertTruef(ok, "Unable to find conn pool for peer: %#x", pid) 566 rc := &pb.RaftContext{ 567 Addr: addr, 568 Group: n.RaftContext.Group, 569 Id: pid, 570 } 571 rcBytes, err := rc.Marshal() 572 x.Check(err) 573 574 cc := raftpb.ConfChange{ 575 Type: raftpb.ConfChangeAddNode, 576 NodeID: pid, 577 Context: rcBytes, 578 } 579 err = errInternalRetry 580 for err == errInternalRetry { 581 glog.Infof("Trying to add %#x to cluster. Addr: %v\n", pid, addr) 582 glog.Infof("Current confstate at %#x: %+v\n", n.Id, n.ConfState()) 583 err = n.proposeConfChange(ctx, cc) 584 } 585 return err 586 } 587 588 // ProposePeerRemoval proposes a new configuration with the peer with the given id removed. 589 func (n *Node) ProposePeerRemoval(ctx context.Context, id uint64) error { 590 if n.Raft() == nil { 591 return ErrNoNode 592 } 593 if _, ok := n.Peer(id); !ok && id != n.RaftContext.Id { 594 return errors.Errorf("Node %#x not part of group", id) 595 } 596 cc := raftpb.ConfChange{ 597 Type: raftpb.ConfChangeRemoveNode, 598 NodeID: id, 599 } 600 err := errInternalRetry 601 for err == errInternalRetry { 602 err = n.proposeConfChange(ctx, cc) 603 } 604 return err 605 } 606 607 type linReadReq struct { 608 // A one-shot chan which we send a raft index upon. 609 indexCh chan<- uint64 610 } 611 612 var errReadIndex = errors.Errorf( 613 "Cannot get linearized read (time expired or no configured leader)") 614 615 // WaitLinearizableRead waits until a linearizable read can be performed. 616 func (n *Node) WaitLinearizableRead(ctx context.Context) error { 617 span := otrace.FromContext(ctx) 618 span.Annotate(nil, "WaitLinearizableRead") 619 620 indexCh := make(chan uint64, 1) 621 select { 622 case n.requestCh <- linReadReq{indexCh: indexCh}: 623 span.Annotate(nil, "Pushed to requestCh") 624 case <-ctx.Done(): 625 span.Annotate(nil, "Context expired") 626 return ctx.Err() 627 } 628 629 select { 630 case index := <-indexCh: 631 span.Annotatef(nil, "Received index: %d", index) 632 if index == 0 { 633 return errReadIndex 634 } 635 err := n.Applied.WaitForMark(ctx, index) 636 span.Annotatef(nil, "Error from Applied.WaitForMark: %v", err) 637 return err 638 case <-ctx.Done(): 639 span.Annotate(nil, "Context expired") 640 return ctx.Err() 641 } 642 } 643 644 // RunReadIndexLoop runs the RAFT index in a loop. 645 func (n *Node) RunReadIndexLoop(closer *y.Closer, readStateCh <-chan raft.ReadState) { 646 defer closer.Done() 647 readIndex := func(activeRctx []byte) (uint64, error) { 648 // Read Request can get rejected then we would wait indefinitely on the channel 649 // so have a timeout. 650 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) 651 defer cancel() 652 653 if err := n.Raft().ReadIndex(ctx, activeRctx); err != nil { 654 glog.Errorf("Error while trying to call ReadIndex: %v\n", err) 655 return 0, err 656 } 657 658 again: 659 select { 660 case <-closer.HasBeenClosed(): 661 return 0, errors.New("Closer has been called") 662 case rs := <-readStateCh: 663 if !bytes.Equal(activeRctx, rs.RequestCtx) { 664 glog.V(3).Infof("Read state: %x != requested %x", rs.RequestCtx, activeRctx[:]) 665 goto again 666 } 667 return rs.Index, nil 668 case <-ctx.Done(): 669 glog.Warningf("[%#x] Read index context timed out\n", n.Id) 670 return 0, errInternalRetry 671 } 672 } // end of readIndex func 673 674 // We maintain one linearizable ReadIndex request at a time. Others wait queued behind 675 // requestCh. 676 requests := []linReadReq{} 677 for { 678 select { 679 case <-closer.HasBeenClosed(): 680 return 681 case <-readStateCh: 682 // Do nothing, discard ReadState as we don't have any pending ReadIndex requests. 683 case req := <-n.requestCh: 684 slurpLoop: 685 for { 686 requests = append(requests, req) 687 select { 688 case req = <-n.requestCh: 689 default: 690 break slurpLoop 691 } 692 } 693 // Create one activeRctx slice for the read index, even if we have to call readIndex 694 // repeatedly. That way, we can process the requests as soon as we encounter the first 695 // activeRctx. This is better than flooding readIndex with a new activeRctx on each 696 // call, causing more unique traffic and further delays in request processing. 697 activeRctx := make([]byte, 8) 698 x.Check2(n.Rand.Read(activeRctx)) 699 glog.V(3).Infof("Request readctx: %#x", activeRctx) 700 for { 701 index, err := readIndex(activeRctx) 702 if err == errInternalRetry { 703 continue 704 } 705 if err != nil { 706 index = 0 707 glog.Errorf("[%#x] While trying to do lin read index: %v", n.Id, err) 708 } 709 for _, req := range requests { 710 req.indexCh <- index 711 } 712 break 713 } 714 requests = requests[:0] 715 } 716 } 717 } 718 719 func (n *Node) joinCluster(ctx context.Context, rc *pb.RaftContext) (*api.Payload, error) { 720 // Only process one JoinCluster request at a time. 721 n.joinLock.Lock() 722 defer n.joinLock.Unlock() 723 724 // Check that the new node is from the same group as me. 725 if rc.Group != n.RaftContext.Group { 726 return nil, errors.Errorf("Raft group mismatch") 727 } 728 // Also check that the new node is not me. 729 if rc.Id == n.RaftContext.Id { 730 return nil, errors.Errorf("REUSE_RAFTID: Raft ID duplicates mine: %+v", rc) 731 } 732 733 // Check that the new node is not already part of the group. 734 if addr, ok := n.Peer(rc.Id); ok && rc.Addr != addr { 735 // There exists a healthy connection to server with same id. 736 if _, err := GetPools().Get(addr); err == nil { 737 return &api.Payload{}, errors.Errorf( 738 "REUSE_ADDR: IP Address same as existing peer: %s", addr) 739 } 740 } 741 n.Connect(rc.Id, rc.Addr) 742 743 err := n.addToCluster(context.Background(), rc.Id) 744 glog.Infof("[%#x] Done joining cluster with err: %v", rc.Id, err) 745 return &api.Payload{}, err 746 }