github.com/aergoio/aergo@v1.3.1/consensus/impl/raftv2/cluster.go

package raftv2

import (
	"bytes"
	"context"
	"crypto/sha1"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"sort"
	"sync"
	"time"

	"github.com/aergoio/aergo/cmd/aergocli/util"
	"github.com/aergoio/aergo/consensus"
	"github.com/aergoio/aergo/internal/enc"
	"github.com/aergoio/aergo/message"
	"github.com/aergoio/aergo/pkg/component"
	"github.com/aergoio/aergo/types"
	raftlib "github.com/aergoio/etcd/raft"
	"github.com/aergoio/etcd/raft/raftpb"
)

var (
	MaxConfChangeTimeOut = time.Second * 100

	ErrClusterHasNoMember   = errors.New("cluster has no member")
	ErrNotExistRaftMember   = errors.New("member does not exist in raft cluster")
	ErrNoEnableSyncPeer     = errors.New("no peer to sync chain")
	ErrMemberAlreadyApplied = errors.New("member is already added")

	ErrInvalidMembershipReqType = errors.New("invalid type of membership change request")
	ErrPendingConfChange        = errors.New("pending membership change request is in progress. try again when it is finished")
	ErrConChangeTimeOut         = errors.New("membership change request timed out")
	ErrConfChangeChannelBusy    = errors.New("channel of conf change propose is busy")
	ErrCCMemberIsNil            = errors.New("member is nil")
	ErrNotMatchedRaftName       = errors.New("mismatched name of raft identity")
	ErrNotMatchedRaftPeerID     = errors.New("mismatched peerid of raft identity")
	ErrNotExitRaftProgress      = errors.New("progress of this node doesn't exist")
	ErrUnhealtyNodeExist        = errors.New("can't add a node while unhealthy nodes exist")
	ErrRemoveHealthyNode        = errors.New("removing a healthy node may cause the cluster to hang")
)

const (
	MembersNameInit    = "init"
	MembersNameApplied = "applied"
	MembersNameRemoved = "removed"
	InvalidClusterID   = 0
)

type RaftInfo struct {
	Leader string
	Total  uint32
	Name   string
	RaftId string
	Status *json.RawMessage
}

type NotifyFn func(event *message.RaftClusterEvent)
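// The snippet below is an illustrative sketch, not part of the original source: it shows
// how a caller might wire a NotifyFn when constructing a cluster so that membership
// changes are forwarded to the rest of the node. All variable values are hypothetical.
//
//	notify := func(event *message.RaftClusterEvent) {
//		logger.Info().Int("added", len(event.BPAdded)).Int("removed", len(event.BPRemoved)).Msg("raft BP set changed")
//	}
//	cl := NewCluster(chainID, blockFactory, "bp1", p2pPeerID, chainTimestamp, notify)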
// raft cluster membership
// copied from dpos/bp
// TODO refactoring
// Cluster represents a cluster of block producers.
type Cluster struct {
	component.ICompSyncRequester
	sync.Mutex
	cdb consensus.ChainDB

	chainID        []byte
	chainTimestamp int64
	rs             *raftServer

	appliedIndex uint64
	appliedTerm  uint64

	identity consensus.RaftIdentity

	Size uint32

	// @ MatchClusterAndConfState
	// Cluster members must match the nodes of confstate; otherwise a confchange may fail and be skipped when compared against the cluster members.
	// A mismatch between cluster and confstate occurs when a node joins an existing cluster: the joined node starts from the latest members, but its confstate is empty.
	// If a snapshot is written before all confchange logs are applied, the mismatched state is written to disk,
	// and problems will occur after recovering from that snapshot.
	members *Members // used for 1. booting
	//                           2. sending cluster info to remote peers
	appliedMembers *Members // used for 1. verifying runtime confchange
	//                                  2. creating snapshots
	//                                  3. recovering from snapshots

	// raft http rejects messages from removed members
	// TODO for p2p
	removedMembers *Members

	changeSeq   uint64
	confChangeC chan *consensus.ConfChangePropose

	savedChange *consensus.ConfChangePropose

	notifyFn NotifyFn
}

type Members struct {
	name      string
	MapByID   map[uint64]*consensus.Member // restored from DB or snapshot
	MapByName map[string]*consensus.Member

	Index map[types.PeerID]uint64 // peer ID to raft ID mapping

	Addresses []string // for raft server TODO remove
}

func newMembers(name string) *Members {
	return &Members{
		name:      name,
		MapByID:   make(map[uint64]*consensus.Member),
		MapByName: make(map[string]*consensus.Member),
		Index:     make(map[types.PeerID]uint64),
		Addresses: make([]string, 0),
	}
}

func (mbrs *Members) len() int {
	return len(mbrs.MapByID)
}

func (mbrs *Members) ToArray() []*consensus.Member {
	count := len(mbrs.MapByID)

	var arrs = make([]*consensus.Member, count)

	i := 0
	for _, m := range mbrs.MapByID {
		arrs[i] = m
		i++
	}

	sort.Sort(consensus.MembersByName(arrs))

	return arrs
}

func (mbrs *Members) ToMemberAttrArray() []*types.MemberAttr {
	count := len(mbrs.MapByID)

	var arrs = make([]*types.MemberAttr, count)

	mbrArray := mbrs.ToArray()

	i := 0
	for _, m := range mbrArray {
		arrs[i] = &m.MemberAttr
		i++
	}

	return arrs
}

func (mbrs *Members) toString() string {
	var buf string

	// guard against a nil receiver before touching any field
	if mbrs == nil {
		return "[]"
	}

	buf += mbrs.name

	mbrsArr := mbrs.ToArray()
	sort.Sort(consensus.MembersByName(mbrsArr))

	buf += "["
	for _, bp := range mbrsArr {
		buf += bp.ToString()
	}
	buf += "]"

	return buf
}

func NewCluster(chainID []byte, bf *BlockFactory, raftName string, p2pPeerID types.PeerID, chainTimestamp int64, notifyFn NotifyFn) *Cluster {
	cl := &Cluster{
		chainID:            chainID,
		chainTimestamp:     chainTimestamp,
		ICompSyncRequester: bf,
		identity:           consensus.RaftIdentity{Name: raftName},
		members:            newMembers(MembersNameInit),
		appliedMembers:     newMembers(MembersNameApplied),
		removedMembers:     newMembers(MembersNameRemoved),
		confChangeC:        make(chan *consensus.ConfChangePropose),
	}
	if bf != nil {
		cl.cdb = bf.ChainWAL
	}

	if len(p2pPeerID) > 0 {
		cl.identity.PeerID = types.IDB58Encode(p2pPeerID)
	}
	cl.notifyFn = notifyFn

	return cl
}

func NewClusterFromMemberAttrs(clusterID uint64, chainID []byte, memberAttrs []*types.MemberAttr) (*Cluster, error) {
	cl := NewCluster(chainID, nil, "", "", 0, nil)

	for _, mbrAttr := range memberAttrs {
		var mbr consensus.Member

		mbr.SetAttr(mbrAttr)

		if err := cl.isValidMember(&mbr); err != nil {
			logger.Error().Err(err).Str("mbr", mbr.ToString()).Msg("fail to add member")
			return nil, err
		}

		if err := cl.addMember(&mbr, false); err != nil {
			logger.Error().Err(err).Str("mbr", mbr.ToString()).Msg("fail to add member")
			return nil, err
		}
	}

	if clusterID == InvalidClusterID {
		return nil, ErrClusterNotReady
	}
	cl.identity.ClusterID = clusterID

	return cl, nil
}

func (cl *Cluster) ClusterID() uint64 {
	return cl.identity.ClusterID
}
func (cl *Cluster) NodeName() string {
	return cl.identity.Name
}

func (cl *Cluster) NodeID() uint64 {
	return cl.identity.ID
}

func (cl *Cluster) NodePeerID() string {
	return cl.identity.PeerID
}

func (cl *Cluster) SetNodeID(nodeid uint64) {
	cl.identity.ID = nodeid
}

func (cl *Cluster) SetClusterID(clusterid uint64) {
	logger.Debug().Str("id", EtcdIDToString(clusterid)).Msg("set cluster ID")

	cl.identity.ClusterID = clusterid
}

// RecoverIdentity resets the node ID and name of the cluster.
// The raft identity is saved in the WAL and restored when the server is restarted.
func (cl *Cluster) RecoverIdentity(id *consensus.RaftIdentity) error {
	cl.Lock()
	defer cl.Unlock()

	// check name
	if cl.identity.Name != id.Name {
		return ErrNotMatchedRaftName
	}

	if cl.identity.PeerID != id.PeerID {
		return ErrNotMatchedRaftPeerID
	}

	if id.ClusterID == 0 {
		return ErrInvalidRaftIdentity
	}

	cl.identity = *id

	logger.Info().Str("identity", id.ToString()).Msg("recover raft identity of this node")

	return nil
}

func (cl *Cluster) Recover(snapshot *raftpb.Snapshot) (bool, error) {
	var snapdata = &consensus.SnapshotData{}

	if err := snapdata.Decode(snapshot.Data); err != nil {
		return false, err
	}

	logger.Info().Str("snap", snapdata.ToString()).Msg("cluster recover from snapshot")

	if cl.isAllMembersEqual(snapdata.Members, snapdata.RemovedMembers) {
		logger.Info().Msg("cluster recovery skipped since all members are equal to the previous configuration")
		return true, nil
	}

	cl.ResetMembers()

	// restore members
	for _, mbr := range snapdata.Members {
		if err := cl.addMember(mbr, true); err != nil {
			return false, err
		}
	}

	for _, mbr := range snapdata.RemovedMembers {
		cl.RemovedMembers().add(mbr)
	}

	logger.Info().Str("info", cl.toStringWithLock()).Msg("cluster recovered")

	return false, nil
}

func (cl *Cluster) ResetMembers() {
	cl.Lock()
	defer cl.Unlock()

	cl.members = newMembers(MembersNameInit)
	cl.appliedMembers = newMembers(MembersNameApplied)
	cl.removedMembers = newMembers(MembersNameRemoved)

	cl.Size = 0
}

func (cl *Cluster) isMatch(confstate *raftpb.ConfState) bool {
	var matched int

	if len(cl.AppliedMembers().MapByID) != len(confstate.Nodes) {
		return false
	}

	for _, confID := range confstate.Nodes {
		if _, ok := cl.AppliedMembers().MapByID[confID]; !ok {
			return false
		}

		matched++
	}

	return true
}

func (cl *Cluster) Members() *Members {
	return cl.members
}

func (cl *Cluster) AppliedMembers() *Members {
	return cl.appliedMembers
}

func (cl *Cluster) RemovedMembers() *Members {
	return cl.removedMembers
}

func (cl *Cluster) Quorum() uint32 {
	return cl.Size/2 + 1
}

func (cl *Cluster) getStartPeers() ([]raftlib.Peer, error) {
	cl.Lock()
	defer cl.Unlock()

	if cl.Size == 0 {
		return nil, ErrClusterHasNoMember
	}

	rpeers := make([]raftlib.Peer, cl.Size)

	var i int
	for _, member := range cl.members.MapByID {
		data, err := json.Marshal(member)
		if err != nil {
			return nil, err
		}
		rpeers[i] = raftlib.Peer{ID: uint64(member.ID), Context: data}
		i++
	}

	return rpeers, nil
}
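// Illustrative sketch (an assumption, not original code): the peer list built by
// getStartPeers is what the raft server hands to the underlying etcd raft library when
// bootstrapping, and Quorum gives the majority size for the same member set. Variable
// names below are hypothetical.
//
//	peers, err := cl.getStartPeers() // one raftlib.Peer per initial member
//	if err != nil {
//		logger.Fatal().Err(err).Msg("no initial members")
//	}
//	_ = peers               // e.g. passed to raftlib.StartNode by the raft server
//	majority := cl.Quorum() // with Size == 5, Quorum() == 5/2 + 1 == 3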
// getAnyPeerAddressToSync returns the peer ID of any other member from which the chain can be synchronized.
func (cl *Cluster) getAnyPeerAddressToSync() (types.PeerID, error) {
	cl.Lock()
	defer cl.Unlock()

	for _, member := range cl.Members().MapByID {
		if member.Name != cl.NodeName() {
			return member.GetPeerID(), nil
		}
	}

	return "", ErrNoEnableSyncPeer
}

func (cl *Cluster) isValidMember(member *consensus.Member) error {
	cl.Lock()
	defer cl.Unlock()

	mbrs := cl.members

	for _, prevMember := range mbrs.MapByID {
		if prevMember.HasDuplicatedAttr(member) {
			logger.Error().Str("prev", prevMember.ToString()).Str("cur", member.ToString()).Msg("duplicated configuration for raft BP member")
			return ErrDupBP
		}
	}

	// check if the peerID of this node is valid
	if cl.NodeName() == member.Name && enc.ToString([]byte(member.GetPeerID())) != cl.NodePeerID() {
		logger.Error().Str("config", member.GetPeerID().String()).Str("cluster peerid", cl.NodePeerID()).Msg("peerID does not match the P2P peerID")
		return ErrInvalidRaftPeerID
	}

	return nil
}

func (cl *Cluster) addMember(member *consensus.Member, applied bool) error {
	logger.Info().Str("member", member.ToString()).Bool("applied", applied).Msg("member add")

	cl.Lock()
	defer cl.Unlock()

	if applied {
		if cl.AppliedMembers().isExist(member.ID) {
			return ErrMemberAlreadyApplied
		}
		logger.Debug().Str("member", member.ToString()).Msg("add to applied members")
		cl.AppliedMembers().add(member)

		// notify to p2p TODO temporary code
		peerID, err := types.IDFromBytes(member.PeerID)
		if err != nil {
			panic("invalid member peerid " + enc.ToString(member.PeerID))
		}

		if cl.notifyFn != nil {
			cl.notifyFn(&message.RaftClusterEvent{BPAdded: []types.PeerID{peerID}})
		}
	}

	if cl.members.isExist(member.ID) {
		logger.Debug().Str("member", member.ToString()).Msg("omit adding to init members")
		return nil
	}

	cl.members.add(member)
	cl.Size++

	return nil
}

func (cl *Cluster) removeMember(member *consensus.Member) error {
	logger.Info().Str("member", member.ToString()).Msg("member remove")

	cl.Lock()
	defer cl.Unlock()

	cl.AppliedMembers().remove(member)
	cl.members.remove(member)
	cl.removedMembers.add(member)

	cl.Size--
	// notify to p2p TODO temporary code
	peerID, err := types.IDFromBytes(member.PeerID)
	if err != nil {
		panic("invalid member peerid " + enc.ToString(member.PeerID))
	}

	if cl.notifyFn != nil {
		cl.notifyFn(&message.RaftClusterEvent{BPRemoved: []types.PeerID{peerID}})
	}

	return nil
}
// ValidateAndMergeExistingCluster tests whether the members of an existing cluster match this cluster.
func (cl *Cluster) ValidateAndMergeExistingCluster(existingCl *Cluster) bool {
	cl.Lock()
	defer cl.Unlock()

	if !bytes.Equal(existingCl.chainID, cl.chainID) {
		logger.Error().Msg("my chainID is different from that of the existing cluster")
		return false
	}

	// check if this node is already added in the existing cluster
	remoteMember := existingCl.Members().getMemberByName(cl.NodeName())
	if remoteMember == nil {
		logger.Error().Msg("this node doesn't exist in the existing cluster")
		return false
	}

	// TODO check that my network config is equal to the corresponding member of the remote cluster
	if enc.ToString(remoteMember.PeerID) != cl.NodePeerID() {
		logger.Error().Msg("peerid is different from the peerid of the corresponding member of the existing cluster")
	}

	cl.members = existingCl.Members()
	cl.Size = existingCl.Size

	myNodeID := existingCl.getNodeID(cl.NodeName())

	// reset self nodeID of cluster
	cl.SetNodeID(myNodeID)
	cl.SetClusterID(existingCl.ClusterID())

	logger.Debug().Str("my", cl.toStringWithLock()).Msg("cluster merged with existing cluster")
	return true
}

func (cl *Cluster) getMemberAttrs() ([]*types.MemberAttr, error) {
	cl.Lock()
	defer cl.Unlock()

	attrs := make([]*types.MemberAttr, cl.members.len())

	if cl.members.len() == 0 {
		return nil, ErrClusterHasNoMember
	}

	var i = 0
	for _, mbr := range cl.members.MapByID {
		// copy attr since it can be modified
		attr := mbr.MemberAttr
		attrs[i] = &attr
		i++
	}

	return attrs, nil
}

// IsIDRemoved returns true if the given raft ID has been removed from the cluster.
func (cl *Cluster) IsIDRemoved(id uint64) bool {
	return cl.RemovedMembers().isExist(id)
}

// GenerateID generates a cluster ID by hashing the IDs of all initial members.
func (cl *Cluster) GenerateID(useBackup bool) {
	var buf []byte

	if useBackup {
		blk, err := cl.cdb.GetBestBlock()
		if err != nil || blk == nil {
			logger.Fatal().Msg("failed to get best block from backup datafiles")
		}

		buf = append(buf, blk.BlockHash()...)
	}

	mbrs := cl.Members().ToArray()
	sort.Sort(consensus.MembersByName(mbrs))

	for _, mbr := range mbrs {
		logger.Debug().Str("id", EtcdIDToString(mbr.GetID())).Msg("member ID")

		buf = append(buf, types.Uint64ToBytes(mbr.GetID())...)
	}

	hash := sha1.Sum(buf)
	cl.identity.ClusterID = binary.LittleEndian.Uint64(hash[:8])

	logger.Info().Str("id", EtcdIDToString(cl.ClusterID())).Msg("generate cluster ID")
}

func (cl *Cluster) isAllMembersEqual(members []*consensus.Member, RemovedMembers []*consensus.Member) bool {
	membersEqual := func(x []*consensus.Member, y []*consensus.Member) bool {
		if len(x) != len(y) {
			return false
		}

		for i, mX := range x {
			mY := y[i]
			if !mX.Equal(mY) {
				return false
			}
		}

		return true
	}

	clMembers := cl.AppliedMembers().ToArray()
	clRemovedMembers := cl.RemovedMembers().ToArray()

	sort.Sort(consensus.MembersByName(members))
	sort.Sort(consensus.MembersByName(RemovedMembers))

	if !membersEqual(clMembers, members) {
		return false
	}

	if !membersEqual(clRemovedMembers, RemovedMembers) {
		return false
	}

	return true
}

func (mbrs *Members) add(member *consensus.Member) {
	mbrs.MapByID[member.ID] = member
	mbrs.MapByName[member.Name] = member
	mbrs.Index[member.GetPeerID()] = member.ID
	mbrs.Addresses = append(mbrs.Addresses, member.Address)
}

func (mbrs *Members) remove(member *consensus.Member) {
	delete(mbrs.MapByID, member.ID)
	delete(mbrs.MapByName, member.Name)
	delete(mbrs.Index, member.GetPeerID())
}

func (mbrs *Members) getMemberByName(name string) *consensus.Member {
	member, ok := mbrs.MapByName[name]
	if !ok {
		return nil
	}

	return member
}

func (mbrs *Members) isExist(id uint64) bool {
	return mbrs.getMember(id) != nil
}

func (mbrs *Members) getMember(id uint64) *consensus.Member {
	member, ok := mbrs.MapByID[id]
	if !ok {
		return nil
	}

	return member
}
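// A small usage sketch (not in the original file) of the three indexes kept by Members:
// add registers a member by raft ID, by name, and by p2p peer ID, so each lookup helper
// resolves to the same *consensus.Member. The member value m is hypothetical.
//
//	mbrs := newMembers("example")
//	mbrs.add(m)
//	byID := mbrs.getMember(m.ID)                    // lookup by raft ID
//	byName := mbrs.getMemberByName(m.Name)          // lookup by name
//	byPeer := mbrs.getMemberByPeerID(m.GetPeerID()) // lookup by p2p peer ID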
func (mbrs *Members) getMemberByPeerID(pid types.PeerID) *consensus.Member {
	return mbrs.getMember(mbrs.Index[pid])
}

func (mbrs *Members) getMemberPeerAddress(id uint64) (types.PeerID, error) {
	member := mbrs.getMember(id)
	if member == nil {
		return "", ErrNotExistRaftMember
	}

	return member.GetPeerID(), nil
}

// hasDuplicatedMember returns an error if any attribute of the given member equals an attribute of an existing cluster member.
func (mbrs *Members) hasDuplicatedMember(m *consensus.Member) error {
	for _, prevMember := range mbrs.MapByID {
		if prevMember.HasDuplicatedAttr(m) {
			logger.Error().Str("old", prevMember.ToString()).Str("new", m.ToString()).Msg("duplicated attribute for new member")
			return ErrDupBP
		}
	}
	return nil
}

func MaxUint64(x, y uint64) uint64 {
	if x < y {
		return y
	}
	return x
}

/*
// hasSynced gets the result of a GetPeers request from the P2P service and checks whether the chain of this node is synchronized with the majority of members
func (cc *Cluster) hasSynced() (bool, error) {
	var peers map[types.PeerID]*message.PeerInfo
	var err error
	var peerBestNo uint64 = 0

	if cc.Size == 1 {
		return true, nil
	}

	// request GetPeers to p2p
	getBPPeers := func() (map[types.PeerID]*message.PeerInfo, error) {
		peers := make(map[types.PeerID]*message.PeerInfo)

		result, err := cc.RequestFuture(message.P2PSvc, &message.GetPeers{}, time.Second, "raft cluster sync test").Result()
		if err != nil {
			return nil, err
		}

		msg := result.(*message.GetPeersRsp)

		for _, peerElem := range msg.Peers {
			peerID := types.PeerID(peerElem.Addr.PeerID)
			state := peerElem.State

			if peerElem.Self {
				continue
			}

			if state.Get() != types.RUNNING {
				logger.Debug().Str("peer", p2putil.ShortForm(peerID)).Msg("peer is not running")
				continue
			}

			// check if peer is not bp
			if _, ok := cc.Index[peerID]; !ok {
				continue
			}

			peers[peerID] = peerElem

			peerBestNo = MaxUint64(peerElem.LastBlockNumber, peerBestNo)
		}

		return peers, nil
	}

	if peers, err = getBPPeers(); err != nil {
		return false, err
	}

	if uint16(len(peers)) < (cc.Quorum() - 1) {
		logger.Debug().Msg("a majority of peers are not connected")
		return false, nil
	}

	var best *types.Block
	if best, err = cc.cdb.GetBestBlock(); err != nil {
		return false, err
	}

	if best.BlockNo()+DefaultMarginChainDiff < peerBestNo {
		logger.Debug().Uint64("best", best.BlockNo()).Uint64("peerbest", peerBestNo).Msg("chain was not synced with majority of peers")
		return false, nil
	}

	logger.Debug().Uint64("best", best.BlockNo()).Uint64("peerbest", peerBestNo).Int("margin", DefaultMarginChainDiff).Msg("chain has been synced with majority of peers")

	return true, nil
}
*/

func (cl *Cluster) toStringWithLock() string {
	var buf string

	buf = fmt.Sprintf("total=%d, clusterID=%x, NodeName=%s, RaftID=%x, ", cl.Size, cl.ClusterID(), cl.NodeName(), cl.NodeID())
	buf += "members: " + cl.members.toString()
	buf += ", appliedMembers: " + cl.appliedMembers.toString()

	return buf
}

func (cl *Cluster) toString() string {
	cl.Lock()
	defer cl.Unlock()

	return cl.toStringWithLock()
}

func (cl *Cluster) getNodeID(name string) uint64 {
	m, ok := cl.Members().MapByName[name]
	if !ok {
		return consensus.InvalidMemberID
	}

	return m.ID
}
func (cl *Cluster) getRaftInfo(withStatus bool) *RaftInfo {
	cl.Lock()
	defer cl.Unlock()

	var leader uint64
	if cl.rs != nil {
		leader = cl.rs.GetLeader()
	}

	var leaderName string
	var m *consensus.Member

	if m = cl.Members().getMember(leader); m != nil {
		leaderName = m.Name
	} else {
		leaderName = "id=" + EtcdIDToString(leader)
	}

	rinfo := &RaftInfo{Leader: leaderName, Total: cl.Size, Name: cl.NodeName(), RaftId: EtcdIDToString(cl.NodeID())}

	if withStatus && cl.rs != nil {
		b, err := cl.rs.Status().MarshalJSON()
		if err != nil {
			logger.Error().Err(err).Msg("failed to marshal raft status")
		} else {
			m := json.RawMessage(b)
			rinfo.Status = &m
		}
	}
	return rinfo
}

func (cl *Cluster) toConsensusInfo() *types.ConsensusInfo {
	emptyCons := types.ConsensusInfo{
		Type: GetName(),
	}

	type PeerInfo struct {
		Name   string
		RaftID string
		PeerID string
		Addr   string
	}

	b, err := json.Marshal(cl.getRaftInfo(true))
	if err != nil {
		logger.Error().Err(err).Msg("failed to marshal raft consensus info")
		return &emptyCons
	}

	cl.Lock()
	defer cl.Unlock()

	cons := emptyCons
	cons.Info = string(b)

	var i int = 0
	if cl.Size != 0 {
		bps := make([]string, cl.Size)

		for id, m := range cl.Members().MapByID {
			bp := &PeerInfo{Name: m.Name, RaftID: EtcdIDToString(m.ID), PeerID: m.GetPeerID().Pretty(), Addr: m.Address}
			b, err = json.Marshal(bp)
			if err != nil {
				logger.Error().Err(err).Str("raftid", EtcdIDToString(id)).Msg("failed to marshal raft consensus bp")
				return &emptyCons
			}
			bps[i] = string(b)

			i++
		}
		cons.Bps = bps
	}

	return &cons
}

func (cl *Cluster) NewMemberFromAddReq(req *types.MembershipChange) (*consensus.Member, error) {
	if len(req.Attr.Name) == 0 || len(req.Attr.Address) == 0 || len(req.Attr.PeerID) == 0 {
		return nil, consensus.ErrInvalidMemberAttr
	}

	return consensus.NewMember(req.Attr.Name, req.Attr.Address, types.PeerID(req.Attr.PeerID), cl.chainID, time.Now().UnixNano()), nil
}

func (cl *Cluster) NewMemberFromRemoveReq(req *types.MembershipChange) (*consensus.Member, error) {
	if req.Attr.ID == consensus.InvalidMemberID {
		return nil, consensus.ErrInvalidMemberID
	}

	member := consensus.NewMember("", "", types.PeerID(""), cl.chainID, 0)
	member.SetMemberID(req.Attr.ID)

	return member, nil
}

func (cl *Cluster) ChangeMembership(req *types.MembershipChange, nowait bool) (*consensus.Member, error) {
	var (
		proposal *consensus.ConfChangePropose
		err      error
	)

	submit := func() error {
		cl.Lock()
		defer cl.Unlock()

		if proposal, err = cl.makeProposal(req, nowait); err != nil {
			logger.Error().Uint64("requestID", req.GetRequestID()).Msg("failed to make proposal for membership change")
			return err
		}

		if err = cl.isEnableChangeMembership(proposal.Cc); err != nil {
			logger.Error().Err(err).Msg("failed cluster availability check to change membership")
			return err
		}

		if err = cl.submitProposal(proposal, nowait); err != nil {
			return err
		}

		return nil
	}

	if err = submit(); err != nil {
		return nil, err
	}

	if nowait {
		return nil, nil
	}

	return cl.recvConfChangeReply(proposal.ReplyC)
}
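// A hedged usage sketch (not part of the original file): proposing an ADD_MEMBER change
// through ChangeMembership. The field values and the exact shape of types.MembershipChange
// are assumptions for illustration; with nowait == false the call blocks until the conf
// change is applied or MaxConfChangeTimeOut elapses.
//
//	req := &types.MembershipChange{
//		Type: types.MembershipChangeType_ADD_MEMBER,
//		Attr: &types.MemberAttr{Name: "bp4", Address: "/ip4/10.0.0.4/tcp/11001", PeerID: peerIDBytes},
//	}
//	member, err := cl.ChangeMembership(req, false)
//	if err != nil {
//		logger.Error().Err(err).Msg("membership change failed")
//	} else {
//		logger.Info().Str("member", member.ToString()).Msg("membership change applied")
//	}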
func (cl *Cluster) makeProposal(req *types.MembershipChange, nowait bool) (*consensus.ConfChangePropose, error) {
	if cl.savedChange != nil {
		logger.Error().Str("cc", types.RaftConfChangeToString(cl.savedChange.Cc)).Msg("pending conf change already exists")
		return nil, ErrPendingConfChange
	}

	var (
		replyC chan *consensus.ConfChangeReply
		member *consensus.Member
		err    error
	)

	switch req.Type {
	case types.MembershipChangeType_ADD_MEMBER:
		member, err = cl.NewMemberFromAddReq(req)

	case types.MembershipChangeType_REMOVE_MEMBER:
		member, err = cl.NewMemberFromRemoveReq(req)

	default:
		return nil, ErrInvalidMembershipReqType
	}

	if err != nil {
		logger.Error().Err(err).Uint64("requestID", req.GetRequestID()).Msg("failed to make new member")
		return nil, err
	}

	// make raft confChange
	cc, err := cl.makeConfChange(req.GetRequestID(), req.Type, member)
	if err != nil {
		logger.Error().Err(err).Uint64("requestID", req.GetRequestID()).Msg("failed to make conf change of raft")
		return nil, err
	}

	// validate member change
	if err = cl.validateChangeMembership(cc, member, false); err != nil {
		logger.Error().Err(err).Uint64("requestID", req.GetRequestID()).Msg("failed to validate request of membership change")
		return nil, err
	}

	if !nowait {
		replyC = make(chan *consensus.ConfChangeReply, 1)
	}

	// TODO check cancel
	ctx, cancel := context.WithTimeout(context.Background(), MaxConfChangeTimeOut)
	defer cancel()

	// send (confChange, replyC) to proposeC
	proposal := consensus.ConfChangePropose{Ctx: ctx, Cc: cc, ReplyC: replyC}

	return &proposal, nil
}
func (cl *Cluster) submitProposal(proposal *consensus.ConfChangePropose, nowait bool) error {
	if cl.savedChange != nil {
		return ErrPendingConfChange
	}

	cl.saveConfChangePropose(proposal)

	select {
	case cl.confChangeC <- proposal:
		logger.Info().Uint64("requestID", proposal.Cc.ID).Msg("proposal of conf change is sent to raft")
	default:
		logger.Error().Uint64("requestID", proposal.Cc.ID).Msg("proposal of conf change is dropped. confChange channel is busy")

		if !nowait {
			close(proposal.ReplyC)
		}
		cl.resetSavedConfChangePropose()
		return ErrConfChangeChannelBusy
	}

	return nil
}

func (cl *Cluster) recvConfChangeReply(replyC chan *consensus.ConfChangeReply) (*consensus.Member, error) {
	select {
	case reply, ok := <-replyC:
		if !ok {
			logger.Panic().Msg("reply channel of change request must not be closed")
		}

		if reply.Err != nil {
			logger.Error().Err(reply.Err).Msg("failed conf change")
			return nil, reply.Err
		}

		logger.Info().Str("cluster", cl.toString()).Str("target", reply.Member.ToString()).Msg("conf change request succeeded")

		return reply.Member, nil
	case <-time.After(MaxConfChangeTimeOut):
		// the saved conf change must be reset in the raft server after the request completes
		logger.Warn().Msg("proposal of conf change timed out")

		return nil, ErrConChangeTimeOut
	}
}

func (cl *Cluster) AfterConfChange(cc *raftpb.ConfChange, member *consensus.Member, err error) {
	cl.Lock()
	defer cl.Unlock()

	// TODO XXX if the leader is rebooted, savedChange will be nil, so this situation needs to be handled
	if cl.savedChange == nil || cl.savedChange.Cc.ID != cc.ID {
		return
	}

	propose := cl.savedChange

	logger.Info().Str("req", util.JSON(propose.Cc)).Msg("conf change succeeded")

	cl.resetSavedConfChangePropose()

	if propose.ReplyC != nil {
		propose.ReplyC <- &consensus.ConfChangeReply{Member: member, Err: err}
		close(propose.ReplyC)
	}
}

func (cl *Cluster) saveConfChangePropose(ccPropose *consensus.ConfChangePropose) {
	logger.Debug().Uint64("ccid", ccPropose.Cc.ID).Msg("this conf change propose is saved in cluster")
	cl.savedChange = ccPropose
}

func (cl *Cluster) resetSavedConfChangePropose() {
	var ccid uint64

	if cl.savedChange == nil {
		return
	}

	ccid = cl.savedChange.Cc.ID

	logger.Debug().Uint64("requestID", ccid).Msg("reset saved conf change propose")

	cl.savedChange = nil
}

var (
	ErrRaftStatusEmpty = errors.New("raft status is empty")
)

// isEnableChangeMembership checks whether a membership change request could stall the cluster.
// case add    : reject if the number of available nodes < (n + 1)/2 + 1
// case remove : reject if the number of available nodes, excluding the node to remove, < (n - 1)/2 + 1
//
// Default:
//   - Add: adding is not allowed if even one failed or slow node exists.
//     A node is considered slow when its block height lags by 100 or more.
//   - Remove: the current cluster must be available, i.e. when removing a node,
//     (healthy nodes - 1) >= (n - 1)/2 + 1 must hold.
//     A slow node can always be removed, since removing it may bring the cluster back to a healthy state.
//   - force mode: always execute the change.
func (cl *Cluster) isEnableChangeMembership(cc *raftpb.ConfChange) error {
	status := cl.rs.Status()
	if status.ID == 0 {
		logger.Debug().Msg("raft node is not initialized")
		return ErrRaftStatusEmpty
	}

	cp, err := cl.rs.GetClusterProgress()
	if err != nil {
		logger.Error().Err(err).Msg("failed to get cluster progress")
		return err
	}

	logger.Info().Str("info", cp.ToString()).Msg("cluster progress")

	getHealthyMembers := func(cp *ClusterProgress) int {
		var healthy int

		for _, mp := range cp.MemberProgresses {
			if mp.Status == MemberProgressStateHealthy {
				healthy++
			}
		}

		return healthy
	}

	isClusterAvailable := func(total int, healthy int) bool {
		quorum := total/2 + 1

		logger.Info().Int("quorum", quorum).Int("total", total).Int("healthy", healthy).Msg("cluster quorum")

		return healthy >= quorum
	}

	healthy := getHealthyMembers(cp)

	if !isClusterAvailable(cp.N, healthy) {
		logger.Warn().Msg("current cluster status doesn't satisfy quorum")
	}

	switch {
	case cc.Type == raftpb.ConfChangeAddNode:
		for _, mp := range cp.MemberProgresses {
			if mp.Status != MemberProgressStateHealthy {
				logger.Error().Uint64("slowgap", MaxSlowNodeGap).Str("unhealthy member", mp.ToString()).Msg("an unhealthy member exists in the cluster. to add a node, fix the unhealthy node and try again")
				return ErrUnhealtyNodeExist
			}
		}

		return nil
	case cc.Type == raftpb.ConfChangeRemoveNode:
		mp, ok := cp.MemberProgresses[cc.NodeID]
		if !ok {
			logger.Error().Uint64("id", cc.NodeID).Msg("no progress exists for the member")
			return ErrNotExitRaftProgress
		}

		if mp.Status != MemberProgressStateHealthy {
			logger.Warn().Uint64("memberid", mp.MemberID).Msg("try to remove slow node")
			return nil
		}

		if !isClusterAvailable(cp.N-1, healthy-1) {
			logger.Error().Msg("can't remove a healthy node; removing it may stall the cluster")
			return ErrRemoveHealthyNode
		}

		return nil
	default:
		logger.Error().Msg("type of conf change is invalid")
		return ErrInvalidMembershipReqType
	}
}
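// Worked example (illustrative only) for the availability check above: in a 5-node
// cluster with 4 healthy members, removing one healthy node leaves 4 members of which
// 3 are healthy, and the quorum of the shrunken cluster is 4/2+1 == 3, so the removal
// is allowed; with only 3 healthy members the same removal would be rejected with
// ErrRemoveHealthyNode.
//
//	total, healthy := 5, 4
//	quorumAfterRemove := (total-1)/2 + 1          // 3
//	allowed := (healthy - 1) >= quorumAfterRemove // true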
func (cl *Cluster) validateChangeMembership(cc *raftpb.ConfChange, member *consensus.Member, needlock bool) error {
	if member == nil {
		return ErrCCMemberIsNil
	}

	if needlock {
		cl.Lock()
		defer cl.Unlock()
	}

	appliedMembers := cl.AppliedMembers()

	if member.ID == consensus.InvalidMemberID {
		return consensus.ErrInvalidMemberID
	}
	if cl.RemovedMembers().isExist(member.ID) {
		return ErrCCAlreadyRemoved
	}

	switch cc.Type {
	case raftpb.ConfChangeAddNode:
		if !member.IsValid() {
			logger.Error().Str("member", member.ToString()).Msg("member has invalid fields")
			return ErrInvalidMember
		}

		if m := appliedMembers.getMember(member.ID); m != nil {
			return ErrCCAlreadyAdded
		}

		if err := appliedMembers.hasDuplicatedMember(member); err != nil {
			return err
		}

	case raftpb.ConfChangeRemoveNode:
		var m *consensus.Member

		if m = appliedMembers.getMember(member.ID); m == nil {
			return ErrCCNoMemberToRemove
		}

		*member = *m
	default:
		return ErrInvCCType
	}

	// - TODO UPDATE
	return nil
}

func (cl *Cluster) makeConfChange(reqID uint64, reqType types.MembershipChangeType, member *consensus.Member) (*raftpb.ConfChange, error) {
	var changeType raftpb.ConfChangeType
	switch reqType {
	case types.MembershipChangeType_ADD_MEMBER:
		changeType = raftpb.ConfChangeAddNode
	case types.MembershipChangeType_REMOVE_MEMBER:
		changeType = raftpb.ConfChangeRemoveNode
	default:
		return nil, ErrInvalidMembershipReqType
	}

	logger.Debug().Uint64("requestID", reqID).Str("member", member.ToString()).Msg("conf change target member")

	cl.changeSeq++

	data, err := json.Marshal(member)
	if err != nil {
		return nil, err
	}

	// generateConfChangeID
	cc := &raftpb.ConfChange{ID: reqID, Type: changeType, NodeID: uint64(member.ID), Context: data}

	return cc, nil
}

func EtcdIDToString(id uint64) string {
	return fmt.Sprintf("%x", id)
}
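// Example (illustrative only): EtcdIDToString renders raft IDs in the same hexadecimal
// form used throughout the logs, e.g. EtcdIDToString(255) == "ff". GenerateID derives the
// cluster ID in a compatible way: sha1 over the sorted member IDs (prefixed with the best
// block hash when useBackup is true), taking the first 8 bytes as a little-endian uint64.
//
//	id := EtcdIDToString(0xff) // "ff"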