github.com/aergoio/aergo@v1.3.1/consensus/impl/raftv2/raftserver.go

// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package raftv2

import (
	"bytes"
	"context"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"runtime/debug"
	"strconv"
	"sync"
	"time"

	"github.com/aergoio/aergo/chain"
	"github.com/aergoio/aergo/consensus"
	"github.com/aergoio/aergo/message"
	"github.com/aergoio/aergo/p2p/p2pcommon"
	"github.com/aergoio/aergo/pkg/component"
	"github.com/aergoio/aergo/types"
	"github.com/gogo/protobuf/proto"

	"github.com/aergoio/etcd/etcdserver/stats"
	etcdtypes "github.com/aergoio/etcd/pkg/types"
	raftlib "github.com/aergoio/etcd/raft"
	"github.com/aergoio/etcd/raft/raftpb"
	"github.com/aergoio/etcd/rafthttp"
	"github.com/aergoio/etcd/snap"
)

const (
	HasNoLeader uint64 = 0
)

//noinspection ALL
var (
	raftLogger raftlib.Logger
)

var (
	ErrRaftNotReady        = errors.New("raft library is not initialized")
	ErrCCAlreadyApplied    = errors.New("conf change entry is already applied")
	ErrInvalidMember       = errors.New("member of conf change is invalid")
	ErrCCAlreadyAdded      = errors.New("member has already been added")
	ErrCCAlreadyRemoved    = errors.New("member has already been removed")
	ErrCCNoMemberToRemove  = errors.New("there is no member to remove")
	ErrEmptySnapshot       = errors.New("received empty snapshot")
	ErrInvalidRaftIdentity = errors.New("raft identity is not set")
	ErrProposeNilBlock     = errors.New("proposed block is nil")
)

const (
	BackendP2P  = "aergop2p"
	BackendHTTP = "http"
)

func init() {
	raftLogger = NewRaftLogger(logger)
}

// raftServer is a key-value stream backed by raft.
type raftServer struct {
	*component.ComponentHub
	sync.RWMutex

	pa p2pcommon.PeerAccessor

	cluster *Cluster

	confChangeC <-chan *consensus.ConfChangePropose // proposed cluster config changes
	commitC     chan *commitEntry                   // entries committed to log (k,v)
	errorC      chan error                          // errors from raft session

	id          uint64 // client ID for raft session
	join        bool   // node is joining an existing cluster
	UseBackup   bool   // use backup chaindb datafiles to join an existing cluster
	getSnapshot func() ([]byte, error)
	lastIndex   uint64 // index of log at start

	snapshotIndex uint64
	appliedIndex  uint64

	// raft backing for the commit/error channel
	node        raftlib.Node
	raftStorage *raftlib.MemoryStorage
	//wal         *wal.WAL
	walDB *WalDB

	snapshotter *ChainSnapshotter

	snapFrequency uint64
	transport     Transporter
	stopc         chan struct{} // signals proposal channel closed

	curTerm      uint64
	leaderStatus LeaderStatus

	promotable bool

	tickMS time.Duration

	confState *raftpb.ConfState

	commitProgress CommitProgress
}

type LeaderStatus struct {
	sync.RWMutex
	Leader        uint64
	Term          uint64
	leaderChanged uint64
	IsLeader      bool
}
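
// commitEntry is a unit of work delivered over commitC: a block committed by
// raft together with the raft log index and term at which it was committed.
// A commitEntry whose block is nil acts as a ready marker (see IsReadyMarker).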
type commitEntry struct {
	block *types.Block
	index uint64
	term  uint64
}

func (ce *commitEntry) IsReadyMarker() bool {
	return ce.block == nil
}

type CommitProgress struct {
	sync.Mutex

	connect commitEntry // last entry connected to the chain
	request commitEntry // last entry requested via commitC
}

func (cp *CommitProgress) UpdateConnect(ce *commitEntry) {
	logger.Debug().Uint64("term", ce.term).Uint64("index", ce.index).Uint64("no", ce.block.BlockNo()).Str("hash", ce.block.ID()).Msg("set progress of last connected block")

	cp.Lock()
	defer cp.Unlock()

	cp.connect = *ce
}

func (cp *CommitProgress) UpdateRequest(ce *commitEntry) {
	logger.Debug().Uint64("term", ce.term).Uint64("index", ce.index).Uint64("no", ce.block.BlockNo()).Str("hash", ce.block.ID()).Msg("set progress of last requested block")

	cp.Lock()
	defer cp.Unlock()

	cp.request = *ce
}

func (cp *CommitProgress) GetConnect() *commitEntry {
	cp.Lock()
	defer cp.Unlock()

	return &cp.connect
}

func (cp *CommitProgress) GetRequest() *commitEntry {
	cp.Lock()
	defer cp.Unlock()

	return &cp.request
}

func (cp *CommitProgress) IsReadyToPropose() bool {
	cp.Lock()
	defer cp.Unlock()

	if cp.request.block == nil {
		return true
	}

	var connNo, reqNo uint64
	reqNo = cp.request.block.BlockNo()
	if cp.connect.block != nil {
		connNo = cp.connect.block.BlockNo()
	}

	if reqNo <= connNo {
		return true
	}

	logger.Debug().Uint64("requested", reqNo).Uint64("connected", connNo).Msg("pending request is not yet connected")

	return false
}

func RecoverExit() {
	if r := recover(); r != nil {
		logger.Error().Str("callstack", string(debug.Stack())).Msg("panic occurred in raft server")
		os.Exit(10)
	}
}

func makeConfig(nodeID uint64, storage *raftlib.MemoryStorage) *raftlib.Config {
	c := &raftlib.Config{
		ID:                        nodeID,
		ElectionTick:              ElectionTickCount,
		HeartbeatTick:             1,
		Storage:                   storage,
		MaxSizePerMsg:             1024 * 1024,
		MaxInflightMsgs:           256,
		Logger:                    raftLogger,
		CheckQuorum:               true,
		DisableProposalForwarding: true,
	}

	return c
}

// newRaftServer initiates a raft instance and returns a committed log entry
// channel and error channel. Proposals for log updates are sent over the
// provided proposal channel. All log entries are replayed over the commit
// channel, followed by a nil message (to indicate the channel is current),
// then new log entries. To shutdown, close confChangeC and read errorC.
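//
// Example construction (illustrative sketch only; the hub, cluster, channel,
// and WAL arguments are placeholders, not values taken from this file):
//
//	rs := newRaftServer(hub, cluster, join, useBackup, getSnapshot,
//		tickMS, confChangeC, commitC, false, chainWal)
//	rs.SetPeerAccessor(peerAccessor)
//	rs.Start()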
func newRaftServer(hub *component.ComponentHub,
	cluster *Cluster,
	join bool, useBackup bool,
	getSnapshot func() ([]byte, error),
	tickMS time.Duration,
	confChangeC chan *consensus.ConfChangePropose,
	commitC chan *commitEntry,
	delayPromote bool,
	chainWal consensus.ChainWAL) *raftServer {

	errorC := make(chan error, 1)

	rs := &raftServer{
		ComponentHub:  hub,
		RWMutex:       sync.RWMutex{},
		cluster:       cluster,
		walDB:         NewWalDB(chainWal),
		confChangeC:   confChangeC,
		commitC:       commitC,
		errorC:        errorC,
		join:          join,
		UseBackup:     useBackup,
		getSnapshot:   getSnapshot,
		snapFrequency: ConfSnapFrequency,
		stopc:         make(chan struct{}),

		// rest of structure populated after WAL replay
		promotable:     true,
		tickMS:         tickMS,
		commitProgress: CommitProgress{},
	}

	if delayPromote {
		rs.SetPromotable(false)
	}

	rs.snapshotter = newChainSnapshotter(nil, rs.ComponentHub, rs.cluster, rs.walDB, func() uint64 { return rs.GetLeader() })

	return rs
}

func (rs *raftServer) SetPeerAccessor(pa p2pcommon.PeerAccessor) {
	rs.pa = pa
	rs.snapshotter.setPeerAccessor(pa)
}

func (rs *raftServer) SetPromotable(val bool) {
	rs.Lock()
	defer rs.Unlock()

	rs.promotable = val
}

func (rs *raftServer) GetPromotable() bool {
	rs.RLock()
	defer rs.RUnlock()

	val := rs.promotable

	return val
}

func (rs *raftServer) Start() {
	go rs.startRaft()
}

func (rs *raftServer) makeStartPeers() ([]raftlib.Peer, error) {
	return rs.cluster.getStartPeers()
}

type RaftServerState int

const (
	RaftServerStateRestart = iota
	RaftServerStateNewCluster
	RaftServerStateJoinCluster
)
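
// startRaft determines how this node should come up: restart from an existing
// WAL, join an existing cluster (optionally reusing backup data), or bootstrap
// a brand-new cluster. It then starts the raft node, the transport layer, and
// the channel-serving loop.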
func (rs *raftServer) startRaft() {
	var node raftlib.Node

	getState := func() RaftServerState {
		hasWal, err := rs.walDB.HasWal(rs.cluster.identity)
		if err != nil {
			logger.Info().Err(err).Msg("wal database of raft is missing or not valid")
		}

		switch {
		case hasWal:
			return RaftServerStateRestart
		case rs.join:
			return RaftServerStateJoinCluster
		default:
			return RaftServerStateNewCluster
		}
	}

	isEmptyLog := func() bool {
		var (
			last uint64
			err  error
		)

		if last, err = rs.walDB.GetRaftEntryLastIdx(); err != nil {
			return true
		}

		// If a joining node crashed before writing a snapshot, the last index may be 0 with no snapshot present.
		if last == 0 {
			if tmpsnap, err := rs.walDB.GetSnapshot(); tmpsnap == nil || err != nil {
				return true
			}
		}

		return false
	}

	switch getState() {
	case RaftServerStateRestart:
		logger.Info().Msg("there is no log, so import cluster information from remote. This server may have been added and terminated before the first synchronization was completed")

		rs.cluster.ResetMembers()

		if isEmptyLog() {
			logger.Info().Msg("raft restart from wal")

			if _, err := rs.ImportExistingCluster(); err != nil {
				logger.Fatal().Err(err).Str("mine", rs.cluster.toString()).Msg("failed to import existing cluster info")
			}
		}

		node = rs.restartNode(false)

	case RaftServerStateJoinCluster:
		logger.Info().Msg("raft start and join existing cluster")

		var (
			hardstateinfo *types.HardStateInfo
			err           error
		)

		rs.cluster.ResetMembers()

		// get cluster info from existing cluster member and hardstate of bestblock
		if hardstateinfo, err = rs.ImportExistingCluster(); err != nil {
			logger.Fatal().Err(err).Str("mine", rs.cluster.toString()).Msg("failed to import existing cluster info")
		}

		if rs.UseBackup {
			logger.Info().Msg("raft use given backup as wal")

			if err := rs.walDB.ResetWAL(hardstateinfo); err != nil {
				logger.Fatal().Err(err).Msg("reset wal failed for raft")
			}

			if err := rs.SaveIdentity(); err != nil {
				logger.Fatal().Err(err).Msg("failed to save identity")
			}

			node = rs.restartNode(true)

			logger.Info().Msg("raft restarted from backup")
		} else {
			node = rs.startNode(nil)
		}
	case RaftServerStateNewCluster:
		logger.Info().Bool("usebackup", rs.UseBackup).Msg("raft start and make new cluster")

		if rs.UseBackup {
			rs.walDB.ClearWAL()
		}

		var startPeers []raftlib.Peer

		startPeers, err := rs.makeStartPeers()
		if err != nil {
			logger.Fatal().Err(err).Msg("failed to make raft peer list")
		}

		node = rs.startNode(startPeers)
	}

	// need locking for sync with consensusAccessor
	rs.setNodeSync(node)

	rs.startTransport()

	go rs.serveChannels()
}

func (rs *raftServer) ImportExistingCluster() (*types.HardStateInfo, error) {
	logger.Info().Msg("import cluster information from remote")

	// get cluster info from existing cluster member and hardstate of bestblock
	existCluster, hardstateinfo, err := rs.GetExistingCluster()
	if err != nil {
		logger.Fatal().Err(err).Str("mine", rs.cluster.toString()).Msg("failed to get existing cluster info")
	}

	if hardstateinfo != nil {
		logger.Info().Str("hardstate", hardstateinfo.ToString()).Msg("received hard state of best hash from remote cluster")
	}

	// config validate
	if !rs.cluster.ValidateAndMergeExistingCluster(existCluster) {
		logger.Fatal().Str("existcluster", existCluster.toString()).Str("mycluster", rs.cluster.toString()).Msg("this cluster configuration is not compatible with the existing cluster")
	}

	return hardstateinfo, nil
}

func (rs *raftServer) ID() uint64 {
	return rs.cluster.NodeID()
}
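
// startNode bootstraps a completely new raft node. It refuses to start when
// the blockchain already has blocks (unless a backup is being used) or when a
// raft identity has already been written, assigns and persists this node's
// identity, and then starts the raft state machine with the given peer list.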
func (rs *raftServer) startNode(startPeers []raftlib.Peer) raftlib.Node {
	var (
		blk *types.Block
		err error
		id  *consensus.RaftIdentity
	)
	validateEmpty := func() {
		if blk, err = rs.walDB.GetBestBlock(); err != nil {
			logger.Fatal().Err(err).Msg("failed to get best block, so failed to start raft server")
		}
		if blk.BlockNo() > 0 {
			if rs.UseBackup {
				logger.Info().Uint64("best no", blk.BlockNo()).Str("best hash", blk.ID()).Msg("start from existing block chain")
			} else {
				logger.Fatal().Err(err).Msg("blockchain data is not empty, so failed to start raft server")
			}
		}

		if id, err = rs.walDB.GetIdentity(); err == nil && id != nil {
			logger.Fatal().Err(err).Str("id", id.ToString()).Msg("raft identity is already written, so failed to start raft server")
		}
	}

	validateEmpty()

	if err := rs.cluster.SetThisNodeID(); err != nil {
		logger.Fatal().Err(err).Msg("failed to set id of this node")
	}

	// when joining an existing cluster, the cluster ID is already set
	if rs.cluster.ClusterID() == InvalidClusterID {
		rs.cluster.GenerateID(rs.UseBackup)
	}

	if err := rs.SaveIdentity(); err != nil {
		logger.Fatal().Err(err).Str("identity", rs.cluster.identity.ToString()).Msg("failed to save identity")
	}

	rs.raftStorage = raftlib.NewMemoryStorage()

	c := makeConfig(rs.ID(), rs.raftStorage)

	logger.Info().Msg("raft node start")

	return raftlib.StartNode(c, startPeers)
}

func (rs *raftServer) restartNode(join bool) raftlib.Node {
	snapshot, err := rs.loadSnapshot()
	if err != nil {
		logger.Fatal().Err(err).Msg("failed to read snapshot")
	}

	if err := rs.replayWAL(snapshot); err != nil {
		logger.Fatal().Err(err).Msg("replay wal failed for raft")
	}

	// Members of the cluster will be loaded from the snapshot or the WAL.
	// When restarting after a join, the cluster must not be recovered from the temporary snapshot, since its members are empty.
	// Instead, the node must use the cluster info received from a remote server.
	if join == false && snapshot != nil {
		if _, err := rs.cluster.Recover(snapshot); err != nil {
			logger.Fatal().Err(err).Msg("failed to recover cluster from snapshot")
		}
	}

	c := makeConfig(rs.ID(), rs.raftStorage)

	logger.Info().Msg("raft node restart")

	return raftlib.RestartNode(c)
}

func (rs *raftServer) startTransport() {
	//rs.transport = rs.createHttpTransporter()
	rs.transport = rs.createAergoP2PTransporter()

	if err := rs.transport.Start(); err != nil {
		logger.Fatal().Err(err).Msg("failed to start raft transport")
	}

	for _, member := range rs.cluster.Members().MapByID {
		if rs.cluster.NodeID() != member.ID {
			rs.transport.AddPeer(etcdtypes.ID(member.ID), member.GetPeerID(), []string{member.Address})
		}
	}
}

func (rs *raftServer) createHttpTransporter() Transporter {
	transporter := &HttpTransportWrapper{Transport: rafthttp.Transport{
		ID:          etcdtypes.ID(rs.ID()),
		ClusterID:   etcdtypes.ID(rs.cluster.ClusterID()),
		Raft:        rs,
		ServerStats: stats.NewServerStats("", ""),
		LeaderStats: stats.NewLeaderStats(strconv.FormatUint(rs.ID(), 10)),
		Snapshotter: rs.snapshotter,
		ErrorC:      rs.errorC,
	}}
	transporter.SetLogger(httpLogger)
	return transporter
}

func (rs *raftServer) createAergoP2PTransporter() Transporter {
	future := rs.RequestFuture(message.P2PSvc, message.GetRaftTransport{rs.cluster}, time.Second<<4, "getbackend")
	result, err := future.Result()
	if err != nil {
		panic(err.Error())
	}
	return result.(Transporter)
}
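
// SaveIdentity persists this node's raft identity (cluster ID, node ID, name
// and peer ID) to the WAL, after checking that none of the fields is empty.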
func (rs *raftServer) SaveIdentity() error {
	if rs.cluster.ClusterID() == 0 || rs.cluster.NodeID() == consensus.InvalidMemberID || len(rs.cluster.NodeName()) == 0 || len(rs.cluster.NodePeerID()) == 0 {
		logger.Error().Str("identity", rs.cluster.identity.ToString()).Msg("failed to save raft identity: identity is invalid")
		return ErrInvalidRaftIdentity
	}

	if err := rs.walDB.WriteIdentity(&rs.cluster.identity); err != nil {
		logger.Fatal().Err(err).Msg("failed to write raft identity to wal")
		return err
	}

	return nil
}

func (rs *raftServer) setNodeSync(node raftlib.Node) {
	rs.Lock()
	defer rs.Unlock()

	rs.node = node
}

func (rs *raftServer) getNodeSync() raftlib.Node {
	var node raftlib.Node

	rs.RLock()
	defer rs.RUnlock()

	node = rs.node

	return node
}

// stop closes http, closes all channels, and stops raft.
func (rs *raftServer) stop() {
	logger.Info().Msg("stop raft server")

	rs.stopHTTP()
	close(rs.commitC)
	close(rs.errorC)
	rs.node.Stop()
}

func (rs *raftServer) stopHTTP() {
	rs.transport.Stop()
}

func (rs *raftServer) writeError(err error) {
	logger.Error().Err(err).Msg("error occurred in raft server")
}

// TODO timeout handling with context
func (rs *raftServer) Propose(block *types.Block) error {
	if block == nil {
		return ErrProposeNilBlock
	}
	logger.Debug().Msg("propose block")

	if data, err := marshalEntryData(block); err == nil {
		// blocks until accepted by raft state machine
		if err := rs.node.Propose(context.TODO(), data); err != nil {
			return err
		}

		logger.Debug().Int("len", len(data)).Msg("proposed data to raft node")
	} else {
		logger.Fatal().Err(err).Msg("proposed data is invalid")
	}

	return nil
}

func (rs *raftServer) saveConfChangeState(id uint64, state types.ConfChangeState, errCC error) error {
	var errStr string

	if errCC != nil {
		errStr = errCC.Error()
	}

	pr := types.ConfChangeProgress{State: state, Err: errStr, Members: rs.cluster.appliedMembers.ToMemberAttrArray()}

	return rs.walDB.WriteConfChangeProgress(id, &pr)
}

func (rs *raftServer) serveConfChange() {
	handleConfChange := func(propose *consensus.ConfChangePropose) {
		var err error

		err = rs.node.ProposeConfChange(context.TODO(), *propose.Cc)
		if err != nil {
			logger.Error().Err(err).Msg("failed to propose configuration change")
			rs.cluster.AfterConfChange(propose.Cc, nil, err)
		}

		err = rs.saveConfChangeState(propose.Cc.ID, types.ConfChangeState_CONF_CHANGE_STATE_PROPOSED, err)
		if err != nil {
			logger.Error().Err(err).Msg("failed to save progress of configuration change")
		}
	}

	// send proposals over raft
	for rs.confChangeC != nil {
		select {
		case confChangePropose, ok := <-rs.confChangeC:
			if !ok {
				rs.confChangeC = nil
			} else {
				handleConfChange(confChangePropose)
			}
		}
	}
	// client closed channel; shutdown raft if not already
	close(rs.stopc)
}
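
// serveChannels is the main event loop of the raft server. It drives election
// and heartbeat ticks, and for every Ready it persists the hard state and
// entries to the WAL, applies and publishes incoming snapshots, appends
// entries to the in-memory raft storage, sends outgoing messages, publishes
// committed entries to commitC, triggers log compaction, and finally advances
// the raft node.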
func (rs *raftServer) serveChannels() {
	defer RecoverExit()

	snapshot, err := rs.raftStorage.Snapshot()
	if err != nil {
		panic(err)
	}
	rs.setConfState(&snapshot.Metadata.ConfState)
	rs.setSnapshotIndex(snapshot.Metadata.Index)
	rs.setAppliedIndex(snapshot.Metadata.Index)

	ticker := time.NewTicker(rs.tickMS)
	defer ticker.Stop()

	go rs.serveConfChange()

	// event loop on raft state machine updates
	for {
		select {
		case <-ticker.C:
			if rs.GetPromotable() {
				rs.node.Tick()
			}

		// store raft entries to walDB, then publish over commit channel
		case rd := <-rs.node.Ready():
			if len(rd.Entries) > 0 || len(rd.CommittedEntries) > 0 || !raftlib.IsEmptyHardState(rd.HardState) || rd.SoftState != nil {
				logger.Debug().Int("entries", len(rd.Entries)).Int("commitentries", len(rd.CommittedEntries)).Str("hardstate", types.RaftHardStateToString(rd.HardState)).Msg("ready to process")
			}

			if rs.IsLeader() {
				if err := rs.processMessages(rd.Messages); err != nil {
					logger.Fatal().Err(err).Msg("leader process message error")
				}
			}

			if err := rs.walDB.SaveEntry(rd.HardState, rd.Entries); err != nil {
				logger.Fatal().Err(err).Msg("failed to save entry to wal")
			}

			if !raftlib.IsEmptySnap(rd.Snapshot) {
				if err := rs.walDB.WriteSnapshot(&rd.Snapshot); err != nil {
					logger.Fatal().Err(err).Msg("failed to save snapshot to wal")
				}

				if err := rs.raftStorage.ApplySnapshot(rd.Snapshot); err != nil {
					logger.Fatal().Err(err).Msg("failed to apply snapshot")
				}

				if err := rs.publishSnapshot(rd.Snapshot); err != nil {
					logger.Fatal().Err(err).Msg("failed to publish snapshot")
				}
			}
			if err := rs.raftStorage.Append(rd.Entries); err != nil {
				logger.Fatal().Err(err).Msg("failed to append new entries to raft log")
			}

			if !rs.IsLeader() {
				if err := rs.processMessages(rd.Messages); err != nil {
					logger.Fatal().Err(err).Msg("process message error")
				}
			}
			if ok := rs.publishEntries(rs.entriesToApply(rd.CommittedEntries)); !ok {
				rs.stop()
				return
			}
			rs.triggerSnapshot()

			// A new block must be created only after all committed blocks have been connected.
			if !raftlib.IsEmptyHardState(rd.HardState) {
				rs.updateTerm(rd.HardState.Term)
			}

			if rd.SoftState != nil {
				rs.updateLeader(rd.SoftState)
			}

			rs.node.Advance()
		case err := <-rs.errorC:
			rs.writeError(err)
			return

		case <-rs.stopc:
			rs.stop()
			return
		}
	}
}

func (rs *raftServer) processMessages(msgs []raftpb.Message) error {
	var err error
	var tmpSnapMsg *snap.Message

	snapMsgs := make([]*snap.Message, 0)
	// reset MsgSnap to send snap.Message
	for i, msg := range msgs {
		if msg.Type == raftpb.MsgSnap {
			tmpSnapMsg, err = rs.makeSnapMessage(&msg)
			if err != nil {
				return err
			}
			snapMsgs = append(snapMsgs, tmpSnapMsg)

			msgs[i].To = 0
		}
	}

	rs.transport.Send(msgs)

	for _, tmpSnapMsg := range snapMsgs {
		rs.transport.SendSnapshot(*tmpSnapMsg)
	}

	return nil
}
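
// makeSnapMessage wraps a MsgSnap raft message in a snap.Message. The message
// body is only a 4-byte placeholder written through a pipe; the actual chain
// snapshot is generated and sent separately at transmission time (see the
// comment in triggerSnapshot).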
func (rs *raftServer) makeSnapMessage(msg *raftpb.Message) (*snap.Message, error) {
	if msg.Type != raftpb.MsgSnap {
		return nil, ErrNotMsgSnap
	}

	/*
		// make snapshot with last progress of raftserver
		snapshot, err := rs.snapshotter.createSnapshot(rs.prevProgress, rs.confState)
		if err != nil {
			return nil, err
		}

		msg.Snapshot = *snapshot
	*/
	// TODO add cluster info to snapshot.data

	logger.Debug().Uint64("term", msg.Snapshot.Metadata.Term).Uint64("index", msg.Snapshot.Metadata.Index).Msg("send merged snapshot message")

	// the pipe does not carry the real snapshot data; only a small placeholder is written
	pr, pw := io.Pipe()

	go func() {
		buf := new(bytes.Buffer)
		err := binary.Write(buf, binary.LittleEndian, int32(1))
		if err != nil {
			logger.Fatal().Err(err).Msg("raft pipe binary write err")
		}

		n, err := pw.Write(buf.Bytes())
		if err == nil {
			logger.Debug().Msgf("wrote database snapshot out [total bytes: %d]", n)
		} else {
			logger.Error().Msgf("failed to write database snapshot out [written bytes: %d]: %v", n, err)
		}
		if err := pw.CloseWithError(err); err != nil {
			logger.Fatal().Err(err).Msg("raft pipe close error")
		}
	}()

	return snap.NewMessage(*msg, pr, 4), nil
}

func (rs *raftServer) loadSnapshot() (*raftpb.Snapshot, error) {
	snapshot, err := rs.walDB.GetSnapshot()
	if err != nil {
		logger.Fatal().Err(err).Msg("error loading snapshot")
		return nil, err
	}

	if snapshot == nil {
		logger.Info().Msg("snapshot does not exist")
		return nil, nil
	}

	snapdata := &consensus.SnapshotData{}

	err = snapdata.Decode(snapshot.Data)
	if err != nil {
		logger.Fatal().Err(err).Msg("error decoding snapshot")
		return nil, err
	}

	logger.Info().Str("meta", consensus.SnapToString(snapshot, snapdata)).Msg("loaded snapshot meta")

	return snapshot, nil
}

// replayWAL replays WAL entries into the raft instance.
func (rs *raftServer) replayWAL(snapshot *raftpb.Snapshot) error {
	logger.Info().Msg("replaying WAL")

	identity, st, ents, err := rs.walDB.ReadAll(snapshot)
	if err != nil {
		logger.Fatal().Err(err).Msg("failed to read WAL")
		return err
	}

	if err := rs.cluster.RecoverIdentity(identity); err != nil {
		logger.Fatal().Err(err).Msg("failed to recover raft identity from wal")
	}

	rs.raftStorage = raftlib.NewMemoryStorage()
	if snapshot != nil {
		if err := rs.raftStorage.ApplySnapshot(*snapshot); err != nil {
			logger.Fatal().Err(err).Msg("failed to apply snapshot to replay wal")
		}
	}
	if err := rs.raftStorage.SetHardState(*st); err != nil {
		logger.Fatal().Err(err).Msg("failed to set hard state to replay wal")
	}

	// append to storage so raft starts at the right place in log
	if err := rs.raftStorage.Append(ents); err != nil {
		logger.Fatal().Err(err).Msg("failed to append entries to replay wal")
	}
	// send nil once lastIndex is published so client knows commit channel is current
	if len(ents) > 0 {
		rs.lastIndex = ents[len(ents)-1].Index
	}

	rs.updateTerm(st.Term)

	logger.Info().Uint64("lastindex", rs.lastIndex).Msg("replaying WAL done")

	return nil
}

/*
// createSnapshot make marshalled data of chain & cluster info
func (rs *raftServer) createSnapshot() ([]byte, error) {
	// this snapshot is used when reboot and initialize raft log
	if rs.prevProgress.isEmpty() {
		logger.Fatal().Msg("last applied block is nil")
	}

	snapBlock := rs.prevProgress.block

	logger.Info().Str("hash", snapBlock.ID()).Uint64("no", snapBlock.BlockNo()).Msg("create new snapshot of chain")

	snap := consensus.NewChainSnapshot(snapBlock)
	if snap == nil {
		panic("new snap failed")
	}

	return snap.Encode()
}*/

// triggerSnapshot creates a snapshot and compacts the raft log storage.
// Raft cannot wait until the last applied entry is committed, so the snapshot must be created from the current best block.
//
// @ MatchBlockAndCluster
// The snapshot uses the current state of the cluster and confstate, but the last applied block may not be committed yet,
// so raft uses the last committed block. Because of this, some conf change logs can cause errors on a node that receives the snapshot.
func (rs *raftServer) triggerSnapshot() {
	ce := rs.commitProgress.GetConnect()
	newSnapshotIndex, snapBlock := ce.index, ce.block

	if newSnapshotIndex == 0 || rs.confState == nil {
		return
	}

	if len(rs.confState.Nodes) == 0 {
		// TODO Fatal -> Error after test
		logger.Fatal().Msg("confstate node is empty for snapshot")
		return
	}

	if newSnapshotIndex-rs.snapshotIndex <= rs.snapFrequency {
		return
	}

	logger.Info().Uint64("applied", rs.appliedIndex).Uint64("new snap index", newSnapshotIndex).Uint64("last snapshot index", rs.snapshotIndex).Msg("start snapshot")

	// make snapshot data of best block
	snapdata, err := rs.snapshotter.createSnapshotData(rs.cluster, snapBlock, rs.confState)
	if err != nil {
		logger.Fatal().Err(err).Msg("failed to create snapshot data from prev block")
	}

	data, err := snapdata.Encode()
	if err != nil {
		logger.Fatal().Err(err).Msg("failed to marshal snapshot data")
	}

	// snapshot.data is not used for snapshot transfer. At transmission time a new message is generated with up-to-date information and sent.
	snapshot, err := rs.raftStorage.CreateSnapshot(newSnapshotIndex, rs.confState, data)
	if err != nil {
		logger.Fatal().Err(err).Msg("failed to create snapshot")
	}

	// save snapshot to wal
	if err := rs.walDB.WriteSnapshot(&snapshot); err != nil {
		logger.Fatal().Err(err).Msg("failed to write snapshot")
	}

	compactIndex := uint64(1)
	if newSnapshotIndex > ConfSnapshotCatchUpEntriesN {
		compactIndex = newSnapshotIndex - ConfSnapshotCatchUpEntriesN
	}
	if err := rs.raftStorage.Compact(compactIndex); err != nil {
		if err == raftlib.ErrCompacted {
			return
		}
		panic(err)
	}

	logger.Info().Uint64("index", compactIndex).Msg("compacted raft log at index")
	rs.setSnapshotIndex(newSnapshotIndex)

	_ = chain.TestDebugger.Check(chain.DEBUG_RAFT_SNAP_FREQ, 0,
		func(freq int) error {
			rs.snapFrequency = uint64(freq)
			return nil
		})
}
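
// publishSnapshot applies a snapshot received from the leader: it updates the
// conf state, snapshot index and applied index, recovers cluster membership
// from the snapshot, rebuilds the transport peers when the membership changed,
// and records the snapshot block as the last connected commit entry.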
func (rs *raftServer) publishSnapshot(snapshotToSave raftpb.Snapshot) error {
	updateProgress := func() error {
		var snapdata = &consensus.SnapshotData{}

		err := snapdata.Decode(snapshotToSave.Data)
		if err != nil {
			logger.Error().Msg("failed to unmarshal snapshot data to progress")
			return err
		}

		block, err := rs.walDB.GetBlockByNo(snapdata.Chain.No)
		if err != nil {
			logger.Fatal().Msg("failed to get synchronized block")
			return err
		}

		rs.commitProgress.UpdateConnect(&commitEntry{block: block, index: snapshotToSave.Metadata.Index, term: snapshotToSave.Metadata.Term})

		return nil
	}

	if raftlib.IsEmptySnap(snapshotToSave) {
		return ErrEmptySnapshot
	}

	logger.Info().Uint64("index", rs.snapshotIndex).Str("snap", consensus.SnapToString(&snapshotToSave, nil)).Msg("publishing snapshot at index")
	defer logger.Info().Uint64("index", rs.snapshotIndex).Msg("finished publishing snapshot at index")

	if snapshotToSave.Metadata.Index <= rs.appliedIndex {
		logger.Fatal().Msgf("snapshot index [%d] should > progress.appliedIndex [%d] + 1", snapshotToSave.Metadata.Index, rs.appliedIndex)
	}
	//rs.commitC <- nil // trigger kvstore to load snapshot

	rs.setConfState(&snapshotToSave.Metadata.ConfState)
	rs.setSnapshotIndex(snapshotToSave.Metadata.Index)
	rs.setAppliedIndex(snapshotToSave.Metadata.Index)

	var (
		isEqual bool
		err     error
	)

	if isEqual, err = rs.cluster.Recover(&snapshotToSave); err != nil {
		return err
	}

	if !isEqual {
		rs.recoverTransport()
	}

	return updateProgress()
}

func (rs *raftServer) recoverTransport() {
	logger.Info().Msg("remove all peers")
	rs.transport.RemoveAllPeers()

	for _, m := range rs.cluster.AppliedMembers().MapByID {
		if m.ID == rs.cluster.NodeID() {
			continue
		}

		logger.Info().Str("member", m.ToString()).Msg("add raft peer")
		rs.transport.AddPeer(etcdtypes.ID(uint64(m.ID)), m.GetPeerID(), []string{m.Address})
	}
}
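
// entriesToApply filters out committed entries that have already been applied
// and returns only the entries after the current applied index.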
func (rs *raftServer) entriesToApply(ents []raftpb.Entry) (nents []raftpb.Entry) {
	if len(ents) == 0 {
		return
	}
	firstIdx := ents[0].Index
	if firstIdx > rs.appliedIndex+1 {
		logger.Fatal().Msgf("first index of committed entry[%d] should be <= progress.appliedIndex[%d]+1", firstIdx, rs.appliedIndex)
	}
	if rs.appliedIndex-firstIdx+1 < uint64(len(ents)) {
		nents = ents[rs.appliedIndex-firstIdx+1:]
	}
	return nents
}

var (
	ErrInvCCType = errors.New("change type of ")
)

func unmarshalConfChangeEntry(entry *raftpb.Entry) (*raftpb.ConfChange, *consensus.Member, error) {
	var cc raftpb.ConfChange

	if err := cc.Unmarshal(entry.Data); err != nil {
		logger.Fatal().Err(err).Uint64("idx", entry.Index).Uint64("term", entry.Term).Msg("failed to unmarshal conf change entry")
		return nil, nil, err
	}

	// skip confChange with an empty context
	if len(cc.Context) == 0 {
		return nil, nil, nil
	}

	var member = consensus.Member{}
	if err := json.Unmarshal(cc.Context, &member); err != nil {
		logger.Fatal().Err(err).Uint64("idx", entry.Index).Uint64("term", entry.Term).Msg("failed to unmarshal context of conf change entry")
		return nil, nil, err
	}

	return &cc, &member, nil
}

func (rs *raftServer) ValidateConfChangeEntry(entry *raftpb.Entry) (*raftpb.ConfChange, *consensus.Member, error) {
	// TODO XXX validate from current cluster configure
	var cc *raftpb.ConfChange
	var member *consensus.Member
	var err error

	alreadyApplied := func(entry *raftpb.Entry) bool {
		return rs.cluster.appliedTerm >= entry.Term || rs.cluster.appliedIndex >= entry.Index
	}

	cc, member, err = unmarshalConfChangeEntry(entry)
	if err != nil {
		logger.Fatal().Err(err).Str("entry", entry.String()).Msg("failed to unmarshal conf change")
	}

	if alreadyApplied(entry) {
		return cc, member, ErrCCAlreadyApplied
	}

	if err = rs.cluster.validateChangeMembership(cc, member, true); err != nil {
		return cc, member, err
	}

	return cc, member, nil
}

// TODO refactoring by cc.Type
//      separate unmarshal & apply[type]
// applyConfChange returns false if this node is removed from cluster
func (rs *raftServer) applyConfChange(ent *raftpb.Entry) bool {
	var cc *raftpb.ConfChange
	var member *consensus.Member
	var err error

	postWork := func(err error) bool {
		if err != nil {
			cc.NodeID = raftlib.None
			rs.node.ApplyConfChange(*cc)
		}

		if cc.ID != 0 {
			if err = rs.saveConfChangeState(cc.ID, types.ConfChangeState_CONF_CHANGE_STATE_APPLIED, err); err != nil {
				logger.Error().Err(err).Msg("failed to save conf change status")
			}
			rs.cluster.AfterConfChange(cc, member, err)
		}
		return true
	}

	// ConfChanges may be applied more than once, because cluster information is more up-to-date than block information when a snapshot is received.
	if cc, member, err = rs.ValidateConfChangeEntry(ent); err != nil {
		logger.Warn().Err(err).Str("entry", types.RaftEntryToString(ent)).Str("cluster", rs.cluster.toString()).Msg("failed to validate conf change")
		return postWork(err)
	}

	rs.confState = rs.node.ApplyConfChange(*cc)

	logger.Info().Uint64("requestID", cc.ID).Str("type", cc.Type.String()).Str("member", member.ToString()).Msg("publish conf change entry")

	switch cc.Type {
	case raftpb.ConfChangeAddNode:
		if err := rs.cluster.addMember(member, true); err != nil {
			logger.Fatal().Str("member", member.ToString()).Msg("failed to add member to cluster")
		}

		if len(cc.Context) > 0 && rs.ID() != cc.NodeID {
			rs.transport.AddPeer(etcdtypes.ID(cc.NodeID), member.GetPeerID(), []string{member.Address})
		} else {
			logger.Debug().Msg("skip adding myself as a peer for addnode")
		}
	case raftpb.ConfChangeRemoveNode:
		if err := rs.cluster.removeMember(member); err != nil {
			logger.Fatal().Str("member", member.ToString()).Msg("failed to remove member from cluster")
		}

		if cc.NodeID == rs.ID() {
			logger.Info().Msg("I've been removed from the cluster! Shutting down.")
			return false
		}
		rs.transport.RemovePeer(etcdtypes.ID(cc.NodeID))
	}

	logger.Debug().Uint64("requestID", cc.ID).Str("cluster", rs.cluster.toString()).Msg("after conf changed")

	return postWork(nil)
}

// publishEntries writes committed log entries to the commit channel and
// returns whether all entries could be published.
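// Conf-change entries are handled via applyConfChange; publishEntries returns
// false when this node has been removed from the cluster or when the server is
// shutting down, so that the caller can stop the raft server.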
func (rs *raftServer) publishEntries(ents []raftpb.Entry) bool {
	isDuplicateCommit := func(block *types.Block) bool {
		lastReq := rs.commitProgress.GetRequest()

		if lastReq != nil && lastReq.block.BlockNo() >= block.BlockNo() {
			if StopDupCommit {
				logger.Fatal().Str("last", lastReq.block.ID()).Str("dup", block.ID()).Uint64("no", block.BlockNo()).Msg("fork occurred by invalid commit entry")
			} else {
				logger.Debug().Str("last", lastReq.block.ID()).Str("dup", block.ID()).Uint64("no", block.BlockNo()).Msg("skip commit entry of smaller index")
			}

			return true
		}

		return false
	}

	for i := range ents {
		logger.Info().Uint64("idx", ents[i].Index).Uint64("term", ents[i].Term).Str("type", ents[i].Type.String()).Int("datalen", len(ents[i].Data)).Msg("publish entry")

		switch ents[i].Type {
		case raftpb.EntryNormal:
			var block *types.Block
			var err error
			if len(ents[i].Data) != 0 {
				if block, err = unmarshalEntryData(ents[i].Data); err != nil {
					logger.Fatal().Err(err).Uint64("idx", ents[i].Index).Uint64("term", ents[i].Term).Msg("commit entry is corrupted")
					continue
				}

				if block != nil {
					if isDuplicateCommit(block) {
						continue
					}

					logger.Info().Str("hash", block.ID()).Uint64("no", block.BlockNo()).Msg("commit normal block entry")
					rs.commitProgress.UpdateRequest(&commitEntry{block: block, index: ents[i].Index, term: ents[i].Term})
				}
			}

			select {
			case rs.commitC <- &commitEntry{block: block, index: ents[i].Index, term: ents[i].Term}:
			case <-rs.stopc:
				return false
			}

		case raftpb.EntryConfChange:
			if !rs.applyConfChange(&ents[i]) {
				return false
			}
		}

		// after commit, update appliedIndex
		rs.setAppliedIndex(ents[i].Index)
	}

	return true
}

func (rs *raftServer) setSnapshotIndex(idx uint64) {
	logger.Debug().Uint64("index", idx).Msg("raft server set snapshotIndex")

	rs.snapshotIndex = idx
}

func (rs *raftServer) setAppliedIndex(idx uint64) {
	logger.Debug().Uint64("index", idx).Msg("raft server set appliedIndex")

	rs.appliedIndex = idx
}

func (rs *raftServer) setConfState(state *raftpb.ConfState) {
	logger.Debug().Str("state", consensus.ConfStateToString(state)).Msg("raft server set confstate")

	rs.confState = state
}

func (rs *raftServer) Process(ctx context.Context, m raftpb.Message) error {
	node := rs.getNodeSync()
	if node == nil {
		return ErrRaftNotReady
	}
	return node.Step(ctx, m)
}

func (rs *raftServer) IsIDRemoved(id uint64) bool {
	return rs.cluster.IsIDRemoved(id)
}

func (rs *raftServer) ReportUnreachable(id uint64) {
	logger.Debug().Str("toID", EtcdIDToString(id)).Msg("report unreachable")

	rs.node.ReportUnreachable(id)
}

func (rs *raftServer) ReportSnapshot(id uint64, status raftlib.SnapshotStatus) {
	logger.Info().Str("toID", EtcdIDToString(id)).Bool("isSucceed", status == raftlib.SnapshotFinish).Msg("finished to send snapshot")

	rs.node.ReportSnapshot(id, status)
}

func (rs *raftServer) WaitStartup() {
	logger.Debug().Msg("raft start wait")
	for s := range rs.commitC {
		if s == nil {
			break
		}
	}
	logger.Debug().Msg("raft start succeeded")
}

// updateTerm is called only by the raft server loop, so it does not take a lock.
func (rs *raftServer) updateTerm(term uint64) {
	rs.curTerm = term
}

func (rs *raftServer) updateLeader(softState *raftlib.SoftState) {
	if softState.Lead != rs.GetLeader() {
		rs.leaderStatus.Lock()
		defer rs.leaderStatus.Unlock()

		rs.leaderStatus.Leader = softState.Lead

		if rs.curTerm == 0 {
			logger.Fatal().Msg("term must not be 0")
		}
		rs.leaderStatus.Term = rs.curTerm

		rs.leaderStatus.IsLeader = rs.checkLeader()
		rs.leaderStatus.leaderChanged++

		logger.Info().Uint64("term", rs.curTerm).Str("ID", EtcdIDToString(rs.ID())).Str("leader", EtcdIDToString(softState.Lead)).Msg("leader changed")
	} else {
		logger.Info().Uint64("term", rs.curTerm).Str("ID", EtcdIDToString(rs.ID())).Str("leader", EtcdIDToString(softState.Lead)).Msg("soft state leader unchanged")
	}
}

func (rs *raftServer) GetLeader() uint64 {
	rs.leaderStatus.RLock()
	defer rs.leaderStatus.RUnlock()

	return rs.leaderStatus.Leader
}

func (rs *raftServer) checkLeader() bool {
	return rs.ID() != consensus.InvalidMemberID && rs.ID() == rs.leaderStatus.Leader
}

func (rs *raftServer) IsLeader() bool {
	rs.leaderStatus.RLock()
	defer rs.leaderStatus.RUnlock()

	return rs.leaderStatus.IsLeader
}

func (rs *raftServer) GetLeaderStatus() LeaderStatus {
	rs.leaderStatus.RLock()
	defer rs.leaderStatus.RUnlock()

	tmpStatus := rs.leaderStatus
	return tmpStatus
}

// IsLeaderOfTerm returns true if this node is the leader of the given term.
func (rs *raftServer) IsLeaderOfTerm(term uint64) bool {
	status := rs.GetLeaderStatus()
	return status.IsLeader && status.Term == term
}

func (rs *raftServer) Status() raftlib.Status {
	node := rs.getNodeSync()
	if node == nil {
		return raftlib.Status{}
	}

	return node.Status()
}

type MemberProgressState int32

const (
	MemberProgressStateHealthy MemberProgressState = iota
	MemberProgressStateSlow
	MemberProgressStateSyncing
	MemberProgressStateUnknown
)

var (
	MemberProgressStateNames = map[MemberProgressState]string{
		MemberProgressStateHealthy: "MemberProgressStateHealthy",
		MemberProgressStateSlow:    "MemberProgressStateSlow",
		MemberProgressStateSyncing: "MemberProgressStateSyncing",
		MemberProgressStateUnknown: "MemberProgressStateUnknown",
	}
)

type MemberProgress struct {
	MemberID      uint64
	Status        MemberProgressState
	LogDifference uint64

	progress raftlib.Progress
}

type ClusterProgress struct {
	N int

	MemberProgresses map[uint64]*MemberProgress
}

func (cp *ClusterProgress) ToString() string {
	buf := fmt.Sprintf("{ Total: %d, Members[", cp.N)

	for _, mp := range cp.MemberProgresses {
		buf = buf + mp.ToString()
	}

	buf = buf + "] }"

	return buf
}

func (cp *MemberProgress) ToString() string {
	return fmt.Sprintf("{ id: %x, Status: \"%s\", LogDifference: %d }", cp.MemberID, MemberProgressStateNames[cp.Status], cp.LogDifference)
}

func (rs *raftServer) GetLastIndex() (uint64, error) {
	return rs.raftStorage.LastIndex()
}
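
// GetClusterProgress reports, from the leader's point of view, how far each
// member lags behind: healthy, slow (probing or too far behind the leader's
// last index), or syncing (receiving a snapshot). On non-leader nodes it
// returns an empty progress report.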
func (rs *raftServer) GetClusterProgress() (*ClusterProgress, error) {
	getProgressState := func(raftProgress *raftlib.Progress, lastLeaderIndex uint64, nodeID uint64, leadID uint64) MemberProgressState {
		isLeader := nodeID == leadID

		if !isLeader {
			// syncing
			if raftProgress.State == raftlib.ProgressStateSnapshot {
				return MemberProgressStateSyncing
			}

			// slow
			// - Even if the node state is ProgressStateReplicate, the node is considered slow when its match index is too far behind the leader's last index.
			var isSlowFollower bool
			if lastLeaderIndex > raftProgress.Match && (lastLeaderIndex-raftProgress.Match) > MaxSlowNodeGap {
				isSlowFollower = true
			}

			if raftProgress.State == raftlib.ProgressStateProbe || isSlowFollower {
				return MemberProgressStateSlow
			}
		}
		// normal
		return MemberProgressStateHealthy
	}

	var (
		lastIdx uint64
		err     error
	)

	prog := ClusterProgress{}

	node := rs.getNodeSync()
	if node == nil || !rs.IsLeader() {
		return &prog, nil
	}

	status := node.Status()

	n := len(status.Progress)
	if n == 0 {
		return &prog, nil
	}

	statusBytes, err := status.MarshalJSON()
	if err != nil {
		logger.Error().Err(err).Msg("failed to marshal raft status")
	} else {
		logger.Debug().Str("status", string(statusBytes)).Msg("raft status")
	}

	if lastIdx, err = rs.GetLastIndex(); err != nil {
		logger.Error().Err(err).Msg("failed to get last raft index on leader")
		return &prog, err
	}

	prog.MemberProgresses = make(map[uint64]*MemberProgress)
	prog.N = n
	for id, nodeProgress := range status.Progress {
		prog.MemberProgresses[id] = &MemberProgress{MemberID: id, Status: getProgressState(&nodeProgress, lastIdx, rs.cluster.NodeID(), id), LogDifference: lastIdx - nodeProgress.Match, progress: nodeProgress}
	}

	return &prog, nil
}

// GetExistingCluster returns the information of an existing cluster.
// It requests member info from all peers.
func (rs *raftServer) GetExistingCluster() (*Cluster, *types.HardStateInfo, error) {
	var (
		cl        *Cluster
		hardstate *types.HardStateInfo
		err       error
		bestHash  []byte
		bestBlk   *types.Block
	)

	getBestHash := func() []byte {
		if bestBlk, err = rs.walDB.GetBestBlock(); err != nil {
			logger.Fatal().Msg("failed to get best block of my chain to get existing cluster info")
		}

		if bestBlk.BlockNo() == 0 {
			return nil
		}

		logger.Info().Str("hash", bestBlk.ID()).Uint64("no", bestBlk.BlockNo()).Msg("best block of blockchain")

		return bestBlk.BlockHash()
	}

	bestHash = getBestHash()

	for i := 1; i <= MaxTryGetCluster; i++ {
		cl, hardstate, err = GetClusterInfo(rs.ComponentHub, bestHash)
		if err != nil {
			if err != ErrGetClusterTimeout && i != MaxTryGetCluster {
				logger.Error().Err(err).Int("try", i).Msg("failed to get cluster info; retrying after sleep")
				time.Sleep(time.Second * 10)
			} else {
				logger.Warn().Err(err).Int("try", i).Msg("failed to get cluster info")
			}
			continue
		}

		return cl, hardstate, nil
	}

	return nil, nil, ErrGetClusterFail
}
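
// marshalEntryData encodes a block into the protobuf bytes that are stored as
// the data of a raft log entry; unmarshalEntryData performs the reverse.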
func marshalEntryData(block *types.Block) ([]byte, error) {
	var data []byte
	var err error
	if data, err = proto.Marshal(block); err != nil {
		logger.Fatal().Err(err).Msg("proposed data is invalid")
	}

	return data, nil
}

var (
	ErrUnmarshal = errors.New("failed to unmarshal log entry")
)

func unmarshalEntryData(data []byte) (*types.Block, error) {
	block := &types.Block{}
	if err := proto.Unmarshal(data, block); err != nil {
		return block, ErrUnmarshal
	}

	return block, nil
}

type raftHttpWrapper struct {
	bf         *BlockFactory
	raftServer *raftServer
}

func (rhw *raftHttpWrapper) Process(ctx context.Context, peerID types.PeerID, m raftpb.Message) error {
	return rhw.raftServer.Process(ctx, m)
}

func (rhw *raftHttpWrapper) IsIDRemoved(peerID types.PeerID) bool {
	if member := rhw.raftServer.cluster.Members().getMemberByPeerID(peerID); member != nil {
		return rhw.raftServer.IsIDRemoved(member.ID)
	}
	return true
}

func (rhw *raftHttpWrapper) ReportUnreachable(peerID types.PeerID) {
	if member := rhw.raftServer.cluster.Members().getMemberByPeerID(peerID); member != nil {
		rhw.raftServer.ReportUnreachable(member.ID)
	}
}

func (rhw *raftHttpWrapper) ReportSnapshot(peerID types.PeerID, status raftlib.SnapshotStatus) {
	if member := rhw.raftServer.cluster.Members().getMemberByPeerID(peerID); member != nil {
		rhw.raftServer.ReportSnapshot(member.ID, status)
	}
}

func (rhw *raftHttpWrapper) GetMemberByID(id uint64) *consensus.Member {
	return rhw.raftServer.cluster.Members().getMember(id)
}

func (rhw *raftHttpWrapper) GetMemberByPeerID(peerID types.PeerID) *consensus.Member {
	return rhw.raftServer.cluster.Members().getMemberByPeerID(peerID)
}

func (rhw *raftHttpWrapper) SaveFromRemote(r io.Reader, id uint64, msg raftpb.Message) (int64, error) {
	return rhw.raftServer.snapshotter.SaveFromRemote(r, id, msg)
}