github.com/aergoio/aergo@v1.3.1/consensus/impl/raftv2/snapshot.go (about) 1 package raftv2 2 3 import ( 4 "errors" 5 chainsvc "github.com/aergoio/aergo/chain" 6 "github.com/aergoio/aergo/consensus" 7 "github.com/aergoio/aergo/consensus/chain" 8 "github.com/aergoio/aergo/p2p/p2pcommon" 9 "github.com/aergoio/aergo/p2p/p2putil" 10 "github.com/aergoio/aergo/pkg/component" 11 "github.com/aergoio/aergo/types" 12 "github.com/aergoio/etcd/raft/raftpb" 13 "io" 14 "sync" 15 "time" 16 ) 17 18 var ( 19 DfltTimeWaitPeerLive = time.Second * 5 20 ErrNotMsgSnap = errors.New("not pb.MsgSnap") 21 ErrClusterMismatchConfState = errors.New("members of cluster doesn't match with raft confstate") 22 ) 23 24 type getLeaderFuncType func() uint64 25 26 type ChainSnapshotter struct { 27 sync.Mutex 28 29 pa p2pcommon.PeerAccessor 30 31 *component.ComponentHub 32 cluster *Cluster 33 34 walDB *WalDB 35 36 getLeaderFunc getLeaderFuncType 37 } 38 39 func newChainSnapshotter(pa p2pcommon.PeerAccessor, hub *component.ComponentHub, cluster *Cluster, walDB *WalDB, getLeader getLeaderFuncType) *ChainSnapshotter { 40 return &ChainSnapshotter{pa: pa, ComponentHub: hub, cluster: cluster, walDB: walDB, getLeaderFunc: getLeader} 41 } 42 43 func (chainsnap *ChainSnapshotter) setPeerAccessor(pa p2pcommon.PeerAccessor) { 44 chainsnap.Lock() 45 defer chainsnap.Unlock() 46 47 chainsnap.pa = pa 48 } 49 50 /* createSnapshot isn't used this api since new MsgSnap isn't made 51 // createSnapshot make marshalled data of chain & cluster info 52 func (chainsnap *ChainSnapshotter) createSnapshot(prevProgress BlockProgress, confState raftpb.ConfState) (*raftpb.Snapshot, error) { 53 if prevProgress.isEmpty() { 54 return nil, ErrEmptyProgress 55 } 56 57 snapdata, err := chainsnap.createSnapshotData(chainsnap.cluster, prevProgress.block) 58 if err != nil { 59 logger.Fatal().Err(err).Msg("make snapshot of chain") 60 return nil, err 61 } 62 63 64 data, err := snapdata.Encode() 65 if err != nil { 66 logger.Fatal().Err(err).Msg("failed to marshale snapshot of chain") 67 return nil, err 68 } 69 70 snapshot := &raftpb.Snapshot{ 71 Metadata: raftpb.SnapshotMetadata{ 72 Index: prevProgress.index, 73 Term: prevProgress.term, 74 ConfState: confState, 75 }, 76 Data: data, 77 } 78 79 logger.Info().Str("snapshot", consensus.SnapToString(snapshot, snapdata)).Msg("raft snapshot for remote") 80 81 return snapshot, nil 82 } 83 */ 84 85 // createSnapshotData generate serialized data of chain and cluster info 86 func (chainsnap *ChainSnapshotter) createSnapshotData(cluster *Cluster, snapBlock *types.Block, confstate *raftpb.ConfState) (*consensus.SnapshotData, error) { 87 logger.Info().Str("hash", snapBlock.ID()).Uint64("no", snapBlock.BlockNo()).Msg("create new snapshot data of block") 88 89 cluster.Lock() 90 defer cluster.Unlock() 91 92 if !cluster.isMatch(confstate) { 93 logger.Fatal().Str("confstate", consensus.ConfStateToString(confstate)).Str("cluster", cluster.toStringWithLock()).Msg("cluster doesn't match with confstate") 94 return nil, ErrClusterMismatchConfState 95 } 96 97 members := cluster.AppliedMembers().ToArray() 98 removedMembers := cluster.RemovedMembers().ToArray() 99 100 snap := consensus.NewSnapshotData(members, removedMembers, snapBlock) 101 if snap == nil { 102 panic("new snap failed") 103 } 104 105 return snap, nil 106 } 107 108 // chainSnapshotter rece ives snapshot from http request 109 // TODO replace rafthttp with p2p 110 func (chainsnap *ChainSnapshotter) SaveFromRemote(r io.Reader, id uint64, msg raftpb.Message) (int64, error) { 111 defer RecoverExit() 112 113 if msg.Type != raftpb.MsgSnap { 114 logger.Error().Int32("type", int32(msg.Type)).Msg("received msg snap is invalid type") 115 return 0, ErrNotMsgSnap 116 } 117 118 // not return until block sync is complete 119 // receive chain & request sync & wait 120 return 0, chainsnap.syncSnap(&msg.Snapshot) 121 } 122 123 func (chainsnap *ChainSnapshotter) syncSnap(snap *raftpb.Snapshot) error { 124 var snapdata = &consensus.SnapshotData{} 125 126 err := snapdata.Decode(snap.Data) 127 if err != nil { 128 logger.Error().Msg("failed to unmarshal snapshot data to write") 129 return err 130 } 131 132 // write snapshot log in WAL for crash recovery 133 logger.Info().Str("snap", consensus.SnapToString(snap, snapdata)).Msg("start to sync snapshot") 134 // TODO request sync for chain with snapshot.data 135 // wait to finish sync of chain 136 if err := chainsnap.requestSync(&snapdata.Chain); err != nil { 137 logger.Error().Err(err).Msg("failed to sync snapshot") 138 return err 139 } 140 141 logger.Info().Str("snap", consensus.SnapToString(snap, snapdata)).Msg("finished to sync snapshot") 142 143 return nil 144 } 145 146 func (chainsnap *ChainSnapshotter) checkPeerLive(peerID types.PeerID) bool { 147 if chainsnap.pa == nil { 148 logger.Fatal().Msg("peer accessor of chain snapshotter is not set") 149 } 150 151 _, ok := chainsnap.pa.GetPeer(peerID) 152 return ok 153 } 154 155 // TODO handle error case that leader stops while synchronizing 156 func (chainsnap *ChainSnapshotter) requestSync(snap *consensus.ChainSnapshot) error { 157 158 var leader uint64 159 getSyncLeader := func() (types.PeerID, error) { 160 var peerID types.PeerID 161 var err error 162 163 for { 164 leader = chainsnap.getLeaderFunc() 165 166 if leader == HasNoLeader { 167 peerID, err = chainsnap.cluster.getAnyPeerAddressToSync() 168 if err != nil { 169 logger.Error().Err(err).Str("leader", EtcdIDToString(leader)).Msg("can't get peeraddress of leader") 170 return "", err 171 } 172 } else { 173 peerID, err = chainsnap.cluster.Members().getMemberPeerAddress(leader) 174 if err != nil { 175 logger.Error().Err(err).Str("leader", EtcdIDToString(leader)).Msg("can't get peeraddress of leader") 176 return "", err 177 } 178 } 179 180 if chainsnap.checkPeerLive(peerID) { 181 break 182 } 183 184 logger.Debug().Str("peer", p2putil.ShortForm(peerID)).Str("leader", EtcdIDToString(leader)).Msg("peer is not alive") 185 186 time.Sleep(DfltTimeWaitPeerLive) 187 } 188 189 logger.Debug().Str("peer", p2putil.ShortForm(peerID)).Str("leader", EtcdIDToString(leader)).Msg("target peer to sync") 190 191 return peerID, err 192 } 193 194 chainsvc.TestDebugger.Check(chainsvc.DEBUG_SYNCER_CRASH, 1, nil) 195 196 peerID, err := getSyncLeader() 197 if err != nil { 198 return err 199 } 200 201 if err := chain.SyncChain(chainsnap.ComponentHub, snap.Hash, snap.No, peerID); err != nil { 202 return err 203 } 204 205 return nil 206 }