github.com/aergoio/aergo@v1.3.1/consensus/impl/raftv2/snapshot.go (about)

     1  package raftv2
     2  
     3  import (
     4  	"errors"
     5  	chainsvc "github.com/aergoio/aergo/chain"
     6  	"github.com/aergoio/aergo/consensus"
     7  	"github.com/aergoio/aergo/consensus/chain"
     8  	"github.com/aergoio/aergo/p2p/p2pcommon"
     9  	"github.com/aergoio/aergo/p2p/p2putil"
    10  	"github.com/aergoio/aergo/pkg/component"
    11  	"github.com/aergoio/aergo/types"
    12  	"github.com/aergoio/etcd/raft/raftpb"
    13  	"io"
    14  	"sync"
    15  	"time"
    16  )
    17  
    18  var (
    19  	DfltTimeWaitPeerLive        = time.Second * 5
    20  	ErrNotMsgSnap               = errors.New("not pb.MsgSnap")
    21  	ErrClusterMismatchConfState = errors.New("members of cluster doesn't match with raft confstate")
    22  )
    23  
// getLeaderFuncType is the signature of the callback a ChainSnapshotter uses
// to obtain the raft ID of the current leader. requestSync compares the
// returned value with HasNoLeader to detect that no leader is known.
type getLeaderFuncType func() uint64
    25  
// ChainSnapshotter receives raft snapshot messages and brings the local chain
// up to the block they describe by requesting a chain sync from a live peer.
type ChainSnapshotter struct {
	// Embedded mutex; setPeerAccessor holds it while replacing pa.
	sync.Mutex

	// pa is used by checkPeerLive to look up a peer by its ID.
	pa p2pcommon.PeerAccessor

	// Embedded hub, passed to chain.SyncChain by requestSync.
	*component.ComponentHub
	// cluster resolves raft member IDs to peer IDs for requestSync.
	cluster *Cluster

	// walDB is stored by the constructor; it is not read anywhere in this file.
	walDB *WalDB

	// getLeaderFunc returns the raft ID of the current leader.
	getLeaderFunc getLeaderFuncType
}
    38  
    39  func newChainSnapshotter(pa p2pcommon.PeerAccessor, hub *component.ComponentHub, cluster *Cluster, walDB *WalDB, getLeader getLeaderFuncType) *ChainSnapshotter {
    40  	return &ChainSnapshotter{pa: pa, ComponentHub: hub, cluster: cluster, walDB: walDB, getLeaderFunc: getLeader}
    41  }
    42  
    43  func (chainsnap *ChainSnapshotter) setPeerAccessor(pa p2pcommon.PeerAccessor) {
    44  	chainsnap.Lock()
    45  	defer chainsnap.Unlock()
    46  
    47  	chainsnap.pa = pa
    48  }
    49  
/* createSnapshot is not used at present, since no new MsgSnap is made
    51  // createSnapshot make marshalled data of chain & cluster info
    52  func (chainsnap *ChainSnapshotter) createSnapshot(prevProgress BlockProgress, confState raftpb.ConfState) (*raftpb.Snapshot, error) {
    53  	if prevProgress.isEmpty() {
    54  		return nil, ErrEmptyProgress
    55  	}
    56  
    57  	snapdata, err := chainsnap.createSnapshotData(chainsnap.cluster, prevProgress.block)
    58  	if err != nil {
    59  		logger.Fatal().Err(err).Msg("make snapshot of chain")
    60  		return nil, err
    61  	}
    62  
    63  
    64  	data, err := snapdata.Encode()
    65  	if err != nil {
    66  		logger.Fatal().Err(err).Msg("failed to marshale snapshot of chain")
    67  		return nil, err
    68  	}
    69  
    70  	snapshot := &raftpb.Snapshot{
    71  		Metadata: raftpb.SnapshotMetadata{
    72  			Index:     prevProgress.index,
    73  			Term:      prevProgress.term,
    74  			ConfState: confState,
    75  		},
    76  		Data: data,
    77  	}
    78  
    79  	logger.Info().Str("snapshot", consensus.SnapToString(snapshot, snapdata)).Msg("raft snapshot for remote")
    80  
    81  	return snapshot, nil
    82  }
    83  */
    84  
    85  // createSnapshotData generate serialized data of chain and cluster info
    86  func (chainsnap *ChainSnapshotter) createSnapshotData(cluster *Cluster, snapBlock *types.Block, confstate *raftpb.ConfState) (*consensus.SnapshotData, error) {
    87  	logger.Info().Str("hash", snapBlock.ID()).Uint64("no", snapBlock.BlockNo()).Msg("create new snapshot data of block")
    88  
    89  	cluster.Lock()
    90  	defer cluster.Unlock()
    91  
    92  	if !cluster.isMatch(confstate) {
    93  		logger.Fatal().Str("confstate", consensus.ConfStateToString(confstate)).Str("cluster", cluster.toStringWithLock()).Msg("cluster doesn't match with confstate")
    94  		return nil, ErrClusterMismatchConfState
    95  	}
    96  
    97  	members := cluster.AppliedMembers().ToArray()
    98  	removedMembers := cluster.RemovedMembers().ToArray()
    99  
   100  	snap := consensus.NewSnapshotData(members, removedMembers, snapBlock)
   101  	if snap == nil {
   102  		panic("new snap failed")
   103  	}
   104  
   105  	return snap, nil
   106  }
   107  
   108  // chainSnapshotter rece ives snapshot from http request
   109  // TODO replace rafthttp with p2p
   110  func (chainsnap *ChainSnapshotter) SaveFromRemote(r io.Reader, id uint64, msg raftpb.Message) (int64, error) {
   111  	defer RecoverExit()
   112  
   113  	if msg.Type != raftpb.MsgSnap {
   114  		logger.Error().Int32("type", int32(msg.Type)).Msg("received msg snap is invalid type")
   115  		return 0, ErrNotMsgSnap
   116  	}
   117  
   118  	// not return until block sync is complete
   119  	// receive chain & request sync & wait
   120  	return 0, chainsnap.syncSnap(&msg.Snapshot)
   121  }
   122  
   123  func (chainsnap *ChainSnapshotter) syncSnap(snap *raftpb.Snapshot) error {
   124  	var snapdata = &consensus.SnapshotData{}
   125  
   126  	err := snapdata.Decode(snap.Data)
   127  	if err != nil {
   128  		logger.Error().Msg("failed to unmarshal snapshot data to write")
   129  		return err
   130  	}
   131  
   132  	// write snapshot log in WAL for crash recovery
   133  	logger.Info().Str("snap", consensus.SnapToString(snap, snapdata)).Msg("start to sync snapshot")
   134  	// TODO	request sync for chain with snapshot.data
   135  	// wait to finish sync of chain
   136  	if err := chainsnap.requestSync(&snapdata.Chain); err != nil {
   137  		logger.Error().Err(err).Msg("failed to sync snapshot")
   138  		return err
   139  	}
   140  
   141  	logger.Info().Str("snap", consensus.SnapToString(snap, snapdata)).Msg("finished to sync snapshot")
   142  
   143  	return nil
   144  }
   145  
   146  func (chainsnap *ChainSnapshotter) checkPeerLive(peerID types.PeerID) bool {
   147  	if chainsnap.pa == nil {
   148  		logger.Fatal().Msg("peer accessor of chain snapshotter is not set")
   149  	}
   150  
   151  	_, ok := chainsnap.pa.GetPeer(peerID)
   152  	return ok
   153  }
   154  
   155  // TODO handle error case that leader stops while synchronizing
   156  func (chainsnap *ChainSnapshotter) requestSync(snap *consensus.ChainSnapshot) error {
   157  
   158  	var leader uint64
   159  	getSyncLeader := func() (types.PeerID, error) {
   160  		var peerID types.PeerID
   161  		var err error
   162  
   163  		for {
   164  			leader = chainsnap.getLeaderFunc()
   165  
   166  			if leader == HasNoLeader {
   167  				peerID, err = chainsnap.cluster.getAnyPeerAddressToSync()
   168  				if err != nil {
   169  					logger.Error().Err(err).Str("leader", EtcdIDToString(leader)).Msg("can't get peeraddress of leader")
   170  					return "", err
   171  				}
   172  			} else {
   173  				peerID, err = chainsnap.cluster.Members().getMemberPeerAddress(leader)
   174  				if err != nil {
   175  					logger.Error().Err(err).Str("leader", EtcdIDToString(leader)).Msg("can't get peeraddress of leader")
   176  					return "", err
   177  				}
   178  			}
   179  
   180  			if chainsnap.checkPeerLive(peerID) {
   181  				break
   182  			}
   183  
   184  			logger.Debug().Str("peer", p2putil.ShortForm(peerID)).Str("leader", EtcdIDToString(leader)).Msg("peer is not alive")
   185  
   186  			time.Sleep(DfltTimeWaitPeerLive)
   187  		}
   188  
   189  		logger.Debug().Str("peer", p2putil.ShortForm(peerID)).Str("leader", EtcdIDToString(leader)).Msg("target peer to sync")
   190  
   191  		return peerID, err
   192  	}
   193  
   194  	chainsvc.TestDebugger.Check(chainsvc.DEBUG_SYNCER_CRASH, 1, nil)
   195  
   196  	peerID, err := getSyncLeader()
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	if err := chain.SyncChain(chainsnap.ComponentHub, snap.Hash, snap.No, peerID); err != nil {
   202  		return err
   203  	}
   204  
   205  	return nil
   206  }