github.com/kisexp/xdchain@v0.0.0-20211206025815-490d6b732aa7/raft/snapshot.go (about)

     1  package raft
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"math/big"
     8  	"net"
     9  	"sort"
    10  	"time"
    11  
    12  	"github.com/coreos/etcd/raft/raftpb"
    13  	"github.com/coreos/etcd/snap"
    14  	"github.com/coreos/etcd/wal/walpb"
    15  	mapset "github.com/deckarep/golang-set"
    16  	"github.com/kisexp/xdchain/common"
    17  	"github.com/kisexp/xdchain/core/types"
    18  	"github.com/kisexp/xdchain/eth/downloader"
    19  	"github.com/kisexp/xdchain/log"
    20  	"github.com/kisexp/xdchain/p2p/enode"
    21  	"github.com/kisexp/xdchain/p2p/enr"
    22  	"github.com/kisexp/xdchain/permission/core"
    23  	"github.com/kisexp/xdchain/rlp"
    24  )
    25  
    26  type SnapshotWithHostnames struct {
    27  	Addresses      []Address
    28  	RemovedRaftIds []uint16
    29  	HeadBlockHash  common.Hash
    30  }
    31  
    32  type AddressWithoutHostname struct {
    33  	RaftId   uint16
    34  	NodeId   enode.EnodeID
    35  	Ip       net.IP
    36  	P2pPort  enr.TCP
    37  	RaftPort enr.RaftPort
    38  }
    39  
    40  type SnapshotWithoutHostnames struct {
    41  	Addresses      []AddressWithoutHostname
    42  	RemovedRaftIds []uint16 // Raft IDs for permanently removed peers
    43  	HeadBlockHash  common.Hash
    44  }
    45  
    46  type ByRaftId []Address
    47  
    48  func (a ByRaftId) Len() int           { return len(a) }
    49  func (a ByRaftId) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
    50  func (a ByRaftId) Less(i, j int) bool { return a[i].RaftId < a[j].RaftId }
    51  
    52  func (pm *ProtocolManager) buildSnapshot() *SnapshotWithHostnames {
    53  	pm.mu.RLock()
    54  	defer pm.mu.RUnlock()
    55  
    56  	numNodes := len(pm.confState.Nodes) + len(pm.confState.Learners)
    57  	numRemovedNodes := pm.removedPeers.Cardinality()
    58  
    59  	snapshot := &SnapshotWithHostnames{
    60  		Addresses:      make([]Address, numNodes),
    61  		RemovedRaftIds: make([]uint16, numRemovedNodes),
    62  		HeadBlockHash:  pm.blockchain.CurrentBlock().Hash(),
    63  	}
    64  
    65  	// Populate addresses
    66  
    67  	for i, rawRaftId := range append(pm.confState.Nodes, pm.confState.Learners...) {
    68  		raftId := uint16(rawRaftId)
    69  
    70  		if raftId == pm.raftId {
    71  			snapshot.Addresses[i] = *pm.address
    72  		} else {
    73  			snapshot.Addresses[i] = *pm.peers[raftId].address
    74  		}
    75  	}
    76  	sort.Sort(ByRaftId(snapshot.Addresses))
    77  
    78  	// Populate removed IDs
    79  	i := 0
    80  	for removedIface := range pm.removedPeers.Iterator().C {
    81  		snapshot.RemovedRaftIds[i] = removedIface.(uint16)
    82  		i++
    83  	}
    84  	return snapshot
    85  }
    86  
    87  // Note that we do *not* read `pm.appliedIndex` here. We only use the `index`
    88  // parameter instead. This is because we need to support a scenario when we
    89  // snapshot for a future index that we have not yet recorded in LevelDB. See
    90  // comments around the use of `forceSnapshot`.
    91  func (pm *ProtocolManager) triggerSnapshot(index uint64) {
    92  	pm.mu.RLock()
    93  	snapshotIndex := pm.snapshotIndex
    94  	pm.mu.RUnlock()
    95  
    96  	log.Info("start snapshot", "applied index", pm.appliedIndex, "last snapshot index", snapshotIndex)
    97  
    98  	//snapData := pm.blockchain.CurrentBlock().Hash().Bytes()
    99  	//snap, err := pm.raftStorage.CreateSnapshot(pm.appliedIndex, &pm.confState, snapData)
   100  	snapData := pm.buildSnapshot().toBytes()
   101  	snap, err := pm.raftStorage.CreateSnapshot(index, &pm.confState, snapData)
   102  	if err != nil {
   103  		panic(err)
   104  	}
   105  	if err := pm.saveRaftSnapshot(snap); err != nil {
   106  		panic(err)
   107  	}
   108  	// Discard all log entries prior to index.
   109  	if err := pm.raftStorage.Compact(index); err != nil {
   110  		panic(err)
   111  	}
   112  	log.Info("compacted log", "index", pm.appliedIndex)
   113  
   114  	pm.mu.Lock()
   115  	pm.snapshotIndex = index
   116  	pm.mu.Unlock()
   117  }
   118  
   119  func confStateIdSet(confState raftpb.ConfState) mapset.Set {
   120  	set := mapset.NewSet()
   121  	for _, rawRaftId := range append(confState.Nodes, confState.Learners...) {
   122  		set.Add(uint16(rawRaftId))
   123  	}
   124  	return set
   125  }
   126  
   127  func (pm *ProtocolManager) updateClusterMembership(newConfState raftpb.ConfState, addresses []Address, removedRaftIds []uint16) {
   128  	log.Info("updating cluster membership per raft snapshot")
   129  
   130  	prevConfState := pm.confState
   131  
   132  	// Update tombstones for permanently removed peers. For simplicity we do not
   133  	// allow the re-use of peer IDs once a peer is removed.
   134  
   135  	removedPeers := mapset.NewSet()
   136  	for _, removedRaftId := range removedRaftIds {
   137  		removedPeers.Add(removedRaftId)
   138  	}
   139  	pm.mu.Lock()
   140  	pm.removedPeers = removedPeers
   141  	pm.mu.Unlock()
   142  
   143  	// Remove old peers that we're still connected to
   144  
   145  	prevIds := confStateIdSet(prevConfState)
   146  	newIds := confStateIdSet(newConfState)
   147  	idsToRemove := prevIds.Difference(newIds)
   148  	for idIfaceToRemove := range idsToRemove.Iterator().C {
   149  		raftId := idIfaceToRemove.(uint16)
   150  		log.Info("removing old raft peer", "peer id", raftId)
   151  
   152  		pm.removePeer(raftId)
   153  	}
   154  
   155  	// Update local and remote addresses
   156  
   157  	for _, tempAddress := range addresses {
   158  		address := tempAddress // Allocate separately on the heap for each iteration.
   159  
   160  		if address.RaftId == pm.raftId {
   161  			// If we're a newcomer to an existing cluster, this is where we learn
   162  			// our own Address.
   163  			pm.setLocalAddress(&address)
   164  		} else {
   165  			pm.mu.RLock()
   166  			existingPeer := pm.peers[address.RaftId]
   167  			pm.mu.RUnlock()
   168  
   169  			if existingPeer == nil {
   170  				log.Info("adding new raft peer", "raft id", address.RaftId)
   171  				pm.addPeer(&address)
   172  			}
   173  		}
   174  	}
   175  
   176  	pm.mu.Lock()
   177  	pm.confState = newConfState
   178  	pm.mu.Unlock()
   179  
   180  	log.Info("updated cluster membership")
   181  }
   182  
   183  func (pm *ProtocolManager) maybeTriggerSnapshot() {
   184  	pm.mu.RLock()
   185  	appliedIndex := pm.appliedIndex
   186  	entriesSinceLastSnap := appliedIndex - pm.snapshotIndex
   187  	pm.mu.RUnlock()
   188  
   189  	if entriesSinceLastSnap < snapshotPeriod {
   190  		return
   191  	}
   192  
   193  	pm.triggerSnapshot(appliedIndex)
   194  }
   195  
   196  func (pm *ProtocolManager) loadSnapshot() *raftpb.Snapshot {
   197  	if raftSnapshot := pm.readRaftSnapshot(); raftSnapshot != nil {
   198  		log.Info("loading snapshot")
   199  		pm.applyRaftSnapshot(*raftSnapshot)
   200  
   201  		return raftSnapshot
   202  	} else {
   203  		log.Info("no snapshot to load")
   204  
   205  		return nil
   206  	}
   207  }
   208  
   209  func (snapshot *SnapshotWithHostnames) toBytes() []byte {
   210  	var (
   211  		useOldSnapshot bool
   212  		oldSnapshot    SnapshotWithoutHostnames
   213  		toEncode       interface{}
   214  	)
   215  
   216  	// use old snapshot if all snapshot.Addresses are ips
   217  	// but use the new snapshot if any of it is a hostname
   218  	useOldSnapshot = true
   219  	oldSnapshot.HeadBlockHash, oldSnapshot.RemovedRaftIds = snapshot.HeadBlockHash, snapshot.RemovedRaftIds
   220  	oldSnapshot.Addresses = make([]AddressWithoutHostname, len(snapshot.Addresses))
   221  
   222  	for index, addrWithHost := range snapshot.Addresses {
   223  		// validate addrWithHost.Hostname is a hostname/ip
   224  		ip := net.ParseIP(addrWithHost.Hostname)
   225  		if ip == nil {
   226  			// this is a hostname
   227  			useOldSnapshot = false
   228  			break
   229  		}
   230  		// this is an ip
   231  		oldSnapshot.Addresses[index] = AddressWithoutHostname{
   232  			addrWithHost.RaftId,
   233  			addrWithHost.NodeId,
   234  			ip,
   235  			addrWithHost.P2pPort,
   236  			addrWithHost.RaftPort,
   237  		}
   238  	}
   239  
   240  	if useOldSnapshot {
   241  		toEncode = oldSnapshot
   242  	} else {
   243  		toEncode = snapshot
   244  	}
   245  	buffer, err := rlp.EncodeToBytes(toEncode)
   246  	if err != nil {
   247  		panic(fmt.Sprintf("error: failed to RLP-encode Snapshot: %s", err.Error()))
   248  	}
   249  	return buffer
   250  }
   251  
   252  func bytesToSnapshot(input []byte) *SnapshotWithHostnames {
   253  	var err, errOld error
   254  
   255  	snapshot := new(SnapshotWithHostnames)
   256  	streamNewSnapshot := rlp.NewStream(bytes.NewReader(input), 0)
   257  	if err = streamNewSnapshot.Decode(snapshot); err == nil {
   258  		return snapshot
   259  	}
   260  
   261  	// Build new snapshot with hostname from legacy Address struct
   262  	snapshotOld := new(SnapshotWithoutHostnames)
   263  	streamOldSnapshot := rlp.NewStream(bytes.NewReader(input), 0)
   264  	if errOld = streamOldSnapshot.Decode(snapshotOld); errOld == nil {
   265  		var snapshotConverted SnapshotWithHostnames
   266  		snapshotConverted.RemovedRaftIds, snapshotConverted.HeadBlockHash = snapshotOld.RemovedRaftIds, snapshotOld.HeadBlockHash
   267  		snapshotConverted.Addresses = make([]Address, len(snapshotOld.Addresses))
   268  
   269  		for index, oldAddrWithIp := range snapshotOld.Addresses {
   270  			snapshotConverted.Addresses[index] = Address{
   271  				RaftId:   oldAddrWithIp.RaftId,
   272  				NodeId:   oldAddrWithIp.NodeId,
   273  				Ip:       nil,
   274  				P2pPort:  oldAddrWithIp.P2pPort,
   275  				RaftPort: oldAddrWithIp.RaftPort,
   276  				Hostname: oldAddrWithIp.Ip.String(),
   277  			}
   278  		}
   279  
   280  		return &snapshotConverted
   281  	}
   282  
   283  	fatalf("failed to RLP-decode Snapshot: %v, %v", err, errOld)
   284  	return nil
   285  }
   286  
   287  func (snapshot *SnapshotWithHostnames) EncodeRLP(w io.Writer) error {
   288  	return rlp.Encode(w, []interface{}{snapshot.Addresses, snapshot.RemovedRaftIds, snapshot.HeadBlockHash})
   289  }
   290  
   291  // Raft snapshot
   292  
   293  func (pm *ProtocolManager) saveRaftSnapshot(snap raftpb.Snapshot) error {
   294  	if err := pm.snapshotter.SaveSnap(snap); err != nil {
   295  		return err
   296  	}
   297  
   298  	walSnap := walpb.Snapshot{
   299  		Index: snap.Metadata.Index,
   300  		Term:  snap.Metadata.Term,
   301  	}
   302  
   303  	if err := pm.wal.SaveSnapshot(walSnap); err != nil {
   304  		return err
   305  	}
   306  
   307  	return pm.wal.ReleaseLockTo(snap.Metadata.Index)
   308  }
   309  
   310  func (pm *ProtocolManager) readRaftSnapshot() *raftpb.Snapshot {
   311  	snapshot, err := pm.snapshotter.Load()
   312  	if err != nil && err != snap.ErrNoSnapshot {
   313  		fatalf("error loading snapshot: %v", err)
   314  	}
   315  
   316  	return snapshot
   317  }
   318  
   319  func (pm *ProtocolManager) applyRaftSnapshot(raftSnapshot raftpb.Snapshot) {
   320  	log.Info("applying snapshot to raft storage")
   321  	if err := pm.raftStorage.ApplySnapshot(raftSnapshot); err != nil {
   322  		fatalf("failed to apply snapshot: %s", err)
   323  	}
   324  	snapshot := bytesToSnapshot(raftSnapshot.Data)
   325  
   326  	latestBlockHash := snapshot.HeadBlockHash
   327  
   328  	pm.updateClusterMembership(raftSnapshot.Metadata.ConfState, snapshot.Addresses, snapshot.RemovedRaftIds)
   329  
   330  	preSyncHead := pm.blockchain.CurrentBlock()
   331  
   332  	if latestBlock := pm.blockchain.GetBlockByHash(latestBlockHash); latestBlock == nil {
   333  		pm.syncBlockchainUntil(latestBlockHash)
   334  		pm.logNewlyAcceptedTransactions(preSyncHead)
   335  
   336  		log.Info(chainExtensionMessage, "hash", pm.blockchain.CurrentBlock().Hash())
   337  	} else {
   338  		// added for permissions changes to indicate node sync up has started
   339  		core.SetSyncStatus()
   340  		log.Info("blockchain is caught up; no need to synchronize")
   341  	}
   342  
   343  	snapMeta := raftSnapshot.Metadata
   344  	pm.confState = snapMeta.ConfState
   345  	pm.mu.Lock()
   346  	pm.snapshotIndex = snapMeta.Index
   347  	pm.mu.Unlock()
   348  }
   349  
   350  func (pm *ProtocolManager) syncBlockchainUntil(hash common.Hash) {
   351  	pm.mu.RLock()
   352  	peerMap := make(map[uint16]*Peer, len(pm.peers))
   353  	for raftId, peer := range pm.peers {
   354  		peerMap[raftId] = peer
   355  	}
   356  	pm.mu.RUnlock()
   357  
   358  	for {
   359  		for peerId, peer := range peerMap {
   360  			log.Info("synchronizing with peer", "peer id", peerId, "hash", hash)
   361  
   362  			peerId := peer.p2pNode.ID().String()
   363  			peerIdPrefix := fmt.Sprintf("%x", peer.p2pNode.ID().Bytes()[:8])
   364  
   365  			if err := pm.downloader.Synchronise(peerIdPrefix, hash, big.NewInt(0), downloader.BoundedFullSync); err != nil {
   366  				log.Info("failed to synchronize with peer", "peer id", peerId)
   367  
   368  				time.Sleep(500 * time.Millisecond)
   369  			} else {
   370  				return
   371  			}
   372  		}
   373  	}
   374  }
   375  
   376  func (pm *ProtocolManager) logNewlyAcceptedTransactions(preSyncHead *types.Block) {
   377  	newHead := pm.blockchain.CurrentBlock()
   378  	numBlocks := newHead.NumberU64() - preSyncHead.NumberU64()
   379  	blocks := make([]*types.Block, numBlocks)
   380  	currBlock := newHead
   381  	blocksSeen := 0
   382  	for currBlock.Hash() != preSyncHead.Hash() {
   383  		blocks[int(numBlocks)-(1+blocksSeen)] = currBlock
   384  
   385  		blocksSeen += 1
   386  		currBlock = pm.blockchain.GetBlockByHash(currBlock.ParentHash())
   387  	}
   388  	for _, block := range blocks {
   389  		for _, tx := range block.Transactions() {
   390  			log.EmitCheckpoint(log.TxAccepted, "tx", tx.Hash().Hex())
   391  		}
   392  	}
   393  }