github.com/kisexp/xdchain@v0.0.0-20211206025815-490d6b732aa7/raft/snapshot.go (about) 1 package raft 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "math/big" 8 "net" 9 "sort" 10 "time" 11 12 "github.com/coreos/etcd/raft/raftpb" 13 "github.com/coreos/etcd/snap" 14 "github.com/coreos/etcd/wal/walpb" 15 mapset "github.com/deckarep/golang-set" 16 "github.com/kisexp/xdchain/common" 17 "github.com/kisexp/xdchain/core/types" 18 "github.com/kisexp/xdchain/eth/downloader" 19 "github.com/kisexp/xdchain/log" 20 "github.com/kisexp/xdchain/p2p/enode" 21 "github.com/kisexp/xdchain/p2p/enr" 22 "github.com/kisexp/xdchain/permission/core" 23 "github.com/kisexp/xdchain/rlp" 24 ) 25 26 type SnapshotWithHostnames struct { 27 Addresses []Address 28 RemovedRaftIds []uint16 29 HeadBlockHash common.Hash 30 } 31 32 type AddressWithoutHostname struct { 33 RaftId uint16 34 NodeId enode.EnodeID 35 Ip net.IP 36 P2pPort enr.TCP 37 RaftPort enr.RaftPort 38 } 39 40 type SnapshotWithoutHostnames struct { 41 Addresses []AddressWithoutHostname 42 RemovedRaftIds []uint16 // Raft IDs for permanently removed peers 43 HeadBlockHash common.Hash 44 } 45 46 type ByRaftId []Address 47 48 func (a ByRaftId) Len() int { return len(a) } 49 func (a ByRaftId) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 50 func (a ByRaftId) Less(i, j int) bool { return a[i].RaftId < a[j].RaftId } 51 52 func (pm *ProtocolManager) buildSnapshot() *SnapshotWithHostnames { 53 pm.mu.RLock() 54 defer pm.mu.RUnlock() 55 56 numNodes := len(pm.confState.Nodes) + len(pm.confState.Learners) 57 numRemovedNodes := pm.removedPeers.Cardinality() 58 59 snapshot := &SnapshotWithHostnames{ 60 Addresses: make([]Address, numNodes), 61 RemovedRaftIds: make([]uint16, numRemovedNodes), 62 HeadBlockHash: pm.blockchain.CurrentBlock().Hash(), 63 } 64 65 // Populate addresses 66 67 for i, rawRaftId := range append(pm.confState.Nodes, pm.confState.Learners...) { 68 raftId := uint16(rawRaftId) 69 70 if raftId == pm.raftId { 71 snapshot.Addresses[i] = *pm.address 72 } else { 73 snapshot.Addresses[i] = *pm.peers[raftId].address 74 } 75 } 76 sort.Sort(ByRaftId(snapshot.Addresses)) 77 78 // Populate removed IDs 79 i := 0 80 for removedIface := range pm.removedPeers.Iterator().C { 81 snapshot.RemovedRaftIds[i] = removedIface.(uint16) 82 i++ 83 } 84 return snapshot 85 } 86 87 // Note that we do *not* read `pm.appliedIndex` here. We only use the `index` 88 // parameter instead. This is because we need to support a scenario when we 89 // snapshot for a future index that we have not yet recorded in LevelDB. See 90 // comments around the use of `forceSnapshot`. 91 func (pm *ProtocolManager) triggerSnapshot(index uint64) { 92 pm.mu.RLock() 93 snapshotIndex := pm.snapshotIndex 94 pm.mu.RUnlock() 95 96 log.Info("start snapshot", "applied index", pm.appliedIndex, "last snapshot index", snapshotIndex) 97 98 //snapData := pm.blockchain.CurrentBlock().Hash().Bytes() 99 //snap, err := pm.raftStorage.CreateSnapshot(pm.appliedIndex, &pm.confState, snapData) 100 snapData := pm.buildSnapshot().toBytes() 101 snap, err := pm.raftStorage.CreateSnapshot(index, &pm.confState, snapData) 102 if err != nil { 103 panic(err) 104 } 105 if err := pm.saveRaftSnapshot(snap); err != nil { 106 panic(err) 107 } 108 // Discard all log entries prior to index. 109 if err := pm.raftStorage.Compact(index); err != nil { 110 panic(err) 111 } 112 log.Info("compacted log", "index", pm.appliedIndex) 113 114 pm.mu.Lock() 115 pm.snapshotIndex = index 116 pm.mu.Unlock() 117 } 118 119 func confStateIdSet(confState raftpb.ConfState) mapset.Set { 120 set := mapset.NewSet() 121 for _, rawRaftId := range append(confState.Nodes, confState.Learners...) { 122 set.Add(uint16(rawRaftId)) 123 } 124 return set 125 } 126 127 func (pm *ProtocolManager) updateClusterMembership(newConfState raftpb.ConfState, addresses []Address, removedRaftIds []uint16) { 128 log.Info("updating cluster membership per raft snapshot") 129 130 prevConfState := pm.confState 131 132 // Update tombstones for permanently removed peers. For simplicity we do not 133 // allow the re-use of peer IDs once a peer is removed. 134 135 removedPeers := mapset.NewSet() 136 for _, removedRaftId := range removedRaftIds { 137 removedPeers.Add(removedRaftId) 138 } 139 pm.mu.Lock() 140 pm.removedPeers = removedPeers 141 pm.mu.Unlock() 142 143 // Remove old peers that we're still connected to 144 145 prevIds := confStateIdSet(prevConfState) 146 newIds := confStateIdSet(newConfState) 147 idsToRemove := prevIds.Difference(newIds) 148 for idIfaceToRemove := range idsToRemove.Iterator().C { 149 raftId := idIfaceToRemove.(uint16) 150 log.Info("removing old raft peer", "peer id", raftId) 151 152 pm.removePeer(raftId) 153 } 154 155 // Update local and remote addresses 156 157 for _, tempAddress := range addresses { 158 address := tempAddress // Allocate separately on the heap for each iteration. 159 160 if address.RaftId == pm.raftId { 161 // If we're a newcomer to an existing cluster, this is where we learn 162 // our own Address. 163 pm.setLocalAddress(&address) 164 } else { 165 pm.mu.RLock() 166 existingPeer := pm.peers[address.RaftId] 167 pm.mu.RUnlock() 168 169 if existingPeer == nil { 170 log.Info("adding new raft peer", "raft id", address.RaftId) 171 pm.addPeer(&address) 172 } 173 } 174 } 175 176 pm.mu.Lock() 177 pm.confState = newConfState 178 pm.mu.Unlock() 179 180 log.Info("updated cluster membership") 181 } 182 183 func (pm *ProtocolManager) maybeTriggerSnapshot() { 184 pm.mu.RLock() 185 appliedIndex := pm.appliedIndex 186 entriesSinceLastSnap := appliedIndex - pm.snapshotIndex 187 pm.mu.RUnlock() 188 189 if entriesSinceLastSnap < snapshotPeriod { 190 return 191 } 192 193 pm.triggerSnapshot(appliedIndex) 194 } 195 196 func (pm *ProtocolManager) loadSnapshot() *raftpb.Snapshot { 197 if raftSnapshot := pm.readRaftSnapshot(); raftSnapshot != nil { 198 log.Info("loading snapshot") 199 pm.applyRaftSnapshot(*raftSnapshot) 200 201 return raftSnapshot 202 } else { 203 log.Info("no snapshot to load") 204 205 return nil 206 } 207 } 208 209 func (snapshot *SnapshotWithHostnames) toBytes() []byte { 210 var ( 211 useOldSnapshot bool 212 oldSnapshot SnapshotWithoutHostnames 213 toEncode interface{} 214 ) 215 216 // use old snapshot if all snapshot.Addresses are ips 217 // but use the new snapshot if any of it is a hostname 218 useOldSnapshot = true 219 oldSnapshot.HeadBlockHash, oldSnapshot.RemovedRaftIds = snapshot.HeadBlockHash, snapshot.RemovedRaftIds 220 oldSnapshot.Addresses = make([]AddressWithoutHostname, len(snapshot.Addresses)) 221 222 for index, addrWithHost := range snapshot.Addresses { 223 // validate addrWithHost.Hostname is a hostname/ip 224 ip := net.ParseIP(addrWithHost.Hostname) 225 if ip == nil { 226 // this is a hostname 227 useOldSnapshot = false 228 break 229 } 230 // this is an ip 231 oldSnapshot.Addresses[index] = AddressWithoutHostname{ 232 addrWithHost.RaftId, 233 addrWithHost.NodeId, 234 ip, 235 addrWithHost.P2pPort, 236 addrWithHost.RaftPort, 237 } 238 } 239 240 if useOldSnapshot { 241 toEncode = oldSnapshot 242 } else { 243 toEncode = snapshot 244 } 245 buffer, err := rlp.EncodeToBytes(toEncode) 246 if err != nil { 247 panic(fmt.Sprintf("error: failed to RLP-encode Snapshot: %s", err.Error())) 248 } 249 return buffer 250 } 251 252 func bytesToSnapshot(input []byte) *SnapshotWithHostnames { 253 var err, errOld error 254 255 snapshot := new(SnapshotWithHostnames) 256 streamNewSnapshot := rlp.NewStream(bytes.NewReader(input), 0) 257 if err = streamNewSnapshot.Decode(snapshot); err == nil { 258 return snapshot 259 } 260 261 // Build new snapshot with hostname from legacy Address struct 262 snapshotOld := new(SnapshotWithoutHostnames) 263 streamOldSnapshot := rlp.NewStream(bytes.NewReader(input), 0) 264 if errOld = streamOldSnapshot.Decode(snapshotOld); errOld == nil { 265 var snapshotConverted SnapshotWithHostnames 266 snapshotConverted.RemovedRaftIds, snapshotConverted.HeadBlockHash = snapshotOld.RemovedRaftIds, snapshotOld.HeadBlockHash 267 snapshotConverted.Addresses = make([]Address, len(snapshotOld.Addresses)) 268 269 for index, oldAddrWithIp := range snapshotOld.Addresses { 270 snapshotConverted.Addresses[index] = Address{ 271 RaftId: oldAddrWithIp.RaftId, 272 NodeId: oldAddrWithIp.NodeId, 273 Ip: nil, 274 P2pPort: oldAddrWithIp.P2pPort, 275 RaftPort: oldAddrWithIp.RaftPort, 276 Hostname: oldAddrWithIp.Ip.String(), 277 } 278 } 279 280 return &snapshotConverted 281 } 282 283 fatalf("failed to RLP-decode Snapshot: %v, %v", err, errOld) 284 return nil 285 } 286 287 func (snapshot *SnapshotWithHostnames) EncodeRLP(w io.Writer) error { 288 return rlp.Encode(w, []interface{}{snapshot.Addresses, snapshot.RemovedRaftIds, snapshot.HeadBlockHash}) 289 } 290 291 // Raft snapshot 292 293 func (pm *ProtocolManager) saveRaftSnapshot(snap raftpb.Snapshot) error { 294 if err := pm.snapshotter.SaveSnap(snap); err != nil { 295 return err 296 } 297 298 walSnap := walpb.Snapshot{ 299 Index: snap.Metadata.Index, 300 Term: snap.Metadata.Term, 301 } 302 303 if err := pm.wal.SaveSnapshot(walSnap); err != nil { 304 return err 305 } 306 307 return pm.wal.ReleaseLockTo(snap.Metadata.Index) 308 } 309 310 func (pm *ProtocolManager) readRaftSnapshot() *raftpb.Snapshot { 311 snapshot, err := pm.snapshotter.Load() 312 if err != nil && err != snap.ErrNoSnapshot { 313 fatalf("error loading snapshot: %v", err) 314 } 315 316 return snapshot 317 } 318 319 func (pm *ProtocolManager) applyRaftSnapshot(raftSnapshot raftpb.Snapshot) { 320 log.Info("applying snapshot to raft storage") 321 if err := pm.raftStorage.ApplySnapshot(raftSnapshot); err != nil { 322 fatalf("failed to apply snapshot: %s", err) 323 } 324 snapshot := bytesToSnapshot(raftSnapshot.Data) 325 326 latestBlockHash := snapshot.HeadBlockHash 327 328 pm.updateClusterMembership(raftSnapshot.Metadata.ConfState, snapshot.Addresses, snapshot.RemovedRaftIds) 329 330 preSyncHead := pm.blockchain.CurrentBlock() 331 332 if latestBlock := pm.blockchain.GetBlockByHash(latestBlockHash); latestBlock == nil { 333 pm.syncBlockchainUntil(latestBlockHash) 334 pm.logNewlyAcceptedTransactions(preSyncHead) 335 336 log.Info(chainExtensionMessage, "hash", pm.blockchain.CurrentBlock().Hash()) 337 } else { 338 // added for permissions changes to indicate node sync up has started 339 core.SetSyncStatus() 340 log.Info("blockchain is caught up; no need to synchronize") 341 } 342 343 snapMeta := raftSnapshot.Metadata 344 pm.confState = snapMeta.ConfState 345 pm.mu.Lock() 346 pm.snapshotIndex = snapMeta.Index 347 pm.mu.Unlock() 348 } 349 350 func (pm *ProtocolManager) syncBlockchainUntil(hash common.Hash) { 351 pm.mu.RLock() 352 peerMap := make(map[uint16]*Peer, len(pm.peers)) 353 for raftId, peer := range pm.peers { 354 peerMap[raftId] = peer 355 } 356 pm.mu.RUnlock() 357 358 for { 359 for peerId, peer := range peerMap { 360 log.Info("synchronizing with peer", "peer id", peerId, "hash", hash) 361 362 peerId := peer.p2pNode.ID().String() 363 peerIdPrefix := fmt.Sprintf("%x", peer.p2pNode.ID().Bytes()[:8]) 364 365 if err := pm.downloader.Synchronise(peerIdPrefix, hash, big.NewInt(0), downloader.BoundedFullSync); err != nil { 366 log.Info("failed to synchronize with peer", "peer id", peerId) 367 368 time.Sleep(500 * time.Millisecond) 369 } else { 370 return 371 } 372 } 373 } 374 } 375 376 func (pm *ProtocolManager) logNewlyAcceptedTransactions(preSyncHead *types.Block) { 377 newHead := pm.blockchain.CurrentBlock() 378 numBlocks := newHead.NumberU64() - preSyncHead.NumberU64() 379 blocks := make([]*types.Block, numBlocks) 380 currBlock := newHead 381 blocksSeen := 0 382 for currBlock.Hash() != preSyncHead.Hash() { 383 blocks[int(numBlocks)-(1+blocksSeen)] = currBlock 384 385 blocksSeen += 1 386 currBlock = pm.blockchain.GetBlockByHash(currBlock.ParentHash()) 387 } 388 for _, block := range blocks { 389 for _, tx := range block.Transactions() { 390 log.EmitCheckpoint(log.TxAccepted, "tx", tx.Hash().Hex()) 391 } 392 } 393 }