github.com/bigzoro/my_simplechain@v0.0.0-20240315012955-8ad0a2a29bb9/consensus/raft/backend/handler.go

package backend

import (
	"context"
	"errors"
	"fmt"
	etcdlog "log"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"sync"
	"time"

	"github.com/bigzoro/my_simplechain/common"
	"github.com/bigzoro/my_simplechain/consensus/raft"
	"github.com/bigzoro/my_simplechain/core"
	"github.com/bigzoro/my_simplechain/core/types"
	"github.com/bigzoro/my_simplechain/eth/downloader"
	"github.com/bigzoro/my_simplechain/event"
	"github.com/bigzoro/my_simplechain/log"
	"github.com/bigzoro/my_simplechain/miner"
	"github.com/bigzoro/my_simplechain/p2p"
	"github.com/bigzoro/my_simplechain/p2p/enode"
	"github.com/bigzoro/my_simplechain/p2p/enr"
	"github.com/bigzoro/my_simplechain/rlp"

	"github.com/coreos/etcd/etcdserver/stats"
	"github.com/coreos/etcd/pkg/fileutil"
	raftTypes "github.com/coreos/etcd/pkg/types"
	etcdRaft "github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/raft/raftpb"
	"github.com/coreos/etcd/rafthttp"
	"github.com/coreos/etcd/snap"
	"github.com/coreos/etcd/wal"
	mapset "github.com/deckarep/golang-set"
	"github.com/syndtr/goleveldb/leveldb"
)

type ProtocolManager struct {
	mu       sync.RWMutex // For protecting concurrent JS access to "local peer" and "remote peer" state
	quitSync chan struct{}
	stopped  bool

	// Static configuration
	joinExisting   bool // Whether to join an existing cluster when a WAL doesn't already exist
	bootstrapNodes []*enode.Node
	raftId         uint16
	raftPort       uint16
	raftConfigPath string // add wsw

	// Local peer state (protected by mu vs concurrent access via JS)
	address       *raft.Address
	role          int    // Role: minter or verifier
	appliedIndex  uint64 // The index of the last-applied raft entry
	snapshotIndex uint64 // The index of the latest snapshot.

	// Remote peer state (protected by mu vs concurrent access via JS)
	leader       uint16
	peers        map[uint16]*raft.Peer
	removedPeers mapset.Set // *Permanently removed* peers

	// P2P transport
	p2pServer *p2p.Server // Initialized in start()

	// Blockchain services
	blockchain *core.BlockChain
	downloader *downloader.Downloader
	minter     *miner.Miner

	// Blockchain events
	eventMux      *event.TypeMux
	minedBlockSub *event.TypeMuxSubscription

	// Raft proposal events
	blockProposalC      chan *types.Block      // for mined blocks to raft
	confChangeProposalC chan raftpb.ConfChange // for config changes from js console to raft

	// Raft transport
	unsafeRawNode etcdRaft.Node
	transport     *rafthttp.Transport
	httpstopc     chan struct{}
	httpdonec     chan struct{}

	// Raft snapshotting
	snapshotter *snap.Snapshotter
	snapdir     string
	confState   raftpb.ConfState

	// Raft write-ahead log
	waldir string
	wal    *wal.WAL

	// Storage
	raftDb      *leveldb.DB             // Persistent storage for last-applied raft index
	raftStorage *etcdRaft.MemoryStorage // Volatile raft storage
}

//
// Public interface
//

func NewProtocolManager(raftId uint16, raftPort uint16, blockchain *core.BlockChain, mux *event.TypeMux, bootstrapNodes []*enode.Node, joinExisting bool, datadir string, minter *miner.Miner, downloader *downloader.Downloader) (*ProtocolManager, error) {
	waldir := fmt.Sprintf("%s/raft-wal", datadir)
	snapdir := fmt.Sprintf("%s/raft-snap", datadir)
	raftDbLoc := fmt.Sprintf("%s/raft-state", datadir)
	raftConfigDir := fmt.Sprintf("%s/sipe", datadir)

	manager := &ProtocolManager{
		bootstrapNodes:      bootstrapNodes,
		peers:               make(map[uint16]*raft.Peer),
		leader:              uint16(etcdRaft.None),
		removedPeers:        mapset.NewSet(),
		joinExisting:        joinExisting,
		blockchain:          blockchain,
		eventMux:            mux,
		blockProposalC:      make(chan *types.Block),
		confChangeProposalC: make(chan raftpb.ConfChange),
		httpstopc:           make(chan struct{}),
		httpdonec:           make(chan struct{}),
		waldir:              waldir,
		snapdir:             snapdir,
		raftConfigPath:      raftConfigDir,
		snapshotter:         snap.New(snapdir),
		raftId:              raftId,
		raftPort:            raftPort,
		quitSync:            make(chan struct{}),
		raftStorage:         etcdRaft.NewMemoryStorage(),
		minter:              minter,
		downloader:          downloader,
	}

	if db, err := openRaftDb(raftDbLoc); err != nil {
		return nil, err
	} else {
		manager.raftDb = db
	}

	return manager, nil
}

func (pm *ProtocolManager) Start(p2pServer *p2p.Server) {
	log.Info("starting raft protocol handler")

	pm.p2pServer = p2pServer
	pm.minedBlockSub = pm.eventMux.Subscribe(core.NewMinedBlockEvent{})
	pm.startRaft()
	go pm.minedBroadcastLoop()
}

func (pm *ProtocolManager) Stop() {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	defer log.Info("raft protocol handler stopped")

	if pm.stopped {
		return
	}

	log.Info("stopping raft protocol handler...")

	for raftId, peer := range pm.peers {
		pm.disconnectFromPeer(raftId, peer)
	}

	pm.minedBlockSub.Unsubscribe()

	if pm.transport != nil {
		pm.transport.Stop()
	}

	close(pm.httpstopc)
	<-pm.httpdonec
	close(pm.quitSync)

	if pm.unsafeRawNode != nil {
		pm.unsafeRawNode.Stop()
	}

	pm.raftDb.Close()

	pm.p2pServer = nil

	pm.minter.Stop()

	pm.stopped = true
}
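
// Illustrative sketch (not part of the original file): the expected lifecycle
// of a ProtocolManager, assuming the caller already holds a data directory,
// the bootstrap enode list, and running blockchain/miner/downloader/p2p
// instances. The variable names (datadir, bootstrapNodes, p2pSrv, mnr, dl,
// chain, mux) are hypothetical.
//
//	pm, err := NewProtocolManager(1, 50401, chain, mux, bootstrapNodes, false, datadir, mnr, dl)
//	if err != nil {
//		log.Error("raft backend init failed", "err", err)
//		return
//	}
//	pm.Start(p2pSrv) // wires the p2p server, subscribes to mined blocks, starts raft
//	defer pm.Stop()  // safe to call once; guarded by pm.stopped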

func (pm *ProtocolManager) NodeInfo() *RaftNodeInfo {
	pm.mu.RLock() // as we read role and peers
	defer pm.mu.RUnlock()

	var roleDescription string
	if pm.role == raft.MinterRole {
		roleDescription = "minter"
	} else {
		roleDescription = "verifier"
	}

	peerAddresses := make([]*raft.Address, len(pm.peers))
	peerIdx := 0
	for _, peer := range pm.peers {
		peerAddresses[peerIdx] = peer.Address
		peerIdx += 1
	}

	removedPeerIfaces := pm.removedPeers
	removedPeerIds := make([]uint16, removedPeerIfaces.Cardinality())
	i := 0
	for removedIface := range removedPeerIfaces.Iterator().C {
		removedPeerIds[i] = removedIface.(uint16)
		i++
	}

	//
	// NOTE: before exposing any new fields here, make sure that the underlying
	// ProtocolManager members are protected from concurrent access by pm.mu!
	//
	return &RaftNodeInfo{
		ClusterSize:    len(pm.peers) + 1,
		Role:           roleDescription,
		Address:        pm.address,
		PeerAddresses:  peerAddresses,
		RemovedPeerIds: removedPeerIds,
		AppliedIndex:   pm.appliedIndex,
		SnapshotIndex:  pm.snapshotIndex,
	}
}

// There seems to be a very rare race in raft where during `etcdRaft.StartNode`
// it will call back our `Process` method before it's finished returning the
// `raft.Node`, `pm.unsafeRawNode`, to us. This re-entrance through a separate
// thread will cause a nil pointer dereference. To work around this, this
// getter method should be used instead of reading `pm.unsafeRawNode` directly.
func (pm *ProtocolManager) rawNode() etcdRaft.Node {
	for pm.unsafeRawNode == nil {
		time.Sleep(100 * time.Millisecond)
	}

	return pm.unsafeRawNode
}

func (pm *ProtocolManager) nextRaftId() uint16 {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	maxId := pm.raftId

	for peerId := range pm.peers {
		if maxId < peerId {
			maxId = peerId
		}
	}

	removedPeerIfaces := pm.removedPeers
	for removedIface := range removedPeerIfaces.Iterator().C {
		removedId := removedIface.(uint16)

		if maxId < removedId {
			maxId = removedId
		}
	}

	return maxId + 1
}

func (pm *ProtocolManager) isRaftIdRemoved(id uint16) bool {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	return pm.removedPeers.Contains(id)
}

func (pm *ProtocolManager) isRaftIdUsed(raftId uint16) bool {
	if pm.raftId == raftId || pm.isRaftIdRemoved(raftId) {
		return true
	}

	pm.mu.RLock()
	defer pm.mu.RUnlock()

	return pm.peers[raftId] != nil
}

func (pm *ProtocolManager) isNodeAlreadyInCluster(node *enode.Node) error {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	for _, peer := range pm.peers {
		peerRaftId := peer.Address.RaftId
		peerNode := peer.P2pNode

		if peerNode.ID() == node.ID() {
			return fmt.Errorf("node with this enode has already been added to the cluster: %s", node.ID())
		}

		if peerNode.IP().Equal(node.IP()) {
			if peerNode.TCP() == node.TCP() {
				return fmt.Errorf("existing node %v with raft ID %v is already using eth p2p at %v:%v", peerNode.ID(), peerRaftId, node.IP(), node.TCP())
			} else if peer.Address.RaftPort == enr.RaftPort(node.RaftPort()) {
				return fmt.Errorf("existing node %v with raft ID %v is already using raft at %v:%v", peerNode.ID(), peerRaftId, node.IP(), node.RaftPort())
			}
		}
	}

	return nil
}

func (pm *ProtocolManager) ProposeNewPeer(enodeId string, raftId uint16) (uint16, error) {
	node, err := enode.ParseV4(enodeId)
	if err != nil {
		return 0, err
	}

	if len(node.IP()) != 4 {
		return 0, fmt.Errorf("expected IPv4 address (with length 4), but got IP of length %v", len(node.IP()))
	}

	if !node.HasRaftPort() {
		return 0, fmt.Errorf("enodeId is missing raftport querystring parameter: %v", enodeId)
	}

	if err := pm.isNodeAlreadyInCluster(node); err != nil {
		return 0, err // wsw add
	}
	address := raft.NewAddress(raftId, node.RaftPort(), node)

	pm.confChangeProposalC <- raftpb.ConfChange{
		Type:    raftpb.ConfChangeAddNode,
		NodeID:  uint64(raftId),
		Context: address.ToBytes(),
	}

	return raftId, nil
}
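
// Illustrative only: ProposeNewPeer above expects a v4 enode URL that carries
// the peer's raft port as a `raftport` query-string parameter. The URL below
// is made up; the pubkey must be the peer's 128-hex-char node ID.
//
//	raftId, err := pm.ProposeNewPeer(
//		"enode://<128-hex-char-pubkey>@10.0.0.2:30303?discport=0&raftport=50401",
//		pm.nextRaftId())
//
// The proposal is only enqueued on confChangeProposalC here; the peer is not
// actually added until the corresponding ConfChangeAddNode entry is committed
// and applied in eventLoop.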

func (pm *ProtocolManager) ProposePeerRemoval(raftId uint16) {
	pm.confChangeProposalC <- raftpb.ConfChange{
		Type:   raftpb.ConfChangeRemoveNode,
		NodeID: uint64(raftId),
	}
}

//
// MsgWriter interface (necessary for p2p.Send)
//

func (pm *ProtocolManager) WriteMsg(msg p2p.Msg) error {
	// read *into* buffer
	var buffer = make([]byte, msg.Size)
	msg.Payload.Read(buffer)

	return pm.rawNode().Propose(context.TODO(), buffer)
}

//
// Raft interface
//

func (pm *ProtocolManager) Process(ctx context.Context, m raftpb.Message) error {
	return pm.rawNode().Step(ctx, m)
}

func (pm *ProtocolManager) IsIDRemoved(id uint64) bool {
	return pm.isRaftIdRemoved(uint16(id))
}

func (pm *ProtocolManager) ReportUnreachable(id uint64) {
	log.Info("peer is currently unreachable", "peer id", id)

	pm.rawNode().ReportUnreachable(id)
}

func (pm *ProtocolManager) ReportSnapshot(id uint64, status etcdRaft.SnapshotStatus) {
	if status == etcdRaft.SnapshotFailure {
		log.Info("failed to send snapshot", "raft peer", id)
	} else if status == etcdRaft.SnapshotFinish {
		log.Info("finished sending snapshot", "raft peer", id)
	}

	pm.rawNode().ReportSnapshot(id, status)
}

//
// Private methods
//

func (pm *ProtocolManager) resetRaftId(enodeId string, raftId uint16) error {
	node, err := enode.ParseV4(enodeId)
	if err != nil {
		return err
	}
	address := raft.NewAddress(raftId, node.RaftPort(), node)
	pm.confChangeProposalC <- raftpb.ConfChange{
		Type:    raftpb.ConfChangeUpdateNode,
		NodeID:  uint64(raftId),
		Context: address.ToBytes(),
	}
	return nil
}

func (pm *ProtocolManager) startRaft() {
	if !fileutil.Exist(pm.snapdir) {
		if err := os.Mkdir(pm.snapdir, 0750); err != nil {
			raft.Fatalf("cannot create dir for snapshot (%v)", err)
		}
	}
	walExisted := wal.Exist(pm.waldir)
	lastAppliedIndex := pm.loadAppliedIndex()

	ss := &stats.ServerStats{}
	ss.Initialize()
	pm.transport = &rafthttp.Transport{
		ID:          raftTypes.ID(pm.raftId),
		ClusterID:   0x1000,
		Raft:        pm,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(int(pm.raftId))),
		ErrorC:      make(chan error),
	}
	pm.transport.Start()

	// We load the snapshot to connect to prev peers before replaying the WAL,
	// which typically goes further into the future than the snapshot.

	var maybeRaftSnapshot *raftpb.Snapshot

	if walExisted {
		maybeRaftSnapshot = pm.loadSnapshot() // re-establishes peer connections
	}

	pm.wal = pm.replayWAL(maybeRaftSnapshot)

	if walExisted {
		if hardState, _, err := pm.raftStorage.InitialState(); err != nil {
			panic(fmt.Sprintf("failed to read initial state from raft while restarting: %v", err))
		} else {
			if lastPersistedCommittedIndex := hardState.Commit; lastPersistedCommittedIndex < lastAppliedIndex {
				log.Info("rolling back applied index to last-durably-committed", "last applied index", lastAppliedIndex, "last persisted index", lastPersistedCommittedIndex)

				// Roll back our applied index. See the logic and explanation around
				// the single call to `pm.applyNewChainHead` for more context.
				lastAppliedIndex = lastPersistedCommittedIndex
			}
		}
	}

	// NOTE: cockroach sets this to false for now until they've "worked out the
	// bugs"
	enablePreVote := true
	defaultLogger := &etcdRaft.DefaultLogger{Logger: etcdlog.New(os.Stderr, "raft", etcdlog.LstdFlags)}
	defaultLogger.EnableDebug()
	logger := etcdRaft.Logger(defaultLogger)

	raftConfig := &etcdRaft.Config{
		Applied:       lastAppliedIndex,
		ID:            uint64(pm.raftId),
		ElectionTick:  10, // NOTE: cockroach sets this to 15
		HeartbeatTick: 1,  // NOTE: cockroach sets this to 5
		Storage:       pm.raftStorage,

		// NOTE, from cockroach:
		// "PreVote and CheckQuorum are two ways of achieving the same thing.
		// PreVote is more compatible with quiesced ranges, so we want to switch
		// to it once we've worked out the bugs."
		//
		// TODO: vendor again?
		// PreVote: enablePreVote,
		CheckQuorum: !enablePreVote,

		// MaxSizePerMsg controls how many Raft log entries the leader will send to
		// followers in a single MsgApp.
		MaxSizePerMsg: 4096, // NOTE: in cockroachdb this is 16*1024

		// MaxInflightMsgs controls how many in-flight messages Raft will send to
		// a follower without hearing a response. The total number of Raft log
		// entries is a combination of this setting and MaxSizePerMsg.
		//
		// NOTE: Cockroach's settings (MaxSizePerMsg of 16k and MaxInflightMsgs
		// of 4) provide for up to 64 KB of raft log to be sent without
		// acknowledgement. With an average entry size of 1 KB that translates
		// to ~64 commands that might be executed in the handling of a single
		// etcdraft.Ready operation.
		MaxInflightMsgs: 256, // NOTE: in cockroachdb this is 4
		Logger:          logger,
	}
	raftConfig.Logger.Debug("raftlog startRaft", "raft ID", raftConfig.ID)
	log.Info("startRaft", "raft ID", raftConfig.ID)

	if walExisted {
		log.Info("remounting an existing raft log; connecting to peers.")
		pm.unsafeRawNode = etcdRaft.RestartNode(raftConfig)
	} else if pm.joinExisting {
		log.Info("newly joining an existing cluster; waiting for connections.")
		pm.unsafeRawNode = etcdRaft.StartNode(raftConfig, nil)
	} else {
		if numPeers := len(pm.bootstrapNodes); numPeers == 0 {
			panic("exiting due to empty raft peers list")
		} else {
			log.Info("starting a new raft log", "initial cluster size of", numPeers)
		}

		raftPeers, peerAddresses, localAddress := pm.makeInitialRaftPeers()

		pm.setLocalAddress(localAddress)

		// We add all peers up-front even though we will see a ConfChangeAddNode
		// for each shortly. This is because raft's ConfState will contain all of
		// these nodes before we see these log entries, and we always want our
		// snapshots to have all addresses for each of the nodes in the ConfState.
		for _, peerAddress := range peerAddresses {
			pm.addPeer(peerAddress)
		}

		pm.unsafeRawNode = etcdRaft.StartNode(raftConfig, raftPeers)
	}

	go pm.serveRaft()
	go pm.serveLocalProposals()
	go pm.eventLoop()
	go pm.handleRoleChange(pm.rawNode().RoleChan().Out())
}
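
// Illustrative only: the election and heartbeat settings above are expressed
// in ticks, and eventLoop drives one Tick() per raft.TickerMS milliseconds.
// Assuming, for example, raft.TickerMS were 100, the leader would heartbeat
// roughly every 100ms (HeartbeatTick: 1) and a follower would start an
// election after ~1s without hearing from the leader (ElectionTick: 10).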

func (pm *ProtocolManager) setLocalAddress(addr *raft.Address) {
	pm.mu.Lock()
	pm.address = addr
	pm.mu.Unlock()

	// By setting `URLs` on the raft transport, we advertise our URL (in an HTTP
	// header) to any recipient. This is necessary for a newcomer to the cluster
	// to be able to accept a snapshot from us to bootstrap them.
	if urls, err := raftTypes.NewURLs([]string{raftUrl(addr)}); err == nil {
		pm.transport.URLs = urls
	} else {
		panic(fmt.Sprintf("error: could not create URL from local address: %v", addr))
	}
}

func (pm *ProtocolManager) serveRaft() {
	urlString := fmt.Sprintf("http://0.0.0.0:%d", pm.raftPort)
	url, err := url.Parse(urlString)
	if err != nil {
		raft.Fatalf("Failed parsing URL (%v)", err)
	}

	listener, err := raft.NewStoppableListener(url.Host, pm.httpstopc)
	if err != nil {
		raft.Fatalf("Failed to listen rafthttp (%v)", err)
	}
	err = (&http.Server{Handler: pm.transport.Handler()}).Serve(listener)

	select {
	case <-pm.httpstopc:
	default:
		raft.Fatalf("Failed to serve rafthttp (%v)", err)
	}
	close(pm.httpdonec)
}

func (pm *ProtocolManager) handleRoleChange(roleC <-chan interface{}) {
	for {
		select {
		case role := <-roleC:
			intRole, ok := role.(int)

			if !ok {
				panic("Couldn't cast role to int")
			}

			if intRole == raft.MinterRole {
				pm.minter.Start(common.Address{})
			} else { // verifier
				pm.minter.Stop()
			}

			pm.mu.Lock()
			pm.role = intRole
			pm.mu.Unlock()

		case <-pm.quitSync:
			return
		}
	}
}

func (pm *ProtocolManager) minedBroadcastLoop() {
	for obj := range pm.minedBlockSub.Chan() {
		switch ev := obj.Data.(type) {
		case core.NewMinedBlockEvent:
			select {
			case pm.blockProposalC <- ev.Block:
			case <-pm.quitSync:
				return
			}
		}
	}
}
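
// Flow recap for locally mined blocks (all pieces are defined in this file):
// mined blocks arrive as core.NewMinedBlockEvent on the event mux (subscribed
// in Start); minedBroadcastLoop above forwards each block to blockProposalC;
// serveLocalProposals (below) RLP-encodes it and calls rawNode().Propose;
// once raft commits the entry, eventLoop decodes it and hands it to
// applyNewChainHead, which inserts it into the canonical chain.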

// Serve two channels to handle new blocks and raft configuration changes originating locally.
func (pm *ProtocolManager) serveLocalProposals() {
	//
	// TODO: does it matter that this will restart from 0 whenever we restart a cluster?
	//
	var confChangeCount uint64

	for {
		select {
		case block, ok := <-pm.blockProposalC:
			if !ok {
				log.Info("error: read from blockProposalC failed")
				return
			}

			size, r, err := rlp.EncodeToReader(block)
			if err != nil {
				panic(fmt.Sprintf("error: failed to send RLP-encoded block: %s", err.Error()))
			}
			var buffer = make([]byte, uint32(size))
			r.Read(buffer)

			// blocks until accepted by the raft state machine
			pm.rawNode().Propose(context.TODO(), buffer)
		case cc, ok := <-pm.confChangeProposalC:
			if !ok {
				log.Info("error: read from confChangeProposalC failed")
				return
			}

			confChangeCount++
			cc.ID = confChangeCount
			pm.rawNode().ProposeConfChange(context.TODO(), cc)
		case <-pm.quitSync:
			return
		}
	}
}

func (pm *ProtocolManager) entriesToApply(allEntries []raftpb.Entry) (entriesToApply []raftpb.Entry) {
	if len(allEntries) == 0 {
		return
	}

	first := allEntries[0].Index
	pm.mu.RLock()
	lastApplied := pm.appliedIndex
	pm.mu.RUnlock()

	if first > lastApplied+1 {
		raft.Fatalf("first index of committed entry [%d] should be <= appliedIndex [%d] + 1", first, lastApplied)
	}

	firstToApply := lastApplied - first + 1

	if firstToApply < uint64(len(allEntries)) {
		entriesToApply = allEntries[firstToApply:]
	}
	return
}
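
// Worked example for entriesToApply (the values are made up): if appliedIndex
// is 7 and raft hands us committed entries with indexes 5..9, then first == 5,
// firstToApply == 7-5+1 == 3, and only allEntries[3:] (indexes 8 and 9) are
// returned; entries at or below the applied index are skipped as already done.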

func raftUrl(address *raft.Address) string {
	return fmt.Sprintf("http://%s:%d", address.Ip, address.RaftPort)
}

func (pm *ProtocolManager) addPeer(address *raft.Address) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	raftId := address.RaftId

	// Quorum - RAFT - derive pubkey from nodeId
	pubKey, err := enode.HexPubkey(address.NodeId.String())
	if err != nil {
		log.Error("error decoding pub key from enodeId", "enodeId", address.NodeId.String(), "err", err)
		panic(err)
	}

	// Add P2P connection:
	p2pNode := enode.NewV4WithRaft(pubKey, address.Ip, int(address.P2pPort), 0, int(address.RaftPort))
	pm.p2pServer.AddPeer(p2pNode)

	// Add raft transport connection:
	pm.transport.AddPeer(raftTypes.ID(raftId), []string{raftUrl(address)})
	pm.peers[raftId] = &raft.Peer{Address: address, P2pNode: p2pNode}
}

func (pm *ProtocolManager) disconnectFromPeer(raftId uint16, peer *raft.Peer) {
	pm.p2pServer.RemovePeer(peer.P2pNode)
	pm.transport.RemovePeer(raftTypes.ID(raftId))
}

func (pm *ProtocolManager) removePeer(raftId uint16) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	if peer := pm.peers[raftId]; peer != nil {
		pm.disconnectFromPeer(raftId, peer)

		delete(pm.peers, raftId)
	}

	// This is only necessary sometimes, but it's idempotent. Also, we *always*
	// do this, and not just when there's still a peer in the map, because we
	// need to do it for our *own* raft ID before we get booted from the cluster
	// so that snapshots are identical on all nodes. It's important for a booted
	// node to have a snapshot identical to every other node because that node
	// can potentially re-enter the cluster with a new raft ID.
	pm.removedPeers.Add(raftId)
}
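
// Illustrative only: for a peer whose raft.Address has Ip 10.0.0.2 and
// RaftPort 50401 (made-up values), raftUrl above returns
// "http://10.0.0.2:50401"; addPeer registers that URL with the rafthttp
// transport and, in parallel, dials the peer's eth p2p endpoint via the
// p2p server.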

func (pm *ProtocolManager) eventLoop() {
	ticker := time.NewTicker(raft.TickerMS * time.Millisecond)
	defer ticker.Stop()
	defer pm.wal.Close()

	exitAfterApplying := false

	for {
		select {
		case <-ticker.C:
			pm.rawNode().Tick()

		// when the node is first ready it gives us entries to commit and messages
		// to immediately publish
		case rd := <-pm.rawNode().Ready():
			pm.wal.Save(rd.HardState, rd.Entries)

			if rd.SoftState != nil {
				pm.updateLeader(rd.SoftState.Lead)
			}

			if snap := rd.Snapshot; !etcdRaft.IsEmptySnap(snap) {
				pm.saveRaftSnapshot(snap)
				pm.applyRaftSnapshot(snap)
				pm.advanceAppliedIndex(snap.Metadata.Index)
			}

			// 1: Write HardState, Entries, and Snapshot to persistent storage if they
			// are not empty.
			pm.raftStorage.Append(rd.Entries)

			// 2: Send all Messages to the nodes named in the To field.
			pm.transport.Send(rd.Messages)

			// 3: Apply Snapshot (if any) and CommittedEntries to the state machine.
			for _, entry := range pm.entriesToApply(rd.CommittedEntries) {
				switch entry.Type {
				case raftpb.EntryNormal:
					if len(entry.Data) == 0 {
						break
					}
					var block types.Block
					err := rlp.DecodeBytes(entry.Data, &block)
					if err != nil {
						log.Error("error decoding block", "err", err)
					}

					if pm.blockchain.HasBlock(block.Hash(), block.NumberU64()) {
						// This can happen:
						//
						// if (1) we crashed after applying this block to the chain, but
						//        before writing appliedIndex to LDB.
						// or (2) we crashed in a scenario where we applied further than
						//        raft *durably persisted* its committed index (see
						//        https://github.com/coreos/etcd/pull/7899). In this
						//        scenario, when the node comes back up, we will re-apply
						//        a few entries.

						headBlockHash := pm.blockchain.CurrentBlock().Hash()
						log.Warn("not applying already-applied block", "block hash", block.Hash(), "parent", block.ParentHash(), "head", headBlockHash)
					} else {
						pm.applyNewChainHead(&block)
					}

				case raftpb.EntryConfChange:
					var cc raftpb.ConfChange
					cc.Unmarshal(entry.Data)
					raftId := uint16(cc.NodeID)

					pm.confState = *pm.rawNode().ApplyConfChange(cc)

					forceSnapshot := false

					switch cc.Type {
					case raftpb.ConfChangeAddNode:
						if pm.isRaftIdRemoved(raftId) {
							log.Info("ignoring ConfChangeAddNode for permanently-removed peer", "raft id", raftId)
						} else if pm.isRaftIdUsed(raftId) && raftId <= uint16(len(pm.bootstrapNodes)) {
							// See initial cluster logic in startRaft() for more information.
							log.Info("ignoring expected ConfChangeAddNode for initial peer", "raft id", raftId)

							// We need a snapshot to exist to reconnect to peers on start-up after a crash.
							forceSnapshot = true
						} else if pm.isRaftIdUsed(raftId) {
							log.Info("ignoring ConfChangeAddNode for already-used raft ID", "raft id", raftId)
						} else {
							log.Info("adding peer due to ConfChangeAddNode", "raft id", raftId)

							forceSnapshot = true
							pm.addPeer(raft.BytesToAddress(cc.Context))
						}

					case raftpb.ConfChangeRemoveNode:
						if pm.isRaftIdRemoved(raftId) {
							log.Info("ignoring ConfChangeRemoveNode for already-removed peer", "raft id", raftId)
						} else {
							log.Info("removing peer due to ConfChangeRemoveNode", "raft id", raftId)

							forceSnapshot = true

							if raftId == pm.raftId {
								exitAfterApplying = true
							}

							pm.removePeer(raftId)
						}

					case raftpb.ConfChangeUpdateNode:
						// NOTE: remember to forceSnapshot in this case, if we add support
						// for this.
						raft.Fatalf("not yet handled: ConfChangeUpdateNode")
					}

					if forceSnapshot {
						// We force a snapshot here to persist our updated confState, so we
						// know our fellow cluster members when we come back online.
						//
						// It is critical here to snapshot *before* writing our applied
						// index in LevelDB, otherwise a crash while/before snapshotting
						// (after advancing our applied index) would result in the loss of a
						// cluster member upon restart: we would re-mount with an old
						// ConfState.
						pm.triggerSnapshot(entry.Index)
					}
				}

				pm.advanceAppliedIndex(entry.Index)
			}

			pm.maybeTriggerSnapshot()

			if exitAfterApplying {
				log.Warn("permanently removing self from the cluster")
				pm.Stop()
				log.Warn("permanently exited the cluster")

				return
			}

			// 4: Call Node.Advance() to signal readiness for the next batch of
			// updates.
			pm.rawNode().Advance()

		case <-pm.quitSync:
			return
		}
	}
}

func (pm *ProtocolManager) makeInitialRaftPeers() (raftPeers []etcdRaft.Peer, peerAddresses []*raft.Address, localAddress *raft.Address) {
	initialNodes := pm.bootstrapNodes
	raftPeers = make([]etcdRaft.Peer, len(initialNodes))       // Entire cluster
	peerAddresses = make([]*raft.Address, len(initialNodes)-1) // Cluster without *this* node

	peersSeen := 0
	for i, node := range initialNodes {
		raftId, err := raft.GetRaftConfigJson(pm.raftConfigPath)
		if err != nil {
			panic(err)
		}
		log.Info("makeInitialRaftPeers", "raft id", raftId, "pm.raftid", pm.raftId)
		// We initially get the raftPort from the enode ID's query string. As an alternative, we can move away from
		// requiring the use of static peers for the initial set, and load them from e.g. another JSON file which
		// contains pairs of enodes and raft ports, or we can get this initial peer list from commandline flags.
		address := raft.NewAddress(raftId, node.RaftPort(), node)
		raftPeers[i] = etcdRaft.Peer{
			ID:      uint64(raftId),
			Context: address.ToBytes(),
		}

		if raftId == pm.raftId {
			localAddress = address
		} else {
			peerAddresses[peersSeen] = address
			peersSeen += 1
		}
	}

	return
}

func blockExtendsChain(block *types.Block, chain *core.BlockChain) bool {
	return block.ParentHash() == chain.CurrentBlock().Hash()
}

func (pm *ProtocolManager) applyNewChainHead(block *types.Block) {
	if !blockExtendsChain(block, pm.blockchain) {
		headBlock := pm.blockchain.CurrentBlock()
		log.Info("Non-extending block", "block", block.Hash(), "parent", block.ParentHash(), "head", headBlock.Hash())
		pm.minter.InvalidRaftOrdering() <- raft.InvalidRaftOrdering{HeadBlock: headBlock, InvalidBlock: block}
	} else {
		if existingBlock := pm.blockchain.GetBlockByHash(block.Hash()); nil == existingBlock {
			if err := pm.blockchain.Validator().ValidateBody(block); err != nil {
				panic(fmt.Sprintf("failed to validate block %x (%v)", block.Hash(), err))
			}
		}

		_, err := pm.blockchain.InsertChain([]*types.Block{block})

		if err != nil {
			panic(fmt.Sprintf("failed to extend chain: %s", err.Error()))
		}
	}
}

// Sets new appliedIndex in-memory, *and* writes this appliedIndex to LevelDB.
func (pm *ProtocolManager) advanceAppliedIndex(index uint64) {
	pm.writeAppliedIndex(index)

	pm.mu.Lock()
	pm.appliedIndex = index
	pm.mu.Unlock()
}

func (pm *ProtocolManager) updateLeader(leader uint64) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	pm.leader = uint16(leader)
}

// LeaderAddress returns the Address for the current leader, or an error if no leader is elected.
func (pm *ProtocolManager) LeaderAddress() (*raft.Address, error) {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	if raft.MinterRole == pm.role {
		return pm.address, nil
	} else if l, ok := pm.peers[pm.leader]; ok {
		return l.Address, nil
	}
	// We expect to reach this if pm.leader is 0, which is how etcd denotes the lack of a leader.
	return nil, errors.New("no leader is currently elected")
}

// FetchRaftId returns the raft id for a given enodeId.
func (pm *ProtocolManager) FetchRaftId(enodeId string) (uint16, error) {
	node, err := enode.ParseV4(enodeId)
	if err != nil {
		return 0, err
	}
	for raftId, peer := range pm.peers {
		if peer.P2pNode.ID() == node.ID() {
			return raftId, nil
		}
	}
	return 0, fmt.Errorf("node not found in the cluster: %v", enodeId)
}

func (pm *ProtocolManager) MaxRaftId() uint16 {
	maxId := pm.raftId

	for peerId := range pm.peers {
		if maxId < peerId {
			maxId = peerId
		}
	}
	return maxId
}
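
// Illustrative only: a sketch of how an RPC/admin layer might consume the
// public surface above. The surrounding context and the peerEnode variable
// are hypothetical.
//
//	info := pm.NodeInfo() // cluster size, role, applied/snapshot index
//	if addr, err := pm.LeaderAddress(); err == nil {
//		log.Info("raft leader", "raftId", addr.RaftId)
//	}
//	if id, err := pm.FetchRaftId(peerEnode); err == nil {
//		pm.ProposePeerRemoval(id) // enqueues a ConfChangeRemoveNode proposal
//	}
//	_ = info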