github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/consensus/etcdraft/chain.go

/*
Copyright hechain. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package etcdraft

import (
	"context"
	"encoding/pem"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"code.cloudfoundry.org/clock"
	"github.com/golang/protobuf/proto"
	"github.com/hechain20/hechain/bccsp"
	"github.com/hechain20/hechain/common/channelconfig"
	"github.com/hechain20/hechain/common/flogging"
	"github.com/hechain20/hechain/orderer/common/cluster"
	"github.com/hechain20/hechain/orderer/common/types"
	"github.com/hechain20/hechain/orderer/consensus"
	"github.com/hechain20/hechain/protoutil"
	"github.com/hyperledger/fabric-protos-go/common"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
	"github.com/pkg/errors"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
	"go.etcd.io/etcd/wal"
)

const (
	BYTE = 1 << (10 * iota)
	KILOBYTE
	MEGABYTE
	GIGABYTE
	TERABYTE
)

const (
	// DefaultSnapshotCatchUpEntries is the default number of entries
	// to preserve in memory when a snapshot is taken. This is for
	// slow followers to catch up.
	DefaultSnapshotCatchUpEntries = uint64(4)

	// DefaultSnapshotIntervalSize is the default snapshot interval. It is
	// used if SnapshotIntervalSize is not provided in channel config options.
	// It is needed to enforce snapshot being set.
	DefaultSnapshotIntervalSize = 16 * MEGABYTE

	// DefaultEvictionSuspicion is the threshold that a node will start
	// suspecting its own eviction if it has been leaderless for this
	// period of time.
	DefaultEvictionSuspicion = time.Minute * 10

	// DefaultLeaderlessCheckInterval is the interval that a chain checks
	// its own leadership status.
	DefaultLeaderlessCheckInterval = time.Second * 10
)

//go:generate counterfeiter -o mocks/configurator.go . Configurator

// Configurator is used to configure the communication layer
// when the chain starts.
type Configurator interface {
	Configure(channel string, newNodes []cluster.RemoteNode)
}

//go:generate counterfeiter -o mocks/mock_rpc.go . RPC

// RPC is used to mock the transport layer in tests.
type RPC interface {
	SendConsensus(dest uint64, msg *orderer.ConsensusRequest) error
	SendSubmit(dest uint64, request *orderer.SubmitRequest, report func(err error)) error
}

//go:generate counterfeiter -o mocks/mock_blockpuller.go . BlockPuller

// BlockPuller is used to pull blocks from other OSN
type BlockPuller interface {
	PullBlock(seq uint64) *common.Block
	HeightsByEndpoints() (map[string]uint64, error)
	Close()
}

// CreateBlockPuller is a function to create BlockPuller on demand.
// It is passed into chain initializer so that tests could mock this.
type CreateBlockPuller func() (BlockPuller, error)

// Options contains all the configurations relevant to the chain.
type Options struct {
	RPCTimeout time.Duration
	RaftID     uint64

	Clock clock.Clock

	WALDir               string
	SnapDir              string
	SnapshotIntervalSize uint32

	// This is configurable mainly for testing purpose. Users are not
	// expected to alter this. Instead, DefaultSnapshotCatchUpEntries is used.
	SnapshotCatchUpEntries uint64

	MemoryStorage MemoryStorage
	Logger        *flogging.FabricLogger

	TickInterval      time.Duration
	ElectionTick      int
	HeartbeatTick     int
	MaxSizePerMsg     uint64
	MaxInflightBlocks int

	// BlockMetadata and Consenters should only be modified while under lock
	// of raftMetadataLock
	BlockMetadata *etcdraft.BlockMetadata
	Consenters    map[uint64]*etcdraft.Consenter

	// MigrationInit is set when the node starts right after consensus-type migration
	MigrationInit bool

	Metrics *Metrics
	Cert    []byte

	EvictionSuspicion   time.Duration
	LeaderCheckInterval time.Duration
}

type submit struct {
	req    *orderer.SubmitRequest
	leader chan uint64
}

type gc struct {
	index uint64
	state raftpb.ConfState
	data  []byte
}

// Chain implements consensus.Chain interface.
type Chain struct {
	configurator Configurator

	rpc RPC

	raftID    uint64
	channelID string

	lastKnownLeader uint64
	ActiveNodes     atomic.Value

	submitC  chan *submit
	applyC   chan apply
	observeC chan<- raft.SoftState // Notifies external observer on leader change (passed in optionally as an argument for tests)
	haltC    chan struct{}         // Signals to goroutines that the chain is halting
	doneC    chan struct{}         // Closes when the chain halts
	startC   chan struct{}         // Closes when the node is started
	snapC    chan *raftpb.Snapshot // Signal to catch up with snapshot
	gcC      chan *gc              // Signal to take snapshot

	errorCLock sync.RWMutex
	errorC     chan struct{} // returned by Errored()

	raftMetadataLock     sync.RWMutex
	confChangeInProgress *raftpb.ConfChange
	justElected          bool // this is true when node has just been elected
	configInflight       bool // this is true when there is config block or ConfChange in flight
	blockInflight        int  // number of in flight blocks

	clock clock.Clock // Tests can inject a fake clock

	support consensus.ConsenterSupport

	lastBlock    *common.Block
	appliedIndex uint64

	// needed by snapshotting
	sizeLimit        uint32 // SnapshotIntervalSize in bytes
	accDataSize      uint32 // accumulative data size since last snapshot
	lastSnapBlockNum uint64
	confState        raftpb.ConfState // Etcdraft requires ConfState to be persisted within snapshot

	createPuller CreateBlockPuller // func used to create BlockPuller on demand

	fresh bool // indicate if this is a fresh raft node

	// this is exported so that test can use `Node.Status()` to get raft node status.
	Node *node
	opts Options

	Metrics *Metrics
	logger  *flogging.FabricLogger

	periodicChecker *PeriodicCheck

	haltCallback func()

	statusReportMutex sync.Mutex
	consensusRelation types.ConsensusRelation
	status            types.Status

	// BCCSP instance
	CryptoProvider bccsp.BCCSP
}

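// A Chain serves requests only after Start has been called: Order and
// Configure wrap envelopes in a SubmitRequest and hand them to Submit, and
// callers use WaitReady and Errored to decide whether submissions can be
// accepted. Halt stops the chain; doneC is closed once it has fully stopped.
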
// NewChain constructs a chain object.
func NewChain(
	support consensus.ConsenterSupport,
	opts Options,
	conf Configurator,
	rpc RPC,
	cryptoProvider bccsp.BCCSP,
	f CreateBlockPuller,
	haltCallback func(),
	observeC chan<- raft.SoftState,
) (*Chain, error) {
	lg := opts.Logger.With("channel", support.ChannelID(), "node", opts.RaftID)

	fresh := !wal.Exist(opts.WALDir)
	storage, err := CreateStorage(lg, opts.WALDir, opts.SnapDir, opts.MemoryStorage)
	if err != nil {
		return nil, errors.Errorf("failed to restore persisted raft data: %s", err)
	}

	if opts.SnapshotCatchUpEntries == 0 {
		storage.SnapshotCatchUpEntries = DefaultSnapshotCatchUpEntries
	} else {
		storage.SnapshotCatchUpEntries = opts.SnapshotCatchUpEntries
	}

	sizeLimit := opts.SnapshotIntervalSize
	if sizeLimit == 0 {
		sizeLimit = DefaultSnapshotIntervalSize
	}

	// get block number in last snapshot, if exists
	var snapBlkNum uint64
	var cc raftpb.ConfState
	if s := storage.Snapshot(); !raft.IsEmptySnap(s) {
		b := protoutil.UnmarshalBlockOrPanic(s.Data)
		snapBlkNum = b.Header.Number
		cc = s.Metadata.ConfState
	}

	b := support.Block(support.Height() - 1)
	if b == nil {
		return nil, errors.Errorf("failed to get last block")
	}

	c := &Chain{
		configurator:      conf,
		rpc:               rpc,
		channelID:         support.ChannelID(),
		raftID:            opts.RaftID,
		submitC:           make(chan *submit),
		applyC:            make(chan apply),
		haltC:             make(chan struct{}),
		doneC:             make(chan struct{}),
		startC:            make(chan struct{}),
		snapC:             make(chan *raftpb.Snapshot),
		errorC:            make(chan struct{}),
		gcC:               make(chan *gc),
		observeC:          observeC,
		support:           support,
		fresh:             fresh,
		appliedIndex:      opts.BlockMetadata.RaftIndex,
		lastBlock:         b,
		sizeLimit:         sizeLimit,
		lastSnapBlockNum:  snapBlkNum,
		confState:         cc,
		createPuller:      f,
		clock:             opts.Clock,
		haltCallback:      haltCallback,
		consensusRelation: types.ConsensusRelationConsenter,
		status:            types.StatusActive,
		Metrics: &Metrics{
			ClusterSize:             opts.Metrics.ClusterSize.With("channel", support.ChannelID()),
			IsLeader:                opts.Metrics.IsLeader.With("channel", support.ChannelID()),
			ActiveNodes:             opts.Metrics.ActiveNodes.With("channel", support.ChannelID()),
			CommittedBlockNumber:    opts.Metrics.CommittedBlockNumber.With("channel", support.ChannelID()),
			SnapshotBlockNumber:     opts.Metrics.SnapshotBlockNumber.With("channel", support.ChannelID()),
			LeaderChanges:           opts.Metrics.LeaderChanges.With("channel", support.ChannelID()),
			ProposalFailures:        opts.Metrics.ProposalFailures.With("channel", support.ChannelID()),
			DataPersistDuration:     opts.Metrics.DataPersistDuration.With("channel", support.ChannelID()),
			NormalProposalsReceived: opts.Metrics.NormalProposalsReceived.With("channel", support.ChannelID()),
			ConfigProposalsReceived: opts.Metrics.ConfigProposalsReceived.With("channel", support.ChannelID()),
		},
		logger:         lg,
		opts:           opts,
		CryptoProvider: cryptoProvider,
	}

	// Sets initial values for metrics
	c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
	c.Metrics.IsLeader.Set(float64(0)) // all nodes start out as followers
	c.Metrics.ActiveNodes.Set(float64(0))
	c.Metrics.CommittedBlockNumber.Set(float64(c.lastBlock.Header.Number))
	c.Metrics.SnapshotBlockNumber.Set(float64(c.lastSnapBlockNum))

	// DO NOT use Applied option in config, see https://github.com/etcd-io/etcd/issues/10217
	// We guard against replay of written blocks with `appliedIndex` instead.
	config := &raft.Config{
		ID:              c.raftID,
		ElectionTick:    c.opts.ElectionTick,
		HeartbeatTick:   c.opts.HeartbeatTick,
		MaxSizePerMsg:   c.opts.MaxSizePerMsg,
		MaxInflightMsgs: c.opts.MaxInflightBlocks,
		Logger:          c.logger,
		Storage:         c.opts.MemoryStorage,
		// PreVote prevents reconnected node from disturbing network.
		// See etcd/raft doc for more details.
		PreVote:                   true,
		CheckQuorum:               true,
		DisableProposalForwarding: true, // This prevents blocks from being accidentally proposed by followers
	}

	disseminator := &Disseminator{RPC: c.rpc}
	disseminator.UpdateMetadata(nil) // initialize
	c.ActiveNodes.Store([]uint64{})

	c.Node = &node{
		chainID:      c.channelID,
		chain:        c,
		logger:       c.logger,
		metrics:      c.Metrics,
		storage:      storage,
		rpc:          disseminator,
		config:       config,
		tickInterval: c.opts.TickInterval,
		clock:        c.clock,
		metadata:     c.opts.BlockMetadata,
		tracker: &Tracker{
			id:     c.raftID,
			sender: disseminator,
			gauge:  c.Metrics.ActiveNodes,
			active: &c.ActiveNodes,
			logger: c.logger,
		},
	}

	return c, nil
}

// Start instructs the orderer to begin serving the chain and keep it current.
func (c *Chain) Start() {
	c.logger.Infof("Starting Raft node")

	if err := c.configureComm(); err != nil {
		c.logger.Errorf("Failed to start chain, aborting: %+v", err)
		close(c.doneC)
		return
	}

	isJoin := c.support.Height() > 1
	if isJoin && c.opts.MigrationInit {
		isJoin = false
		c.logger.Infof("Consensus-type migration detected, starting new raft node on an existing channel; height=%d", c.support.Height())
	}
	c.Node.start(c.fresh, isJoin)

	close(c.startC)
	close(c.errorC)

	go c.gc()
	go c.run()

	es := c.newEvictionSuspector()

	interval := DefaultLeaderlessCheckInterval
	if c.opts.LeaderCheckInterval != 0 {
		interval = c.opts.LeaderCheckInterval
	}

	c.periodicChecker = &PeriodicCheck{
		Logger:        c.logger,
		Report:        es.confirmSuspicion,
		ReportCleared: es.clearSuspicion,
		CheckInterval: interval,
		Condition:     c.suspectEviction,
	}
	c.periodicChecker.Run()
}

// Order submits normal type transactions for ordering.
func (c *Chain) Order(env *common.Envelope, configSeq uint64) error {
	c.Metrics.NormalProposalsReceived.Add(1)
	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
}

// Configure submits config type transactions for ordering.
func (c *Chain) Configure(env *common.Envelope, configSeq uint64) error {
	c.Metrics.ConfigProposalsReceived.Add(1)
	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
}

// WaitReady blocks when the chain:
// - is catching up with other nodes using snapshot
//
// In any other case, it returns right away.
func (c *Chain) WaitReady() error {
	if err := c.isRunning(); err != nil {
		return err
	}

	select {
	case c.submitC <- nil:
	case <-c.doneC:
		return errors.Errorf("chain is stopped")
	}

	return nil
}

// Errored returns a channel that closes when the chain stops.
func (c *Chain) Errored() <-chan struct{} {
	c.errorCLock.RLock()
	defer c.errorCLock.RUnlock()
	return c.errorC
}

// Halt stops the chain.
func (c *Chain) Halt() {
	c.stop()
}

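// stop halts the chain if it is running. It returns true if the chain was
// actually stopped by this call, and false if the chain had never been
// started or was already stopped.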
func (c *Chain) stop() bool {
	select {
	case <-c.startC:
	default:
		c.logger.Warn("Attempted to halt a chain that has not started")
		return false
	}

	select {
	case c.haltC <- struct{}{}:
	case <-c.doneC:
		return false
	}
	<-c.doneC

	c.statusReportMutex.Lock()
	defer c.statusReportMutex.Unlock()
	c.status = types.StatusInactive

	return true
}

// halt stops the chain and calls the haltCallback function, which allows the
// chain to transfer responsibility to a follower or the inactive chain registry when a chain
// discovers it is no longer a member of a channel.
func (c *Chain) halt() {
	if stopped := c.stop(); !stopped {
		c.logger.Info("This node was stopped, the haltCallback will not be called")
		return
	}
	if c.haltCallback != nil {
		c.haltCallback() // Must be invoked WITHOUT any internal lock

		c.statusReportMutex.Lock()
		defer c.statusReportMutex.Unlock()

		// If the haltCallback registers the chain in to the inactive chain registry (i.e., system channel exists) then
		// this is the correct consensusRelation. If the haltCallback transfers responsibility to a follower.Chain, then
		// this chain is about to be GC anyway. The new follower.Chain replacing this one will report the correct
		// StatusReport.
		c.consensusRelation = types.ConsensusRelationConfigTracker
	}
}

func (c *Chain) isRunning() error {
	select {
	case <-c.startC:
	default:
		return errors.Errorf("chain is not started")
	}

	select {
	case <-c.doneC:
		return errors.Errorf("chain is stopped")
	default:
	}

	return nil
}

// Consensus passes the given ConsensusRequest message to the raft.Node instance
func (c *Chain) Consensus(req *orderer.ConsensusRequest, sender uint64) error {
	if err := c.isRunning(); err != nil {
		return err
	}

	stepMsg := &raftpb.Message{}
	if err := proto.Unmarshal(req.Payload, stepMsg); err != nil {
		return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err)
	}

	if stepMsg.To != c.raftID {
		c.logger.Warnf("Received msg to %d, my ID is probably wrong due to out of date, cowardly halting", stepMsg.To)
		c.halt()
		return nil
	}

	if err := c.Node.Step(context.TODO(), *stepMsg); err != nil {
		return fmt.Errorf("failed to process Raft Step message: %s", err)
	}

	if len(req.Metadata) == 0 || atomic.LoadUint64(&c.lastKnownLeader) != sender { // ignore metadata from non-leader
		return nil
	}

	clusterMetadata := &etcdraft.ClusterMetadata{}
	if err := proto.Unmarshal(req.Metadata, clusterMetadata); err != nil {
		return errors.Errorf("failed to unmarshal ClusterMetadata: %s", err)
	}

	c.Metrics.ActiveNodes.Set(float64(len(clusterMetadata.ActiveNodes)))
	c.ActiveNodes.Store(clusterMetadata.ActiveNodes)

	return nil
}

// Submit forwards the incoming request to:
// - the local run goroutine if this is leader
// - the actual leader via the transport mechanism
// The call fails if there's no leader elected yet.
func (c *Chain) Submit(req *orderer.SubmitRequest, sender uint64) error {
	if err := c.isRunning(); err != nil {
		c.Metrics.ProposalFailures.Add(1)
		return err
	}

	leadC := make(chan uint64, 1)
	select {
	case c.submitC <- &submit{req, leadC}:
		lead := <-leadC
		if lead == raft.None {
			c.Metrics.ProposalFailures.Add(1)
			return errors.Errorf("no Raft leader")
		}

		if lead != c.raftID {
			if err := c.forwardToLeader(lead, req); err != nil {
				return err
			}
		}

	case <-c.doneC:
		c.Metrics.ProposalFailures.Add(1)
		return errors.Errorf("chain is stopped")
	}

	return nil
}

func (c *Chain) forwardToLeader(lead uint64, req *orderer.SubmitRequest) error {
	c.logger.Infof("Forwarding transaction to the leader %d", lead)
	timer := time.NewTimer(c.opts.RPCTimeout)
	defer timer.Stop()

	sentChan := make(chan struct{})
	atomicErr := &atomic.Value{}

	report := func(err error) {
		if err != nil {
			atomicErr.Store(err.Error())
			c.Metrics.ProposalFailures.Add(1)
		}
		close(sentChan)
	}

	c.rpc.SendSubmit(lead, req, report)

	select {
	case <-sentChan:
	case <-c.doneC:
		return errors.Errorf("chain is stopped")
	case <-timer.C:
		return errors.Errorf("timed out (%v) waiting on forwarding to %d", c.opts.RPCTimeout, lead)
	}

	if atomicErr.Load() != nil {
		return errors.Errorf(atomicErr.Load().(string))
	}
	return nil
}

type apply struct {
	entries []raftpb.Entry
	soft    *raft.SoftState
}

func isCandidate(state raft.StateType) bool {
	return state == raft.StatePreCandidate || state == raft.StateCandidate
}

func (c *Chain) run() {
	ticking := false
	timer := c.clock.NewTimer(time.Second)
	// we need a stopped timer rather than nil,
	// because we will be select waiting on timer.C()
	if !timer.Stop() {
		<-timer.C()
	}

	// if timer is already started, this is a no-op
	startTimer := func() {
		if !ticking {
			ticking = true
			timer.Reset(c.support.SharedConfig().BatchTimeout())
		}
	}

	stopTimer := func() {
		if !timer.Stop() && ticking {
			// we only need to drain the channel if the timer expired (not explicitly stopped)
			<-timer.C()
		}
		ticking = false
	}

	var soft raft.SoftState
	submitC := c.submitC
	var bc *blockCreator

	var propC chan<- *common.Block
	var cancelProp context.CancelFunc
	cancelProp = func() {} // no-op as initial value

	becomeLeader := func() (chan<- *common.Block, context.CancelFunc) {
		c.Metrics.IsLeader.Set(1)

		c.blockInflight = 0
		c.justElected = true
		submitC = nil
		ch := make(chan *common.Block, c.opts.MaxInflightBlocks)

		// if there is unfinished ConfChange, we should resume the effort to propose it as
		// new leader, and wait for it to be committed before start serving new requests.
		if cc := c.getInFlightConfChange(); cc != nil {
			// The reason `ProposeConfChange` should be called in go routine is documented in `writeConfigBlock` method.
			go func() {
				if err := c.Node.ProposeConfChange(context.TODO(), *cc); err != nil {
					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
				}
			}()

			c.confChangeInProgress = cc
			c.configInflight = true
		}

		// Leader should call Propose in go routine, because this method may be blocked
		// if node is leaderless (this can happen when leader steps down in a heavily
		// loaded network). We need to make sure applyC can still be consumed properly.
		ctx, cancel := context.WithCancel(context.Background())
		go func(ctx context.Context, ch <-chan *common.Block) {
			for {
				select {
				case b := <-ch:
					data := protoutil.MarshalOrPanic(b)
					if err := c.Node.Propose(ctx, data); err != nil {
						c.logger.Errorf("Failed to propose block [%d] to raft and discard %d blocks in queue: %s", b.Header.Number, len(ch), err)
						return
					}
					c.logger.Debugf("Proposed block [%d] to raft consensus", b.Header.Number)

				case <-ctx.Done():
					c.logger.Debugf("Quit proposing blocks, discarded %d blocks in the queue", len(ch))
					return
				}
			}
		}(ctx, ch)

		return ch, cancel
	}

	becomeFollower := func() {
		cancelProp()
		c.blockInflight = 0
		_ = c.support.BlockCutter().Cut()
		stopTimer()
		submitC = c.submitC
		bc = nil
		c.Metrics.IsLeader.Set(0)
	}

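	// Main event loop. submitC is set to nil whenever the chain must pause
	// accepting new envelopes (a freshly elected leader that still has
	// entries in flight, a config block or ConfChange in flight, or too many
	// blocks in flight); receiving on a nil channel blocks forever, so that
	// select case stays disabled until submitC is restored to c.submitC.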
	for {
		select {
		case s := <-submitC:
			if s == nil {
				// polled by `WaitReady`
				continue
			}

			if soft.RaftState == raft.StatePreCandidate || soft.RaftState == raft.StateCandidate {
				s.leader <- raft.None
				continue
			}

			s.leader <- soft.Lead
			if soft.Lead != c.raftID {
				continue
			}

			batches, pending, err := c.ordered(s.req)
			if err != nil {
				c.logger.Errorf("Failed to order message: %s", err)
				continue
			}
			if pending {
				startTimer() // no-op if timer is already started
			} else {
				stopTimer()
			}

			c.propose(propC, bc, batches...)

			if c.configInflight {
				c.logger.Info("Received config transaction, pause accepting transaction till it is committed")
				submitC = nil
			} else if c.blockInflight >= c.opts.MaxInflightBlocks {
				c.logger.Debugf("Number of in-flight blocks (%d) reaches limit (%d), pause accepting transaction",
					c.blockInflight, c.opts.MaxInflightBlocks)
				submitC = nil
			}

		case app := <-c.applyC:
			if app.soft != nil {
				newLeader := atomic.LoadUint64(&app.soft.Lead) // etcdraft requires atomic access
				if newLeader != soft.Lead {
					c.logger.Infof("Raft leader changed: %d -> %d", soft.Lead, newLeader)
					c.Metrics.LeaderChanges.Add(1)

					atomic.StoreUint64(&c.lastKnownLeader, newLeader)

					if newLeader == c.raftID {
						propC, cancelProp = becomeLeader()
					}

					if soft.Lead == c.raftID {
						becomeFollower()
					}
				}

				foundLeader := soft.Lead == raft.None && newLeader != raft.None
				quitCandidate := isCandidate(soft.RaftState) && !isCandidate(app.soft.RaftState)

				if foundLeader || quitCandidate {
					c.errorCLock.Lock()
					c.errorC = make(chan struct{})
					c.errorCLock.Unlock()
				}

				if isCandidate(app.soft.RaftState) || newLeader == raft.None {
					atomic.StoreUint64(&c.lastKnownLeader, raft.None)
					select {
					case <-c.errorC:
					default:
						nodeCount := len(c.opts.BlockMetadata.ConsenterIds)
						// Only close the error channel (to signal the broadcast/deliver front-end a consensus backend error)
						// If we are a cluster of size 3 or more, otherwise we can't expand a cluster of size 1 to 2 nodes.
						if nodeCount > 2 {
							close(c.errorC)
						} else {
							c.logger.Warningf("No leader is present, cluster size is %d", nodeCount)
						}
					}
				}

				soft = raft.SoftState{Lead: newLeader, RaftState: app.soft.RaftState}

				// notify external observer
				select {
				case c.observeC <- soft:
				default:
				}
			}

			c.apply(app.entries)

			if c.justElected {
				msgInflight := c.Node.lastIndex() > c.appliedIndex
				if msgInflight {
					c.logger.Debugf("There are in flight blocks, new leader should not serve requests")
					continue
				}

				if c.configInflight {
					c.logger.Debugf("There is config block in flight, new leader should not serve requests")
					continue
				}

				c.logger.Infof("Start accepting requests as Raft leader at block [%d]", c.lastBlock.Header.Number)
				bc = &blockCreator{
					hash:   protoutil.BlockHeaderHash(c.lastBlock.Header),
					number: c.lastBlock.Header.Number,
					logger: c.logger,
				}
				submitC = c.submitC
				c.justElected = false
			} else if c.configInflight {
				c.logger.Info("Config block or ConfChange in flight, pause accepting transaction")
				submitC = nil
			} else if c.blockInflight < c.opts.MaxInflightBlocks {
				submitC = c.submitC
			}

		case <-timer.C():
			ticking = false

			batch := c.support.BlockCutter().Cut()
			if len(batch) == 0 {
				c.logger.Warningf("Batch timer expired with no pending requests, this might indicate a bug")
				continue
			}

			c.logger.Debugf("Batch timer expired, creating block")
			c.propose(propC, bc, batch) // we are certain this is normal block, no need to block

		case sn := <-c.snapC:
			if sn.Metadata.Index != 0 {
				if sn.Metadata.Index <= c.appliedIndex {
					c.logger.Debugf("Skip snapshot taken at index %d, because it is behind current applied index %d", sn.Metadata.Index, c.appliedIndex)
					break
				}

				c.confState = sn.Metadata.ConfState
				c.appliedIndex = sn.Metadata.Index
			} else {
				c.logger.Infof("Received artificial snapshot to trigger catchup")
			}

			if err := c.catchUp(sn); err != nil {
				c.logger.Panicf("Failed to recover from snapshot taken at Term %d and Index %d: %s",
					sn.Metadata.Term, sn.Metadata.Index, err)
			}

		case <-c.doneC:
			stopTimer()
			cancelProp()

			select {
			case <-c.errorC: // avoid closing closed channel
			default:
				close(c.errorC)
			}

			c.logger.Infof("Stop serving requests")
			c.periodicChecker.Stop()
			return
		}
	}
}

func (c *Chain) writeBlock(block *common.Block, index uint64) {
	if block.Header.Number > c.lastBlock.Header.Number+1 {
		c.logger.Panicf("Got block [%d], expect block [%d]", block.Header.Number, c.lastBlock.Header.Number+1)
	} else if block.Header.Number < c.lastBlock.Header.Number+1 {
		c.logger.Infof("Got block [%d], expect block [%d], this node was forced to catch up", block.Header.Number, c.lastBlock.Header.Number+1)
		return
	}

	if c.blockInflight > 0 {
		c.blockInflight-- // only reduce on leader
	}
	c.lastBlock = block

	c.logger.Infof("Writing block [%d] (Raft index: %d) to ledger", block.Header.Number, index)

	if protoutil.IsConfigBlock(block) {
		c.writeConfigBlock(block, index)
		return
	}

	c.raftMetadataLock.Lock()
	c.opts.BlockMetadata.RaftIndex = index
	m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
	c.raftMetadataLock.Unlock()

	c.support.WriteBlock(block, m)
}

// Orders the envelope in the `msg` content of a SubmitRequest.
// Returns
//   -- batches [][]*common.Envelope; the batches cut,
//   -- pending bool; if there are envelopes pending to be ordered,
//   -- err error; the error encountered, if any.
// It takes care of config messages as well as the revalidation of messages if the config sequence has advanced.
func (c *Chain) ordered(msg *orderer.SubmitRequest) (batches [][]*common.Envelope, pending bool, err error) {
	seq := c.support.Sequence()

	isconfig, err := c.isConfig(msg.Payload)
	if err != nil {
		return nil, false, errors.Errorf("bad message: %s", err)
	}

	if isconfig {
		// ConfigMsg
		if msg.LastValidationSeq < seq {
			c.logger.Warnf("Config message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
			msg.Payload, _, err = c.support.ProcessConfigMsg(msg.Payload)
			if err != nil {
				c.Metrics.ProposalFailures.Add(1)
				return nil, true, errors.Errorf("bad config message: %s", err)
			}
		}

		batch := c.support.BlockCutter().Cut()
		batches = [][]*common.Envelope{}
		if len(batch) != 0 {
			batches = append(batches, batch)
		}
		batches = append(batches, []*common.Envelope{msg.Payload})
		return batches, false, nil
	}
	// it is a normal message
	if msg.LastValidationSeq < seq {
		c.logger.Warnf("Normal message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
		if _, err := c.support.ProcessNormalMsg(msg.Payload); err != nil {
			c.Metrics.ProposalFailures.Add(1)
			return nil, true, errors.Errorf("bad normal message: %s", err)
		}
	}
	batches, pending = c.support.BlockCutter().Ordered(msg.Payload)
	return batches, pending, nil
}

func (c *Chain) propose(ch chan<- *common.Block, bc *blockCreator, batches ...[]*common.Envelope) {
	for _, batch := range batches {
		b := bc.createNextBlock(batch)
		c.logger.Infof("Created block [%d], there are %d blocks in flight", b.Header.Number, c.blockInflight)

		select {
		case ch <- b:
		default:
			c.logger.Panic("Programming error: limit of in-flight blocks does not properly take effect or block is proposed by follower")
		}

		// if it is config block, then we should wait for the commit of the block
		if protoutil.IsConfigBlock(b) {
			c.configInflight = true
		}

		c.blockInflight++
	}
}

func (c *Chain) catchUp(snap *raftpb.Snapshot) error {
	b, err := protoutil.UnmarshalBlock(snap.Data)
	if err != nil {
		return errors.Errorf("failed to unmarshal snapshot data to block: %s", err)
	}

	if c.lastBlock.Header.Number >= b.Header.Number {
		c.logger.Warnf("Snapshot is at block [%d], local block number is %d, no sync needed", b.Header.Number, c.lastBlock.Header.Number)
		return nil
	} else if b.Header.Number == c.lastBlock.Header.Number+1 {
		c.logger.Infof("The only missing block [%d] is encapsulated in snapshot, committing it to shortcut catchup process", b.Header.Number)
		c.commitBlock(b)
		c.lastBlock = b
		return nil
	}

	puller, err := c.createPuller()
	if err != nil {
		return errors.Errorf("failed to create block puller: %s", err)
	}
	defer puller.Close()

	next := c.lastBlock.Header.Number + 1

	c.logger.Infof("Catching up with snapshot taken at block [%d], starting from block [%d]", b.Header.Number, next)

	for next <= b.Header.Number {
		block := puller.PullBlock(next)
		if block == nil {
			return errors.Errorf("failed to fetch block [%d] from cluster", next)
		}
		c.commitBlock(block)
		c.lastBlock = block
		next++
	}

	c.logger.Infof("Finished syncing with cluster up to and including block [%d]", b.Header.Number)
	return nil
}

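// commitBlock writes a block obtained during catch-up to the ledger. If the
// block is a config block that changes the consenter set, communication is
// reconfigured accordingly.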
func (c *Chain) commitBlock(block *common.Block) {
	// read consenters metadata to write into the replicated block
	blockMeta, err := protoutil.GetConsenterMetadataFromBlock(block)
	if err != nil {
		c.logger.Panicf("Failed to obtain metadata: %s", err)
	}

	if !protoutil.IsConfigBlock(block) {
		c.support.WriteBlock(block, blockMeta.Value)
		return
	}

	c.support.WriteConfigBlock(block, blockMeta.Value)

	configMembership := c.detectConfChange(block)

	if configMembership != nil && configMembership.Changed() {
		c.logger.Infof("Config block [%d] changes consenter set, communication should be reconfigured", block.Header.Number)

		c.raftMetadataLock.Lock()
		c.opts.BlockMetadata = configMembership.NewBlockMetadata
		c.opts.Consenters = configMembership.NewConsenters
		c.raftMetadataLock.Unlock()

		if err := c.configureComm(); err != nil {
			c.logger.Panicf("Failed to configure communication: %s", err)
		}
	}
}

func (c *Chain) detectConfChange(block *common.Block) *MembershipChanges {
	// If config is targeting THIS channel, inspect consenter set and
	// propose raft ConfChange if it adds/removes node.
	configMetadata := c.newConfigMetadata(block)

	if configMetadata == nil {
		return nil
	}

	if configMetadata.Options != nil &&
		configMetadata.Options.SnapshotIntervalSize != 0 &&
		configMetadata.Options.SnapshotIntervalSize != c.sizeLimit {
		c.logger.Infof("Update snapshot interval size to %d bytes (was %d)",
			configMetadata.Options.SnapshotIntervalSize, c.sizeLimit)
		c.sizeLimit = configMetadata.Options.SnapshotIntervalSize
	}

	changes, err := ComputeMembershipChanges(c.opts.BlockMetadata, c.opts.Consenters, configMetadata.Consenters)
	if err != nil {
		c.logger.Panicf("illegal configuration change detected: %s", err)
	}

	if changes.Rotated() {
		c.logger.Infof("Config block [%d] rotates TLS certificate of node %d", block.Header.Number, changes.RotatedNode)
	}

	return changes
}

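// apply writes committed normal entries to the ledger as blocks and applies
// committed ConfChange entries to the raft node, advancing appliedIndex as it
// goes. Once the data accumulated since the last snapshot exceeds sizeLimit,
// it signals the gc goroutine to take a snapshot.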
func (c *Chain) apply(ents []raftpb.Entry) {
	if len(ents) == 0 {
		return
	}

	if ents[0].Index > c.appliedIndex+1 {
		c.logger.Panicf("first index of committed entry[%d] should <= appliedIndex[%d]+1", ents[0].Index, c.appliedIndex)
	}

	var position int
	for i := range ents {
		switch ents[i].Type {
		case raftpb.EntryNormal:
			if len(ents[i].Data) == 0 {
				break
			}

			position = i
			c.accDataSize += uint32(len(ents[i].Data))

			// We need to strictly avoid re-applying normal entries,
			// otherwise we are writing the same block twice.
			if ents[i].Index <= c.appliedIndex {
				c.logger.Debugf("Received block with raft index (%d) <= applied index (%d), skip", ents[i].Index, c.appliedIndex)
				break
			}

			block := protoutil.UnmarshalBlockOrPanic(ents[i].Data)
			c.writeBlock(block, ents[i].Index)
			c.Metrics.CommittedBlockNumber.Set(float64(block.Header.Number))

		case raftpb.EntryConfChange:
			var cc raftpb.ConfChange
			if err := cc.Unmarshal(ents[i].Data); err != nil {
				c.logger.Warnf("Failed to unmarshal ConfChange data: %s", err)
				continue
			}

			c.confState = *c.Node.ApplyConfChange(cc)

			switch cc.Type {
			case raftpb.ConfChangeAddNode:
				c.logger.Infof("Applied config change to add node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
			case raftpb.ConfChangeRemoveNode:
				c.logger.Infof("Applied config change to remove node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
			default:
				c.logger.Panic("Programming error, encountered unsupported raft config change")
			}

			// This ConfChange was introduced by a previously committed config block,
			// we can now unblock submitC to accept envelopes.
			var configureComm bool
			if c.confChangeInProgress != nil &&
				c.confChangeInProgress.NodeID == cc.NodeID &&
				c.confChangeInProgress.Type == cc.Type {

				configureComm = true
				c.confChangeInProgress = nil
				c.configInflight = false
				// report the new cluster size
				c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
			}

			lead := atomic.LoadUint64(&c.lastKnownLeader)
			removeLeader := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == lead
			shouldHalt := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == c.raftID

			// unblock `run` go routine so it can still consume Raft messages
			go func() {
				if removeLeader {
					c.logger.Infof("Current leader is being removed from channel, attempt leadership transfer")
					c.Node.abdicateLeader(lead)
				}

				if configureComm && !shouldHalt { // no need to configure comm if this node is going to halt
					if err := c.configureComm(); err != nil {
						c.logger.Panicf("Failed to configure communication: %s", err)
					}
				}

				if shouldHalt {
					c.logger.Infof("This node is being removed from replica set")
					c.halt()
					return
				}
			}()
		}

		if ents[i].Index > c.appliedIndex {
			c.appliedIndex = ents[i].Index
		}
	}

	if c.accDataSize >= c.sizeLimit {
		b := protoutil.UnmarshalBlockOrPanic(ents[position].Data)

		select {
		case c.gcC <- &gc{index: c.appliedIndex, state: c.confState, data: ents[position].Data}:
			c.logger.Infof("Accumulated %d bytes since last snapshot, exceeding size limit (%d bytes), "+
				"taking snapshot at block [%d] (index: %d), last snapshotted block number is %d, current nodes: %+v",
				c.accDataSize, c.sizeLimit, b.Header.Number, c.appliedIndex, c.lastSnapBlockNum, c.confState.Nodes)
			c.accDataSize = 0
			c.lastSnapBlockNum = b.Header.Number
			c.Metrics.SnapshotBlockNumber.Set(float64(b.Header.Number))
		default:
			c.logger.Warnf("Snapshotting is in progress, it is very likely that SnapshotIntervalSize is too small")
		}
	}
}

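// gc runs in its own goroutine and takes a snapshot whenever apply reports
// that enough data has accumulated since the last one; it exits when the
// chain halts.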
func (c *Chain) gc() {
	for {
		select {
		case g := <-c.gcC:
			c.Node.takeSnapshot(g.index, g.state, g.data)
		case <-c.doneC:
			c.logger.Infof("Stop garbage collecting")
			return
		}
	}
}

func (c *Chain) isConfig(env *common.Envelope) (bool, error) {
	h, err := protoutil.ChannelHeader(env)
	if err != nil {
		c.logger.Errorf("failed to extract channel header from envelope")
		return false, err
	}

	return h.Type == int32(common.HeaderType_CONFIG) || h.Type == int32(common.HeaderType_ORDERER_TRANSACTION), nil
}

func (c *Chain) configureComm() error {
	// Reset unreachable map when communication is reconfigured
	c.Node.unreachableLock.Lock()
	c.Node.unreachable = make(map[uint64]struct{})
	c.Node.unreachableLock.Unlock()

	nodes, err := c.remotePeers()
	if err != nil {
		return err
	}

	c.configurator.Configure(c.channelID, nodes)
	return nil
}

func (c *Chain) remotePeers() ([]cluster.RemoteNode, error) {
	c.raftMetadataLock.RLock()
	defer c.raftMetadataLock.RUnlock()

	var nodes []cluster.RemoteNode
	for raftID, consenter := range c.opts.Consenters {
		// No need to know yourself
		if raftID == c.raftID {
			continue
		}
		serverCertAsDER, err := pemToDER(consenter.ServerTlsCert, raftID, "server", c.logger)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		clientCertAsDER, err := pemToDER(consenter.ClientTlsCert, raftID, "client", c.logger)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		nodes = append(nodes, cluster.RemoteNode{
			ID:            raftID,
			Endpoint:      fmt.Sprintf("%s:%d", consenter.Host, consenter.Port),
			ServerTLSCert: serverCertAsDER,
			ClientTLSCert: clientCertAsDER,
		})
	}
	return nodes, nil
}

func pemToDER(pemBytes []byte, id uint64, certType string, logger *flogging.FabricLogger) ([]byte, error) {
	bl, _ := pem.Decode(pemBytes)
	if bl == nil {
		logger.Errorf("Rejecting PEM block of %s TLS cert for node %d, offending PEM is: %s", certType, id, string(pemBytes))
		return nil, errors.Errorf("invalid PEM block")
	}
	return bl.Bytes, nil
}

// writeConfigBlock writes a configuration block to the ledger. In addition, it
// extracts any updates to the raft replica set from the block and, if there
// are changes, updates the cluster membership as well.
func (c *Chain) writeConfigBlock(block *common.Block, index uint64) {
	hdr, err := ConfigChannelHeader(block)
	if err != nil {
		c.logger.Panicf("Failed to get config header type from config block: %s", err)
	}

	c.configInflight = false

	switch common.HeaderType(hdr.Type) {
	case common.HeaderType_CONFIG:
		configMembership := c.detectConfChange(block)

		c.raftMetadataLock.Lock()
		c.opts.BlockMetadata.RaftIndex = index
		if configMembership != nil {
			c.opts.BlockMetadata = configMembership.NewBlockMetadata
			c.opts.Consenters = configMembership.NewConsenters
		}
		c.raftMetadataLock.Unlock()

		blockMetadataBytes := protoutil.MarshalOrPanic(c.opts.BlockMetadata)

		// write block with metadata
		c.support.WriteConfigBlock(block, blockMetadataBytes)

		if configMembership == nil {
			return
		}

		// update membership
		if configMembership.ConfChange != nil {
			// We need to propose conf change in a go routine, because it may be blocked if raft node
			// becomes leaderless, and we should not block `run` so it can keep consuming applyC,
			// otherwise we have a deadlock.
			go func() {
				// ProposeConfChange returns error only if node being stopped.
				// This proposal is dropped by followers because DisableProposalForwarding is enabled.
				if err := c.Node.ProposeConfChange(context.TODO(), *configMembership.ConfChange); err != nil {
					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
				}
			}()

			c.confChangeInProgress = configMembership.ConfChange

			switch configMembership.ConfChange.Type {
			case raftpb.ConfChangeAddNode:
				c.logger.Infof("Config block just committed adds node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
			case raftpb.ConfChangeRemoveNode:
				c.logger.Infof("Config block just committed removes node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
			default:
				c.logger.Panic("Programming error, encountered unsupported raft config change")
			}

			c.configInflight = true
		} else if configMembership.Rotated() {
			lead := atomic.LoadUint64(&c.lastKnownLeader)
			if configMembership.RotatedNode == lead {
				c.logger.Infof("Certificate of Raft leader is being rotated, attempt leader transfer before reconfiguring communication")
				go func() {
					c.Node.abdicateLeader(lead)
					if err := c.configureComm(); err != nil {
						c.logger.Panicf("Failed to configure communication: %s", err)
					}
				}()
			} else {
				if err := c.configureComm(); err != nil {
					c.logger.Panicf("Failed to configure communication: %s", err)
				}
			}
		}

	case common.HeaderType_ORDERER_TRANSACTION:
		// If this config is channel creation, no extra inspection is needed
		c.raftMetadataLock.Lock()
		c.opts.BlockMetadata.RaftIndex = index
		m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
		c.raftMetadataLock.Unlock()

		c.support.WriteConfigBlock(block, m)

	default:
		c.logger.Panicf("Programming error: unexpected config type: %s", common.HeaderType(hdr.Type))
	}
}

// getInFlightConfChange returns ConfChange in-flight if any.
// It returns confChangeInProgress if it is not nil. Otherwise
// it returns ConfChange from the last committed block (might be nil).
func (c *Chain) getInFlightConfChange() *raftpb.ConfChange {
	if c.confChangeInProgress != nil {
		return c.confChangeInProgress
	}

	if c.lastBlock.Header.Number == 0 {
		return nil // nothing to fail over, the chain has just started
	}

	if !protoutil.IsConfigBlock(c.lastBlock) {
		return nil
	}

	// extracting current Raft configuration state
	confState := c.Node.ApplyConfChange(raftpb.ConfChange{})

	if len(confState.Nodes) == len(c.opts.BlockMetadata.ConsenterIds) {
		// Raft configuration change could only add one node or
		// remove one node at a time, if raft conf state size is
		// equal to membership stored in block metadata field,
		// that means everything is in sync and no need to propose
		// config update.
		return nil
	}

	return ConfChange(c.opts.BlockMetadata, confState)
}

// newConfigMetadata extracts config metadata from the configuration block
func (c *Chain) newConfigMetadata(block *common.Block) *etcdraft.ConfigMetadata {
	metadata, err := ConsensusMetadataFromConfigBlock(block)
	if err != nil {
		c.logger.Panicf("error reading consensus metadata: %s", err)
	}
	return metadata
}

// ValidateConsensusMetadata determines the validity of a
// ConsensusMetadata update during config updates on the channel.
func (c *Chain) ValidateConsensusMetadata(oldOrdererConfig, newOrdererConfig channelconfig.Orderer, newChannel bool) error {
	if newOrdererConfig == nil {
		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil new channel config")
		return nil
	}

	// metadata was not updated
	if newOrdererConfig.ConsensusMetadata() == nil {
		return nil
	}

	if oldOrdererConfig == nil {
		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil old channel config")
		return nil
	}

	if oldOrdererConfig.ConsensusMetadata() == nil {
		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil old metadata")
		return nil
	}

	oldMetadata := &etcdraft.ConfigMetadata{}
	if err := proto.Unmarshal(oldOrdererConfig.ConsensusMetadata(), oldMetadata); err != nil {
		c.logger.Panicf("Programming Error: Failed to unmarshal old etcdraft consensus metadata: %v", err)
	}

	newMetadata := &etcdraft.ConfigMetadata{}
	if err := proto.Unmarshal(newOrdererConfig.ConsensusMetadata(), newMetadata); err != nil {
		return errors.Wrap(err, "failed to unmarshal new etcdraft metadata configuration")
	}

	verifyOpts, err := createX509VerifyOptions(newOrdererConfig)
	if err != nil {
		return errors.Wrapf(err, "failed to create x509 verify options from old and new orderer config")
	}

	if err := VerifyConfigMetadata(newMetadata, verifyOpts); err != nil {
		return errors.Wrap(err, "invalid new config metadata")
	}

	if newChannel {
		// check if the consenters are a subset of the existing consenters (system channel consenters)
		set := ConsentersToMap(oldMetadata.Consenters)
		for _, c := range newMetadata.Consenters {
			if !set.Exists(c) {
				return errors.New("new channel has consenter that is not part of system consenter set")
			}
		}
		return nil
	}

	// create the dummy parameters for ComputeMembershipChanges
	c.raftMetadataLock.RLock()
	dummyOldBlockMetadata := proto.Clone(c.opts.BlockMetadata).(*etcdraft.BlockMetadata)
	c.raftMetadataLock.RUnlock()

	dummyOldConsentersMap := CreateConsentersMap(dummyOldBlockMetadata, oldMetadata)
	changes, err := ComputeMembershipChanges(dummyOldBlockMetadata, dummyOldConsentersMap, newMetadata.Consenters)
	if err != nil {
		return err
	}

	// new config metadata was verified above. Additionally, check the new consenters for certificate expiration.
	for _, c := range changes.AddedNodes {
		if err := validateConsenterTLSCerts(c, verifyOpts, false); err != nil {
			return errors.Wrapf(err, "consenter %s:%d has invalid certificates", c.Host, c.Port)
		}
	}

	active := c.ActiveNodes.Load().([]uint64)
	if changes.UnacceptableQuorumLoss(active) {
		return errors.Errorf("%d out of %d nodes are alive, configuration will result in quorum loss", len(active), len(dummyOldConsentersMap))
	}

	return nil
}

// StatusReport returns the ConsensusRelation & Status
func (c *Chain) StatusReport() (types.ConsensusRelation, types.Status) {
	c.statusReportMutex.Lock()
	defer c.statusReportMutex.Unlock()

	return c.consensusRelation, c.status
}

func (c *Chain) suspectEviction() bool {
	if c.isRunning() != nil {
		return false
	}

	return atomic.LoadUint64(&c.lastKnownLeader) == uint64(0)
}

func (c *Chain) newEvictionSuspector() *evictionSuspector {
	consenterCertificate := &ConsenterCertificate{
		Logger:               c.logger,
		ConsenterCertificate: c.opts.Cert,
		CryptoProvider:       c.CryptoProvider,
	}

	return &evictionSuspector{
		amIInChannel:               consenterCertificate.IsConsenterOfChannel,
		evictionSuspicionThreshold: c.opts.EvictionSuspicion,
		writeBlock:                 c.support.Append,
		createPuller:               c.createPuller,
		height:                     c.support.Height,
		triggerCatchUp:             c.triggerCatchup,
		logger:                     c.logger,
		halt: func() {
			c.halt()
		},
	}
}

func (c *Chain) triggerCatchup(sn *raftpb.Snapshot) {
	select {
	case c.snapC <- sn:
	case <-c.doneC:
	}
}