github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/compliance/engine.go

     1  package compliance
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/rs/zerolog"
    10  
    11  	"github.com/koko1123/flow-go-1/consensus/hotstuff/model"
    12  	"github.com/koko1123/flow-go-1/engine"
    13  	"github.com/koko1123/flow-go-1/engine/common/fifoqueue"
    14  	"github.com/koko1123/flow-go-1/engine/consensus/sealing/counters"
    15  	"github.com/koko1123/flow-go-1/model/events"
    16  	"github.com/koko1123/flow-go-1/model/flow"
    17  	"github.com/koko1123/flow-go-1/model/flow/filter"
    18  	"github.com/koko1123/flow-go-1/model/messages"
    19  	"github.com/koko1123/flow-go-1/module"
    20  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    21  	"github.com/koko1123/flow-go-1/module/lifecycle"
    22  	"github.com/koko1123/flow-go-1/module/metrics"
    23  	"github.com/koko1123/flow-go-1/network"
    24  	"github.com/koko1123/flow-go-1/network/channels"
    25  	"github.com/koko1123/flow-go-1/state/protocol"
    26  	"github.com/koko1123/flow-go-1/storage"
    27  	"github.com/koko1123/flow-go-1/utils/logging"
    28  )
    29  
    30  // defaultRangeResponseQueueCapacity is the maximum capacity of the block range responses queue
    31  const defaultRangeResponseQueueCapacity = 100
    32  
    33  // defaultBlockQueueCapacity is the maximum capacity of the block proposals queue
    34  const defaultBlockQueueCapacity = 10000
    35  
    36  // defaultVoteQueueCapacity is the maximum capacity of the block votes queue
    37  const defaultVoteQueueCapacity = 1000
    38  
    39  // Engine is a wrapper struct for `Core`, which implements the consensus algorithm.
    40  // Engine is responsible for handling incoming messages, queuing them for processing, and broadcasting proposals.
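        //
        // Inbound messages are buffered in FIFO queues by the message handler and
        // consumed by a single processing loop, while block finalization events are
        // handled on a dedicated goroutine (see finalizationProcessingLoop).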
    41  type Engine struct {
    42  	unit                       *engine.Unit
    43  	lm                         *lifecycle.LifecycleManager
    44  	log                        zerolog.Logger
    45  	mempool                    module.MempoolMetrics
    46  	metrics                    module.EngineMetrics
    47  	me                         module.Local
    48  	headers                    storage.Headers
    49  	payloads                   storage.Payloads
    50  	tracer                     module.Tracer
    51  	state                      protocol.State
    52  	prov                       network.Engine
    53  	core                       *Core
    54  	pendingBlocks              engine.MessageStore
    55  	pendingRangeResponses      engine.MessageStore
    56  	pendingVotes               engine.MessageStore
    57  	messageHandler             *engine.MessageHandler
    58  	finalizedView              counters.StrictMonotonousCounter
    59  	finalizationEventsNotifier engine.Notifier
    60  	con                        network.Conduit
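        	// stopHotstuff cancels the context in which HotStuff runs; it is called
        	// during shutdown in Done()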
    61  	stopHotstuff               context.CancelFunc
    62  }
    63  
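        // NewEngine instantiates the compliance engine, sets up its inbound FIFO
        // queues and message handler, and registers the engine on the consensus
        // committee network channel.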
    64  func NewEngine(
    65  	log zerolog.Logger,
    66  	net network.Network,
    67  	me module.Local,
    68  	prov network.Engine,
    69  	core *Core) (*Engine, error) {
    70  
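        	// FIFO queue for batches of blocks delivered in block range responses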
    71  	rangeResponseQueue, err := fifoqueue.NewFifoQueue(
    72  		defaultRangeResponseQueueCapacity,
    73  		fifoqueue.WithLengthObserver(func(len int) { core.mempool.MempoolEntries(metrics.ResourceBlockResponseQueue, uint(len)) }),
    74  	)
    75  
    76  	if err != nil {
    77  		return nil, fmt.Errorf("failed to create queue for block responses: %w", err)
    78  	}
    79  
    80  	pendingRangeResponses := &engine.FifoMessageStore{
    81  		FifoQueue: rangeResponseQueue,
    82  	}
    83  
    84  	// FIFO queue for block proposals
    85  	blocksQueue, err := fifoqueue.NewFifoQueue(
    86  		defaultBlockQueueCapacity,
    87  		fifoqueue.WithLengthObserver(func(len int) { core.mempool.MempoolEntries(metrics.ResourceBlockProposalQueue, uint(len)) }),
    88  	)
    89  	if err != nil {
    90  		return nil, fmt.Errorf("failed to create queue for inbound block proposals: %w", err)
    91  	}
    92  
    93  	pendingBlocks := &engine.FifoMessageStore{
    94  		FifoQueue: blocksQueue,
    95  	}
    96  
    97  	// FIFO queue for block votes
    98  	votesQueue, err := fifoqueue.NewFifoQueue(
    99  		defaultVoteQueueCapacity,
   100  		fifoqueue.WithLengthObserver(func(len int) { core.mempool.MempoolEntries(metrics.ResourceBlockVoteQueue, uint(len)) }),
   101  	)
   102  	if err != nil {
   103  		return nil, fmt.Errorf("failed to create queue for inbound block votes: %w", err)
   104  	}
   105  	pendingVotes := &engine.FifoMessageStore{FifoQueue: votesQueue}
   106  
   107  	// define message queueing behaviour
   108  	handler := engine.NewMessageHandler(
   109  		log.With().Str("compliance", "engine").Logger(),
   110  		engine.NewNotifier(),
   111  		engine.Pattern{
   112  			Match: func(msg *engine.Message) bool {
   113  				_, ok := msg.Payload.(*messages.BlockResponse)
   114  				if ok {
   115  					core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageBlockResponse)
   116  				}
   117  				return ok
   118  			},
   119  			Store: pendingRangeResponses,
   120  		},
   121  		engine.Pattern{
   122  			Match: func(msg *engine.Message) bool {
   123  				_, ok := msg.Payload.(*messages.BlockProposal)
   124  				if ok {
   125  					core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageBlockProposal)
   126  				}
   127  				return ok
   128  			},
   129  			Store: pendingBlocks,
   130  		},
   131  		engine.Pattern{
   132  			Match: func(msg *engine.Message) bool {
   133  				_, ok := msg.Payload.(*events.SyncedBlock)
   134  				if ok {
   135  					core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageSyncedBlock)
   136  				}
   137  				return ok
   138  			},
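        			// a synced block is wrapped as a regular block proposal, so that
        			// gossiped and synced blocks share the same queue and processing path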
   139  			Map: func(msg *engine.Message) (*engine.Message, bool) {
   140  				syncedBlock := msg.Payload.(*events.SyncedBlock)
   141  				msg = &engine.Message{
   142  					OriginID: msg.OriginID,
   143  					Payload: &messages.BlockProposal{
   144  						Block: syncedBlock.Block,
   145  					},
   146  				}
   147  				return msg, true
   148  			},
   149  			Store: pendingBlocks,
   150  		},
   151  		engine.Pattern{
   152  			Match: func(msg *engine.Message) bool {
   153  				_, ok := msg.Payload.(*messages.BlockVote)
   154  				if ok {
   155  					core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageBlockVote)
   156  				}
   157  				return ok
   158  			},
   159  			Store: pendingVotes,
   160  		},
   161  	)
   162  
   163  	eng := &Engine{
   164  		unit:                       engine.NewUnit(),
   165  		lm:                         lifecycle.NewLifecycleManager(),
   166  		log:                        log.With().Str("compliance", "engine").Logger(),
   167  		me:                         me,
   168  		mempool:                    core.mempool,
   169  		metrics:                    core.metrics,
   170  		headers:                    core.headers,
   171  		payloads:                   core.payloads,
   172  		pendingRangeResponses:      pendingRangeResponses,
   173  		pendingBlocks:              pendingBlocks,
   174  		pendingVotes:               pendingVotes,
   175  		state:                      core.state,
   176  		tracer:                     core.tracer,
   177  		prov:                       prov,
   178  		core:                       core,
   179  		messageHandler:             handler,
   180  		finalizationEventsNotifier: engine.NewNotifier(),
   181  	}
   182  
   183  	// register the engine with the network layer and store the conduit
   184  	eng.con, err = net.Register(channels.ConsensusCommittee, eng)
   185  	if err != nil {
   186  		return nil, fmt.Errorf("could not register engine: %w", err)
   187  	}
   188  
   189  	return eng, nil
   190  }
   191  
   192  // WithConsensus adds the consensus algorithm to the engine. This must be
   193  // called before the engine can start.
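        //
        // A minimal wiring sketch (illustrative; surrounding node setup omitted):
        //
        //	eng, err := NewEngine(log, net, me, prov, core)
        //	if err != nil {
        //		return err
        //	}
        //	eng = eng.WithConsensus(hot)
        //	<-eng.Ready()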
   194  func (e *Engine) WithConsensus(hot module.HotStuff) *Engine {
   195  	e.core.hotstuff = hot
   196  	return e
   197  }
   198  
   199  // Ready returns a ready channel that is closed once the engine has fully
   200  // started. For the consensus engine, this is true once the underlying consensus
   201  // algorithm has started.
   202  func (e *Engine) Ready() <-chan struct{} {
   203  	if e.core.hotstuff == nil {
   204  		panic("must initialize compliance engine with hotstuff engine")
   205  	}
   206  	e.lm.OnStart(func() {
   207  		e.unit.Launch(e.loop)
   208  		e.unit.Launch(e.finalizationProcessingLoop)
   209  
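        		// HotStuff is run with a signaler context: any irrecoverable error it
        		// throws is delivered on hotstuffErrChan and handled by handleHotStuffError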
   210  		ctx, cancel := context.WithCancel(context.Background())
   211  		signalerCtx, hotstuffErrChan := irrecoverable.WithSignaler(ctx)
   212  		e.stopHotstuff = cancel
   213  
   214  		// TODO: this workaround for handling fatal HotStuff errors is required only
   215  		//  because this engine and epochmgr do not use the Component pattern yet
   216  		e.unit.Launch(func() {
   217  			e.handleHotStuffError(hotstuffErrChan)
   218  		})
   219  
   220  		e.core.hotstuff.Start(signalerCtx)
   221  
   222  		// wait for HotStuff to start up
   223  		<-e.core.hotstuff.Ready()
   224  	})
   225  	return e.lm.Started()
   226  }
   227  
   228  // Done returns a done channel that is closed once the engine has fully stopped.
   229  // For the consensus engine, we wait for hotstuff to finish.
   230  func (e *Engine) Done() <-chan struct{} {
   231  	e.lm.OnStop(func() {
   232  		e.log.Info().Msg("shutting down hotstuff eventloop")
   233  		e.stopHotstuff()
   234  		<-e.core.hotstuff.Done()
   235  		e.log.Info().Msg("all components have been shut down")
   236  		<-e.unit.Done()
   237  	})
   238  	return e.lm.Stopped()
   239  }
   240  
   241  // SubmitLocal submits an event originating on the local node.
   242  func (e *Engine) SubmitLocal(event interface{}) {
   243  	err := e.ProcessLocal(event)
   244  	if err != nil {
   245  		e.log.Fatal().Err(err).Msg("internal error processing event")
   246  	}
   247  }
   248  
   249  // Submit submits the given event from the node with the given origin ID
   250  // for processing in a non-blocking manner. It returns as soon as the event
   251  // has been queued; internal errors while queueing crash the node.
   252  func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
   253  	err := e.Process(channel, originID, event)
   254  	if err != nil {
   255  		e.log.Fatal().Err(err).Msg("internal error processing event")
   256  	}
   257  }
   258  
   259  // ProcessLocal processes an event originating on the local node.
   260  func (e *Engine) ProcessLocal(event interface{}) error {
   261  	return e.messageHandler.Process(e.me.NodeID(), event)
   262  }
   263  
   264  // Process processes the given event from the node with the given origin ID in
   265  // a blocking manner. It returns the potential processing error when done.
   266  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   267  	err := e.messageHandler.Process(originID, event)
   268  	if err != nil {
   269  		if engine.IsIncompatibleInputTypeError(err) {
   270  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   271  			return nil
   272  		}
   273  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   274  	}
   275  	return nil
   276  }
   277  
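        // loop runs the main message processing loop: it waits for the message
        // handler to signal that new messages have been queued, then drains all
        // pending queues via processAvailableMessages.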
   278  func (e *Engine) loop() {
   279  	for {
   280  		select {
   281  		case <-e.unit.Quit():
   282  			return
   283  		case <-e.messageHandler.GetNotifier():
   284  			err := e.processAvailableMessages()
   285  			if err != nil {
   286  				e.log.Fatal().Err(err).Msg("internal error processing queued message")
   287  			}
   288  		}
   289  	}
   290  }
   291  
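        // processAvailableMessages drains the pending message queues, dispatching
        // each message to the corresponding handler in Core. It returns once all
        // queues are empty, or the first error encountered while processing.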
   292  func (e *Engine) processAvailableMessages() error {
   293  
   294  	for {
   295  		// TODO prioritization
   296  		// eg: msg := engine.SelectNextMessage()
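        		// for now, queues are drained in a fixed priority order: range
        		// responses first, then block proposals, then votes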
   297  		msg, ok := e.pendingRangeResponses.Get()
   298  		if ok {
   299  			blockResponse := msg.Payload.(*messages.BlockResponse)
   300  			for _, block := range blockResponse.Blocks {
   301  				// process each block and indicate it's from a range of blocks
   302  				err := e.core.OnBlockProposal(msg.OriginID, &messages.BlockProposal{
   303  					Block: block,
   304  				}, true)
   305  
   306  				if err != nil {
   307  					return fmt.Errorf("could not handle block proposal: %w", err)
   308  				}
   309  			}
   310  			continue
   311  		}
   312  
   313  		msg, ok = e.pendingBlocks.Get()
   314  		if ok {
   315  			err := e.core.OnBlockProposal(msg.OriginID, msg.Payload.(*messages.BlockProposal), true)
   316  			if err != nil {
   317  				return fmt.Errorf("could not handle block proposal: %w", err)
   318  			}
   319  			continue
   320  		}
   321  
   322  		msg, ok = e.pendingVotes.Get()
   323  		if ok {
   324  			err := e.core.OnBlockVote(msg.OriginID, msg.Payload.(*messages.BlockVote))
   325  			if err != nil {
   326  				return fmt.Errorf("could not handle block vote: %w", err)
   327  			}
   328  			continue
   329  		}
   330  
   331  		// when there are no more messages in the queue, return to the loop to wait
   332  		// for the next incoming message to arrive.
   333  		return nil
   334  	}
   335  }
   336  
   337  // SendVote will send a vote to the desired node.
   338  func (e *Engine) SendVote(blockID flow.Identifier, view uint64, sigData []byte, recipientID flow.Identifier) error {
   339  
   340  	log := e.log.With().
   341  		Hex("block_id", blockID[:]).
   342  		Uint64("block_view", view).
   343  		Hex("recipient_id", recipientID[:]).
   344  		Logger()
   345  
   346  	log.Info().Msg("processing vote transmission request from hotstuff")
   347  
   348  	// build the vote message
   349  	vote := &messages.BlockVote{
   350  		BlockID: blockID,
   351  		View:    view,
   352  		SigData: sigData,
   353  	}
   354  
   355  	// TODO: this is a hot-fix to mitigate the effects of the following Unicast call blocking occasionally
   356  	e.unit.Launch(func() {
   357  		// send the vote to the desired recipient
   358  		err := e.con.Unicast(vote, recipientID)
   359  		if err != nil {
   360  			log.Warn().Err(err).Msg("could not send vote")
   361  			return
   362  		}
   363  		e.metrics.MessageSent(metrics.EngineCompliance, metrics.MessageBlockVote)
   364  		log.Info().Msg("block vote transmitted")
   365  	})
   366  
   367  	return nil
   368  }
   369  
   370  // BroadcastProposalWithDelay will propagate a block proposal to all non-local consensus nodes.
   371  // Note the header has incomplete fields, because it was converted from a HotStuff block.
   372  func (e *Engine) BroadcastProposalWithDelay(header *flow.Header, delay time.Duration) error {
   373  
   374  	// first, check that we are the proposer of the block
   375  	if header.ProposerID != e.me.NodeID() {
   376  		return fmt.Errorf("cannot broadcast proposal with non-local proposer (%x)", header.ProposerID)
   377  	}
   378  
   379  	// get the parent of the block
   380  	parent, err := e.headers.ByBlockID(header.ParentID)
   381  	if err != nil {
   382  		return fmt.Errorf("could not retrieve proposal parent: %w", err)
   383  	}
   384  
   385  	// fill in the fields that can't be populated by HotStuff
   386  	header.ChainID = parent.ChainID
   387  	header.Height = parent.Height + 1
   388  
   389  	// retrieve the payload for the block
   390  	payload, err := e.payloads.ByBlockID(header.ID())
   391  	if err != nil {
   392  		return fmt.Errorf("could not retrieve payload for proposal: %w", err)
   393  	}
   394  
   395  	log := e.log.With().
   396  		Str("chain_id", header.ChainID.String()).
   397  		Uint64("block_height", header.Height).
   398  		Uint64("block_view", header.View).
   399  		Hex("block_id", logging.Entity(header)).
   400  		Hex("parent_id", header.ParentID[:]).
   401  		Hex("payload_hash", header.PayloadHash[:]).
   402  		Int("guarantees_count", len(payload.Guarantees)).
   403  		Int("seals_count", len(payload.Seals)).
   404  		Int("receipts_count", len(payload.Receipts)).
   405  		Time("timestamp", header.Timestamp).
   406  		Hex("signers", header.ParentVoterIndices).
   407  		Dur("delay", delay).
   408  		Logger()
   409  
   410  	log.Debug().Msg("processing proposal broadcast request from hotstuff")
   411  
   412  	// retrieve all consensus nodes without our ID
   413  	recipients, err := e.state.AtBlockID(header.ParentID).Identities(filter.And(
   414  		filter.HasRole(flow.RoleConsensus),
   415  		filter.Not(filter.HasNodeID(e.me.NodeID())),
   416  	))
   417  	if err != nil {
   418  		return fmt.Errorf("could not get consensus recipients: %w", err)
   419  	}
   420  
   421  	e.unit.LaunchAfter(delay, func() {
   422  
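        		// submit the proposal to our own HotStuff instance, concurrently with
        		// broadcasting it to the rest of the consensus committee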
   423  		go e.core.hotstuff.SubmitProposal(header, parent.View)
   424  
   425  		// NOTE: some fields are not needed for the message
   426  		// - proposer ID is conveyed over the network message
   427  		// - the payload hash is deduced from the payload
   428  		block := &flow.Block{
   429  			Header:  header,
   430  			Payload: payload,
   431  		}
   432  		proposal := messages.NewBlockProposal(block)
   433  
   434  		// broadcast the proposal to consensus nodes
   435  		err = e.con.Publish(proposal, recipients.NodeIDs()...)
   436  		if errors.Is(err, network.EmptyTargetList) {
   437  			return
   438  		}
   439  		if err != nil {
   440  			log.Error().Err(err).Msg("could not send proposal message")
   441  		}
   442  
   443  		e.metrics.MessageSent(metrics.EngineCompliance, metrics.MessageBlockProposal)
   444  
   445  		log.Info().Msg("block proposal broadcast")
   446  
   447  		// submit the proposal to the provider engine to forward it to other
   448  		// node roles
   449  		e.prov.SubmitLocal(proposal)
   450  	})
   451  
   452  	return nil
   453  }
   454  
   455  // BroadcastProposal will propagate a block proposal to all non-local consensus nodes.
   456  // Note the header has incomplete fields, because it was converted from a HotStuff block.
   457  func (e *Engine) BroadcastProposal(header *flow.Header) error {
   458  	return e.BroadcastProposalWithDelay(header, 0)
   459  }
   460  
   461  // OnFinalizedBlock implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer`.
   462  // It informs the compliance Core about the finalization of the respective block.
   463  //
   464  // CAUTION: the input to this callback is treated as trusted; precautions should be taken so that
   465  // messages from external nodes cannot be considered as inputs to this function.
   466  func (e *Engine) OnFinalizedBlock(block *model.Block) {
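        	// finalizedView is strictly monotonously increasing, so Set only returns
        	// true for a new, higher view; stale or duplicate finalization events are
        	// therefore dropped without notifying the processing loop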
   467  	if e.finalizedView.Set(block.View) {
   468  		e.finalizationEventsNotifier.Notify()
   469  	}
   470  }
   471  
   472  // finalizationProcessingLoop is a separate goroutine that performs processing of finalization events
   473  func (e *Engine) finalizationProcessingLoop() {
   474  	finalizationNotifier := e.finalizationEventsNotifier.Channel()
   475  	for {
   476  		select {
   477  		case <-e.unit.Quit():
   478  			return
   479  		case <-finalizationNotifier:
   480  			e.core.ProcessFinalizedView(e.finalizedView.Value())
   481  		}
   482  	}
   483  }
   484  
   485  // handleHotStuffError accepts the error channel from the HotStuff component and
   486  // crashes the node if any error is detected.
   487  //
   488  // TODO: this function should be removed in favour of refactoring this engine and
   489  // the epochmgr engine to use the Component pattern, so that irrecoverable errors
   490  // can be bubbled all the way to the node scaffold
   491  func (e *Engine) handleHotStuffError(hotstuffErrs <-chan error) {
   492  	for {
   493  		select {
   494  		case <-e.unit.Quit():
   495  			return
   496  		case err := <-hotstuffErrs:
   497  			if err != nil {
   498  				e.log.Fatal().Err(err).Msg("encountered fatal error in HotStuff")
   499  			}
   500  		}
   501  	}
   502  }