github.com/onflow/flow-go@v0.33.17/engine/consensus/message_hub/message_hub.go

package message_hub

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/consensus/hotstuff"
	"github.com/onflow/flow-go/consensus/hotstuff/model"
	"github.com/onflow/flow-go/consensus/hotstuff/notifications"
	"github.com/onflow/flow-go/engine"
	"github.com/onflow/flow-go/engine/common/fifoqueue"
	"github.com/onflow/flow-go/engine/consensus"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/model/flow/filter"
	"github.com/onflow/flow-go/model/messages"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/component"
	"github.com/onflow/flow-go/module/irrecoverable"
	"github.com/onflow/flow-go/module/metrics"
	"github.com/onflow/flow-go/network"
	"github.com/onflow/flow-go/network/channels"
	"github.com/onflow/flow-go/state/protocol"
	"github.com/onflow/flow-go/storage"
	"github.com/onflow/flow-go/utils/logging"
)

// defaultMessageHubRequestsWorkers number of workers to dispatch events for requests
const defaultMessageHubRequestsWorkers = 5

// defaultProposalQueueCapacity number of pending outgoing proposals stored in queue
const defaultProposalQueueCapacity = 3

// defaultVoteQueueCapacity number of pending outgoing votes stored in queue
const defaultVoteQueueCapacity = 20

// defaultTimeoutQueueCapacity number of pending outgoing timeouts stored in queue
const defaultTimeoutQueueCapacity = 3

// packedVote is a helper structure to pack recipientID and vote into one structure to pass through fifoqueue.FifoQueue
type packedVote struct {
	recipientID flow.Identifier
	vote        *messages.BlockVote
}

// MessageHub is a central module for handling incoming and outgoing messages via the consensus channel.
// It performs message routing for incoming messages by matching them by type and forwarding them to the respective engine.
// For incoming messages, processing looks like this:
//
//	   +-------------------+      +------------+
//	-->| Consensus-Channel |----->| MessageHub |
//	   +-------------------+      +------+-----+
//	                         ------------|------------
//	   +------+---------+    |    +------+-----+     |    +------+------------+
//	   | VoteAggregator |----+    | Compliance |     +----| TimeoutAggregator |
//	   +----------------+         +------------+          +------+------------+
//	          vote                     block                  timeout object
//
// MessageHub acts as a communicator and handles hotstuff.CommunicatorConsumer events to send votes, broadcast timeouts
// and proposals. It is responsible for communication between consensus participants.
// It implements the hotstuff.CommunicatorConsumer interface and needs to be subscribed to notifications via pub/sub.
// All communicator events are handled on a worker thread to prevent the sender from blocking.
// For outgoing messages, the processing logic looks like this:
//
//	+-------------------+      +------------+      +----------+      +------------------------+
//	| Consensus-Channel |<-----| MessageHub |<-----| Consumer |<-----|        Hotstuff        |
//	+-------------------+      +------+-----+      +----------+      +------------------------+
//	                                                  pub/sub          vote, timeout, proposal
//
// MessageHub is safe to use in a concurrent environment.
type MessageHub struct {
	*component.ComponentManager
	notifications.NoopConsumer
	log                        zerolog.Logger
	me                         module.Local
	engineMetrics              module.EngineMetrics
	state                      protocol.State
	payloads                   storage.Payloads
	con                        network.Conduit
	pushBlocksCon              network.Conduit
	ownOutboundMessageNotifier engine.Notifier
	ownOutboundVotes           *fifoqueue.FifoQueue // queue for handling outgoing vote transmissions
	ownOutboundProposals       *fifoqueue.FifoQueue // queue for handling outgoing proposal transmissions
	ownOutboundTimeouts        *fifoqueue.FifoQueue // queue for handling outgoing timeout transmissions

	// injected dependencies
	compliance        consensus.Compliance       // handler of incoming block proposals
	hotstuff          module.HotStuff            // used to submit proposals that were previously broadcast
	voteAggregator    hotstuff.VoteAggregator    // handler of incoming votes
	timeoutAggregator hotstuff.TimeoutAggregator // handler of incoming timeouts
}

var _ network.MessageProcessor = (*MessageHub)(nil)
var _ hotstuff.CommunicatorConsumer = (*MessageHub)(nil)
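
// Illustrative wiring note (an assumption, not part of this file): the hub only receives the
// OnOwnVote, OnOwnTimeout and OnOwnProposal callbacks below if it is subscribed to HotStuff's
// notification distributor after construction. The distributor and its method name here are
// hypothetical, shown only to sketch the expected wiring:
//
//	hotstuffDistributor.AddCommunicatorConsumer(hub) // hub implements hotstuff.CommunicatorConsumer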

// NewMessageHub constructs a new instance of the message hub.
// No errors are expected during normal operations.
func NewMessageHub(log zerolog.Logger,
	engineMetrics module.EngineMetrics,
	net network.EngineRegistry,
	me module.Local,
	compliance consensus.Compliance,
	hotstuff module.HotStuff,
	voteAggregator hotstuff.VoteAggregator,
	timeoutAggregator hotstuff.TimeoutAggregator,
	state protocol.State,
	payloads storage.Payloads,
) (*MessageHub, error) {
	ownOutboundVotes, err := fifoqueue.NewFifoQueue(defaultVoteQueueCapacity)
	if err != nil {
		return nil, fmt.Errorf("could not initialize votes queue: %w", err)
	}
	ownOutboundProposals, err := fifoqueue.NewFifoQueue(defaultProposalQueueCapacity)
	if err != nil {
		return nil, fmt.Errorf("could not initialize proposals queue: %w", err)
	}
	ownOutboundTimeouts, err := fifoqueue.NewFifoQueue(defaultTimeoutQueueCapacity)
	if err != nil {
		return nil, fmt.Errorf("could not initialize timeouts queue: %w", err)
	}
	hub := &MessageHub{
		log:                        log.With().Str("engine", "message_hub").Logger(),
		me:                         me,
		engineMetrics:              engineMetrics,
		state:                      state,
		payloads:                   payloads,
		compliance:                 compliance,
		hotstuff:                   hotstuff,
		voteAggregator:             voteAggregator,
		timeoutAggregator:          timeoutAggregator,
		ownOutboundMessageNotifier: engine.NewNotifier(),
		ownOutboundVotes:           ownOutboundVotes,
		ownOutboundProposals:       ownOutboundProposals,
		ownOutboundTimeouts:        ownOutboundTimeouts,
	}

	// register with the consensus committee channel and store the conduit
	hub.con, err = net.Register(channels.ConsensusCommittee, hub)
	if err != nil {
		return nil, fmt.Errorf("could not register engine on consensus committee channel: %w", err)
	}

	// register with the push-blocks channel and store the conduit
	hub.pushBlocksCon, err = net.Register(channels.PushBlocks, hub)
	if err != nil {
		return nil, fmt.Errorf("could not register engine on push blocks channel: %w", err)
	}

	componentBuilder := component.NewComponentManagerBuilder()
	// This implementation tolerates the networking layer occasionally blocking on send requests.
	// We use 5 go-routines here by default. This is fine, because outbound messages are temporally sparse
	// under normal operations. Hence, the go-routines should mostly be asleep waiting for work.
	for i := 0; i < defaultMessageHubRequestsWorkers; i++ {
		componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
			ready()
			hub.queuedMessagesProcessingLoop(ctx)
		})
	}
	hub.ComponentManager = componentBuilder.Build()
	return hub, nil
}
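
// Illustrative startup sketch (an assumption, not part of this file): the hub embeds a
// component.ComponentManager, so the caller starts it with a signaler context and waits for
// readiness roughly as follows (error-channel handling omitted for brevity):
//
//	signalerCtx, errCh := irrecoverable.WithSignaler(ctx)
//	hub.Start(signalerCtx)
//	<-hub.Ready()
//	_ = errCh // irrecoverable errors thrown by the worker loops surface here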

// queuedMessagesProcessingLoop orchestrates dispatching of previously queued messages
func (h *MessageHub) queuedMessagesProcessingLoop(ctx irrecoverable.SignalerContext) {
	notifier := h.ownOutboundMessageNotifier.Channel()
	for {
		select {
		case <-ctx.Done():
			return
		case <-notifier:
			err := h.sendOwnMessages(ctx)
			if err != nil {
				ctx.Throw(fmt.Errorf("internal error processing queued messages: %w", err))
				return
			}
		}
	}
}

// sendOwnMessages dispatches previously queued messages on the worker thread.
// This function is called whenever we have queued messages ready to be dispatched.
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnMessages(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			return nil
		default:
		}

		msg, ok := h.ownOutboundProposals.Pop()
		if ok {
			block := msg.(*flow.Header)
			err := h.sendOwnProposal(block)
			if err != nil {
				return fmt.Errorf("could not process queued block %v: %w", block.ID(), err)
			}
			continue
		}

		msg, ok = h.ownOutboundVotes.Pop()
		if ok {
			packed := msg.(*packedVote)
			err := h.sendOwnVote(packed)
			if err != nil {
				return fmt.Errorf("could not process queued vote: %w", err)
			}
			continue
		}

		msg, ok = h.ownOutboundTimeouts.Pop()
		if ok {
			err := h.sendOwnTimeout(msg.(*model.TimeoutObject))
			if err != nil {
				return fmt.Errorf("could not process queued timeout: %w", err)
			}
			continue
		}

		// when there are no more messages in the queues, return to the processing loop
		// and wait for the next notification.
		return nil
	}
}
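
// exampleQueueAndNotify is an illustrative sketch (not part of the upstream file) of the
// queue-then-notify pattern the hub uses for outbound messages: producers push onto a bounded
// FIFO queue and ping the notifier; the worker loops above then drain the queues until empty.
func exampleQueueAndNotify(queue *fifoqueue.FifoQueue, notifier engine.Notifier, msg interface{}) bool {
	// Push returns false when the queue is full; the hub then drops the message and reports it
	// via metrics instead of blocking the HotStuff caller.
	if ok := queue.Push(msg); !ok {
		return false
	}
	// Notifications are coalesced: calling Notify repeatedly while all workers are busy results
	// in at most one pending wake-up, which is why the workers drain the queues in a loop.
	notifier.Notify()
	return true
}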

// sendOwnTimeout propagates the timeout to the consensus committee (excluding myself).
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnTimeout(timeout *model.TimeoutObject) error {
	log := timeout.LogContext(h.log).Logger()
	log.Info().Msg("processing timeout broadcast request from hotstuff")

	// Retrieve all consensus nodes (excluding myself).
	// CAUTION: We must also include nodes with weight zero, because otherwise
	//          TCs might not be constructed at epoch switchover.
	recipients, err := h.state.Final().Identities(filter.And(
		filter.Not(filter.Ejected),
		filter.HasRole(flow.RoleConsensus),
		filter.Not(filter.HasNodeID(h.me.NodeID())),
	))
	if err != nil {
		return fmt.Errorf("could not get consensus recipients for broadcasting timeout: %w", err)
	}

	// create the timeout message
	msg := &messages.TimeoutObject{
		View:        timeout.View,
		NewestQC:    timeout.NewestQC,
		LastViewTC:  timeout.LastViewTC,
		SigData:     timeout.SigData,
		TimeoutTick: timeout.TimeoutTick,
	}
	err = h.con.Publish(msg, recipients.NodeIDs()...)
	if err != nil {
		if !errors.Is(err, network.EmptyTargetList) {
			log.Err(err).Msg("could not broadcast timeout")
		}
		return nil
	}
	log.Info().Msg("consensus timeout was broadcast")
	h.engineMetrics.MessageSent(metrics.EngineConsensusMessageHub, metrics.MessageTimeoutObject)

	return nil
}

// sendOwnVote propagates the vote via unicast to another node that is the next leader.
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnVote(packed *packedVote) error {
	log := h.log.With().
		Hex("block_id", packed.vote.BlockID[:]).
		Uint64("block_view", packed.vote.View).
		Hex("recipient_id", packed.recipientID[:]).
		Logger()
	log.Info().Msg("processing vote transmission request from hotstuff")

	// send the vote to the desired recipient
	err := h.con.Unicast(packed.vote, packed.recipientID)
	if err != nil {
		log.Err(err).Msg("could not send vote")
		return nil
	}
	h.engineMetrics.MessageSent(metrics.EngineConsensusMessageHub, metrics.MessageBlockVote)
	log.Info().Msg("block vote transmitted")

	return nil
}

// sendOwnProposal propagates the block proposal to the consensus committee and to the non-consensus nodes:
//   - broadcast to all other consensus participants (excluding myself)
//   - broadcast to all non-consensus participants
//
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnProposal(header *flow.Header) error {
	// first, check that we are the proposer of the block
	if header.ProposerID != h.me.NodeID() {
		return fmt.Errorf("cannot broadcast proposal with non-local proposer (%x)", header.ProposerID)
	}

	// retrieve the payload for the block
	payload, err := h.payloads.ByBlockID(header.ID())
	if err != nil {
		return fmt.Errorf("could not retrieve payload for proposal: %w", err)
	}

	log := h.log.With().
		Str("chain_id", header.ChainID.String()).
		Uint64("block_height", header.Height).
		Uint64("block_view", header.View).
		Hex("block_id", logging.Entity(header)).
		Hex("parent_id", header.ParentID[:]).
		Hex("payload_hash", header.PayloadHash[:]).
		Int("guarantees_count", len(payload.Guarantees)).
		Int("seals_count", len(payload.Seals)).
		Int("receipts_count", len(payload.Receipts)).
		Time("timestamp", header.Timestamp).
		Hex("signers", header.ParentVoterIndices).
		//Dur("delay", delay).
		Logger()

	log.Debug().Msg("processing proposal broadcast request from hotstuff")

	// Retrieve all consensus nodes (excluding myself).
	// CAUTION: We must also include nodes with weight zero, because otherwise
	//          new consensus nodes for the next epoch are left out.
	// Note: retrieving the identities from the protocol state requires a time-intensive database read.
	//       Therefore, we execute this on a worker routine, because `OnOwnProposal` is directly
	//       called by the consensus core logic.
	allIdentities, err := h.state.AtBlockID(header.ParentID).Identities(filter.And(
		filter.Not(filter.Ejected),
		filter.Not(filter.HasNodeID(h.me.NodeID())),
	))
	if err != nil {
		return fmt.Errorf("could not get identities for broadcasting proposal: %w", err)
	}

	consRecipients := allIdentities.Filter(filter.HasRole(flow.RoleConsensus))

	// NOTE: some fields are not needed for the message
	// - proposer ID is conveyed over the network message
	// - the payload hash is deduced from the payload
	proposal := messages.NewBlockProposal(&flow.Block{
		Header:  header,
		Payload: payload,
	})

	// broadcast the proposal to consensus nodes
	err = h.con.Publish(proposal, consRecipients.NodeIDs()...)
	if err != nil {
		if !errors.Is(err, network.EmptyTargetList) {
			log.Err(err).Msg("could not send proposal message")
		}
		return nil
	}
	log.Info().Msg("block proposal was broadcast")

	// submit proposal to non-consensus nodes
	h.provideProposal(proposal, allIdentities.Filter(filter.Not(filter.HasRole(flow.RoleConsensus))))
	h.engineMetrics.MessageSent(metrics.EngineConsensusMessageHub, metrics.MessageBlockProposal)

	return nil
}

// provideProposal is used when we want to broadcast a local block to the rest of the
// network (non-consensus nodes).
func (h *MessageHub) provideProposal(proposal *messages.BlockProposal, recipients flow.IdentityList) {
	header := proposal.Block.Header
	blockID := header.ID()
	log := h.log.With().
		Uint64("block_view", header.View).
		Hex("block_id", blockID[:]).
		Hex("parent_id", header.ParentID[:]).
		Logger()
	log.Info().Msg("block proposal submitted for propagation")

	// submit the block to the targets
	err := h.pushBlocksCon.Publish(proposal, recipients.NodeIDs()...)
	if err != nil {
		h.log.Err(err).Msg("failed to broadcast block")
		return
	}

	log.Info().Msg("block proposal propagated to non-consensus nodes")
}

// OnOwnVote propagates the vote to the relevant recipient:
//   - [common case] the vote is queued and later sent by a worker via unicast to the node that is the next leader
//   - [special case] this node is the next leader: the vote is directly forwarded to the node's internal `VoteAggregator`
func (h *MessageHub) OnOwnVote(blockID flow.Identifier, view uint64, sigData []byte, recipientID flow.Identifier) {
	vote := &messages.BlockVote{
		BlockID: blockID,
		View:    view,
		SigData: sigData,
	}

	// special case: I am the next leader
	if recipientID == h.me.NodeID() {
		h.forwardToOwnVoteAggregator(vote, h.me.NodeID()) // forward vote to my own `voteAggregator`
		return
	}

	// common case: someone else is leader
	packed := &packedVote{
		recipientID: recipientID,
		vote:        vote,
	}
	if ok := h.ownOutboundVotes.Push(packed); ok {
		h.ownOutboundMessageNotifier.Notify()
	} else {
		h.engineMetrics.OutboundMessageDropped(metrics.EngineConsensusMessageHub, metrics.MessageBlockVote)
	}
}
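
// Illustrative usage sketch (not from the upstream code base): the HotStuff voting logic emits
// this callback after signing a block, passing the next leader as recipient; the call returns
// immediately and a worker performs the actual unicast later. Variable names are illustrative:
//
//	hub.OnOwnVote(block.BlockID, block.View, sigData, nextLeaderID)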

// OnOwnTimeout forwards timeout to node's internal `timeoutAggregator` and queues timeout for
// subsequent propagation to all consensus participants (excluding this node)
func (h *MessageHub) OnOwnTimeout(timeout *model.TimeoutObject) {
	h.forwardToOwnTimeoutAggregator(timeout) // forward timeout to my own `timeoutAggregator`
	if ok := h.ownOutboundTimeouts.Push(timeout); ok {
		h.ownOutboundMessageNotifier.Notify()
	} else {
		h.engineMetrics.OutboundMessageDropped(metrics.EngineConsensusMessageHub, metrics.MessageTimeoutObject)
	}
}

// OnOwnProposal directly forwards the proposal to the HotStuff core logic (skipping the compliance engine as we assume our
// own proposals to be correct) and queues the proposal for subsequent propagation to all consensus participants (excluding this node).
// The proposal will only be placed in the queue after the specified delay (or dropped on shutdown signal).
func (h *MessageHub) OnOwnProposal(proposal *flow.Header, targetPublicationTime time.Time) {
	go func() {
		select {
		case <-time.After(time.Until(targetPublicationTime)):
		case <-h.ShutdownSignal():
			return
		}

		hotstuffProposal := model.ProposalFromFlow(proposal)
		// notify vote aggregator that a new block proposal is available, in case we are the next leader
		h.voteAggregator.AddBlock(hotstuffProposal) // non-blocking

		// TODO(active-pacemaker): replace with pub/sub?
		// submit proposal to our own processing pipeline
		h.hotstuff.SubmitProposal(hotstuffProposal) // non-blocking

		if ok := h.ownOutboundProposals.Push(proposal); ok {
			h.ownOutboundMessageNotifier.Notify()
		} else {
			h.engineMetrics.OutboundMessageDropped(metrics.EngineConsensusMessageHub, metrics.MessageBlockProposal)
		}
	}()
}
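
// exampleScheduleProposal is an illustrative sketch (not part of the upstream file): the consensus
// core logic would typically schedule the broadcast of a freshly built block a little into the
// future, so that block publication is paced rather than happening immediately.
func exampleScheduleProposal(hub *MessageHub, header *flow.Header, delay time.Duration) {
	// Returns immediately; once `delay` elapses the proposal is queued and a worker broadcasts it.
	hub.OnOwnProposal(header, time.Now().Add(delay))
}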

// Process handles incoming messages from the consensus channel. After matching the message by type, it is forwarded
// to the correct component for handling.
// No errors are expected during normal operations.
func (h *MessageHub) Process(channel channels.Channel, originID flow.Identifier, message interface{}) error {
	switch msg := message.(type) {
	case *messages.BlockProposal:
		h.compliance.OnBlockProposal(flow.Slashable[*messages.BlockProposal]{
			OriginID: originID,
			Message:  msg,
		})
	case *messages.BlockVote:
		h.forwardToOwnVoteAggregator(msg, originID)
	case *messages.TimeoutObject:
		t := &model.TimeoutObject{
			View:        msg.View,
			NewestQC:    msg.NewestQC,
			LastViewTC:  msg.LastViewTC,
			SignerID:    originID,
			SigData:     msg.SigData,
			TimeoutTick: msg.TimeoutTick,
		}
		h.forwardToOwnTimeoutAggregator(t)
	default:
		h.log.Warn().
			Bool(logging.KeySuspicious, true).
			Hex("origin_id", logging.ID(originID)).
			Str("message_type", fmt.Sprintf("%T", message)).
			Str("channel", channel.String()).
			Msgf("delivered unsupported message type")
	}
	return nil
}
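
// exampleDeliverIncomingVote is an illustrative sketch (not part of the upstream file) of how the
// networking layer hands an incoming message to the hub: Process routes it by concrete type, and
// unknown types are logged as suspicious and dropped rather than returned as an error.
func exampleDeliverIncomingVote(hub *MessageHub, originID flow.Identifier, vote *messages.BlockVote) error {
	return hub.Process(channels.ConsensusCommittee, originID, vote)
}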

// forwardToOwnVoteAggregator converts the vote to a generic `model.Vote`, logs the vote and forwards it to own `voteAggregator`.
// Per API convention, `voteAggregator` is non-blocking, hence, this call returns quickly.
func (h *MessageHub) forwardToOwnVoteAggregator(vote *messages.BlockVote, originID flow.Identifier) {
	h.engineMetrics.MessageReceived(metrics.EngineConsensusMessageHub, metrics.MessageBlockVote)
	v := &model.Vote{
		View:     vote.View,
		BlockID:  vote.BlockID,
		SignerID: originID,
		SigData:  vote.SigData,
	}
	h.log.Info().
		Uint64("block_view", v.View).
		Hex("block_id", v.BlockID[:]).
		Hex("voter", v.SignerID[:]).
		Str("vote_id", v.ID().String()).
		Msg("block vote received, forwarding block vote to hotstuff vote aggregator")
	h.voteAggregator.AddVote(v)
}

// forwardToOwnTimeoutAggregator logs the timeout and forwards it to own `timeoutAggregator`.
// Per API convention, `timeoutAggregator` is non-blocking, hence, this call returns quickly.
func (h *MessageHub) forwardToOwnTimeoutAggregator(t *model.TimeoutObject) {
	h.engineMetrics.MessageReceived(metrics.EngineConsensusMessageHub, metrics.MessageTimeoutObject)
	h.log.Info().
		Hex("origin_id", t.SignerID[:]).
		Uint64("view", t.View).
		Str("timeout_id", t.ID().String()).
		Msg("timeout received, forwarding timeout to hotstuff timeout aggregator")
	h.timeoutAggregator.AddTimeout(t)
}