github.com/onflow/flow-go@v0.33.17/engine/consensus/message_hub/message_hub.go

package message_hub

import (
    "context"
    "errors"
    "fmt"
    "time"

    "github.com/rs/zerolog"

    "github.com/onflow/flow-go/consensus/hotstuff"
    "github.com/onflow/flow-go/consensus/hotstuff/model"
    "github.com/onflow/flow-go/consensus/hotstuff/notifications"
    "github.com/onflow/flow-go/engine"
    "github.com/onflow/flow-go/engine/common/fifoqueue"
    "github.com/onflow/flow-go/engine/consensus"
    "github.com/onflow/flow-go/model/flow"
    "github.com/onflow/flow-go/model/flow/filter"
    "github.com/onflow/flow-go/model/messages"
    "github.com/onflow/flow-go/module"
    "github.com/onflow/flow-go/module/component"
    "github.com/onflow/flow-go/module/irrecoverable"
    "github.com/onflow/flow-go/module/metrics"
    "github.com/onflow/flow-go/network"
    "github.com/onflow/flow-go/network/channels"
    "github.com/onflow/flow-go/state/protocol"
    "github.com/onflow/flow-go/storage"
    "github.com/onflow/flow-go/utils/logging"
)

// defaultMessageHubRequestsWorkers is the number of workers dispatching events for requests
const defaultMessageHubRequestsWorkers = 5

// defaultProposalQueueCapacity is the number of pending outgoing proposals stored in the queue
const defaultProposalQueueCapacity = 3

// defaultVoteQueueCapacity is the number of pending outgoing votes stored in the queue
const defaultVoteQueueCapacity = 20

// defaultTimeoutQueueCapacity is the number of pending outgoing timeouts stored in the queue
const defaultTimeoutQueueCapacity = 3

// packedVote is a helper structure packing recipientID and vote into one value, so it can be passed through fifoqueue.FifoQueue
type packedVote struct {
    recipientID flow.Identifier
    vote        *messages.BlockVote
}

// MessageHub is a central module for handling incoming and outgoing messages via the consensus channel.
// It routes incoming messages by matching them by type and forwarding them to the respective engine.
// For incoming messages, processing looks like this:
//
//	   +-------------------+      +------------+
//	-->| Consensus-Channel |----->| MessageHub |
//	   +-------------------+      +------+-----+
//	                       ------------|------------
//	+------+---------+    |     +------+-----+     |    +------+------------+
//	| VoteAggregator |----+     | Compliance |     +----| TimeoutAggregator |
//	+----------------+          +------------+          +------+------------+
//	      vote                      block                  timeout object
//
// MessageHub acts as a communicator and handles hotstuff.Consumer communication events to send votes,
// broadcast timeouts and proposals. It is responsible for communication between consensus participants.
// It implements the hotstuff.Consumer interface and needs to be subscribed for notifications via pub/sub.
// All communicator events are handled on a worker thread to prevent the sender from blocking.
// For outgoing messages, the processing logic looks like this:
//
//	+-------------------+      +------------+      +----------+      +------------------------+
//	| Consensus-Channel |<-----| MessageHub |<-----| Consumer |<-----|        Hotstuff        |
//	+-------------------+      +------+-----+      +----------+      +------------------------+
//	                                                 pub/sub           vote, timeout, proposal
//
// MessageHub is safe to use in a concurrent environment. A minimal wiring sketch follows NewMessageHub below.
type MessageHub struct {
    *component.ComponentManager
    notifications.NoopConsumer
    log                        zerolog.Logger
    me                         module.Local
    engineMetrics              module.EngineMetrics
    state                      protocol.State
    payloads                   storage.Payloads
    con                        network.Conduit
    pushBlocksCon              network.Conduit
    ownOutboundMessageNotifier engine.Notifier
    ownOutboundVotes           *fifoqueue.FifoQueue // queue for handling outgoing vote transmissions
    ownOutboundProposals       *fifoqueue.FifoQueue // queue for handling outgoing proposal transmissions
    ownOutboundTimeouts        *fifoqueue.FifoQueue // queue for handling outgoing timeout transmissions

    // injected dependencies
    compliance        consensus.Compliance       // handler of incoming block proposals
    hotstuff          module.HotStuff            // used to submit proposals that were previously broadcast
    voteAggregator    hotstuff.VoteAggregator    // handler of incoming votes
    timeoutAggregator hotstuff.TimeoutAggregator // handler of incoming timeouts
}

var _ network.MessageProcessor = (*MessageHub)(nil)
var _ hotstuff.CommunicatorConsumer = (*MessageHub)(nil)

// NewMessageHub constructs a new instance of the message hub.
// No errors are expected during normal operations.
func NewMessageHub(log zerolog.Logger,
    engineMetrics module.EngineMetrics,
    net network.EngineRegistry,
    me module.Local,
    compliance consensus.Compliance,
    hotstuff module.HotStuff,
    voteAggregator hotstuff.VoteAggregator,
    timeoutAggregator hotstuff.TimeoutAggregator,
    state protocol.State,
    payloads storage.Payloads,
) (*MessageHub, error) {
    ownOutboundVotes, err := fifoqueue.NewFifoQueue(defaultVoteQueueCapacity)
    if err != nil {
        return nil, fmt.Errorf("could not initialize votes queue: %w", err)
    }
    ownOutboundProposals, err := fifoqueue.NewFifoQueue(defaultProposalQueueCapacity)
    if err != nil {
        return nil, fmt.Errorf("could not initialize proposals queue: %w", err)
    }
    ownOutboundTimeouts, err := fifoqueue.NewFifoQueue(defaultTimeoutQueueCapacity)
    if err != nil {
        return nil, fmt.Errorf("could not initialize timeouts queue: %w", err)
    }
    hub := &MessageHub{
        log:                        log.With().Str("engine", "message_hub").Logger(),
        me:                         me,
        engineMetrics:              engineMetrics,
        state:                      state,
        payloads:                   payloads,
        compliance:                 compliance,
        hotstuff:                   hotstuff,
        voteAggregator:             voteAggregator,
        timeoutAggregator:          timeoutAggregator,
        ownOutboundMessageNotifier: engine.NewNotifier(),
        ownOutboundVotes:           ownOutboundVotes,
        ownOutboundProposals:       ownOutboundProposals,
        ownOutboundTimeouts:        ownOutboundTimeouts,
    }

    // register with the network layer on the consensus committee channel and store the conduit
    hub.con, err = net.Register(channels.ConsensusCommittee, hub)
    if err != nil {
        return nil, fmt.Errorf("could not register engine on consensus committee channel: %w", err)
    }

    // register with the network layer on the push blocks channel and store the conduit
    hub.pushBlocksCon, err = net.Register(channels.PushBlocks, hub)
    if err != nil {
        return nil, fmt.Errorf("could not register engine on push blocks channel: %w", err)
    }

    componentBuilder := component.NewComponentManagerBuilder()
    // This implementation tolerates the networking layer occasionally blocking on send requests.
    // By default, we use 5 goroutines here. This is fine, because outbound messages are temporally
    // sparse under normal operations. Hence, the goroutines should mostly be asleep waiting for work.
    for i := 0; i < defaultMessageHubRequestsWorkers; i++ {
        componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
            ready()
            hub.queuedMessagesProcessingLoop(ctx)
        })
    }
    hub.ComponentManager = componentBuilder.Build()
    return hub, nil
}
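// The sketch below illustrates how a consensus node might construct, subscribe, and start the
// MessageHub. It is a minimal sketch for illustration only: the `distributor` variable, its
// AddCommunicatorConsumer method, and the signaler-context setup are assumptions and may differ
// from the actual node bootstrapping code; Start and Ready come from the embedded ComponentManager.
//
//	hub, err := NewMessageHub(log, engineMetrics, net, me, compliance, hot,
//		voteAggregator, timeoutAggregator, state, payloads)
//	if err != nil {
//		return fmt.Errorf("could not create message hub: %w", err)
//	}
//
//	// subscribe the hub for HotStuff's communicator notifications (pub/sub);
//	// `distributor` is a hypothetical stand-in for the node's notification distributor
//	distributor.AddCommunicatorConsumer(hub)
//
//	// start the hub's workers; `signalerCtx` is assumed to be an irrecoverable.SignalerContext
//	// derived from the node's root context
//	hub.Start(signalerCtx)
//	<-hub.Ready()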
// queuedMessagesProcessingLoop orchestrates dispatching of previously queued messages.
func (h *MessageHub) queuedMessagesProcessingLoop(ctx irrecoverable.SignalerContext) {
    notifier := h.ownOutboundMessageNotifier.Channel()
    for {
        select {
        case <-ctx.Done():
            return
        case <-notifier:
            err := h.sendOwnMessages(ctx)
            if err != nil {
                ctx.Throw(fmt.Errorf("internal error processing queued messages: %w", err))
                return
            }
        }
    }
}

// sendOwnMessages dispatches previously queued messages on the worker thread.
// This function is called whenever we have queued messages ready to be dispatched.
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnMessages(ctx context.Context) error {
    for {
        select {
        case <-ctx.Done():
            return nil
        default:
        }

        msg, ok := h.ownOutboundProposals.Pop()
        if ok {
            block := msg.(*flow.Header)
            err := h.sendOwnProposal(block)
            if err != nil {
                return fmt.Errorf("could not process queued block %v: %w", block.ID(), err)
            }
            continue
        }

        msg, ok = h.ownOutboundVotes.Pop()
        if ok {
            packed := msg.(*packedVote)
            err := h.sendOwnVote(packed)
            if err != nil {
                return fmt.Errorf("could not process queued vote: %w", err)
            }
            continue
        }

        msg, ok = h.ownOutboundTimeouts.Pop()
        if ok {
            err := h.sendOwnTimeout(msg.(*model.TimeoutObject))
            if err != nil {
                return fmt.Errorf("could not process queued timeout: %w", err)
            }
            continue
        }

        // When there are no more messages in the queues, return to the processing loop
        // to wait for the next notification.
        return nil
    }
}
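// The queue-plus-notifier pattern used above decouples event producers (HotStuff's consumer
// callbacks) from network sends: producers push into a bounded FIFO queue and ping a notifier,
// while worker goroutines drain the queues until empty. The snippet below is a simplified,
// self-contained sketch of that pattern; it does not use flow-go's fifoqueue or engine.Notifier
// APIs and is only meant to illustrate the control flow:
//
//	notifier := make(chan struct{}, 1) // buffered: one pending notification is enough
//	queue := make(chan string, 20)     // stand-in for the bounded FIFO queue
//
//	// producer: enqueue if there is capacity, otherwise drop the message
//	enqueue := func(msg string) {
//		select {
//		case queue <- msg:
//			select {
//			case notifier <- struct{}{}:
//			default: // a notification is already pending
//			}
//		default:
//			// queue full: drop (mirrors the OutboundMessageDropped metric above)
//		}
//	}
//
//	// worker: drain the queue until it is empty, then go back to waiting
//	drain := func() {
//		for {
//			select {
//			case msg := <-queue:
//				fmt.Println("sending", msg) // the network send would happen here
//			default:
//				return
//			}
//		}
//	}
//	go func() {
//		for range notifier {
//			drain()
//		}
//	}()
//
//	enqueue("vote for next leader")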
// sendOwnTimeout propagates the timeout to the consensus committee (excluding myself).
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnTimeout(timeout *model.TimeoutObject) error {
    log := timeout.LogContext(h.log).Logger()
    log.Info().Msg("processing timeout broadcast request from hotstuff")

    // Retrieve all consensus nodes (excluding myself).
    // CAUTION: We must also include nodes with weight zero, because otherwise
    //          TCs might not be constructed at epoch switchover.
    recipients, err := h.state.Final().Identities(filter.And(
        filter.Not(filter.Ejected),
        filter.HasRole(flow.RoleConsensus),
        filter.Not(filter.HasNodeID(h.me.NodeID())),
    ))
    if err != nil {
        return fmt.Errorf("could not get consensus recipients for broadcasting timeout: %w", err)
    }

    // create the timeout message
    msg := &messages.TimeoutObject{
        View:        timeout.View,
        NewestQC:    timeout.NewestQC,
        LastViewTC:  timeout.LastViewTC,
        SigData:     timeout.SigData,
        TimeoutTick: timeout.TimeoutTick,
    }
    err = h.con.Publish(msg, recipients.NodeIDs()...)
    if err != nil {
        if !errors.Is(err, network.EmptyTargetList) {
            log.Err(err).Msg("could not broadcast timeout")
        }
        return nil
    }
    log.Info().Msg("consensus timeout was broadcast")
    h.engineMetrics.MessageSent(metrics.EngineConsensusMessageHub, metrics.MessageTimeoutObject)

    return nil
}

// sendOwnVote propagates the vote via unicast to another node, which is the next leader.
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnVote(packed *packedVote) error {
    log := h.log.With().
        Hex("block_id", packed.vote.BlockID[:]).
        Uint64("block_view", packed.vote.View).
        Hex("recipient_id", packed.recipientID[:]).
        Logger()
    log.Info().Msg("processing vote transmission request from hotstuff")

    // send the vote to the desired recipient
    err := h.con.Unicast(packed.vote, packed.recipientID)
    if err != nil {
        log.Err(err).Msg("could not send vote")
        return nil
    }
    h.engineMetrics.MessageSent(metrics.EngineConsensusMessageHub, metrics.MessageBlockVote)
    log.Info().Msg("block vote transmitted")

    return nil
}

// sendOwnProposal propagates the block proposal to the consensus committee and submits it to the non-consensus network:
//   - broadcast to all other consensus participants (excluding myself)
//   - broadcast to all non-consensus participants
//
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnProposal(header *flow.Header) error {
    // first, check that we are the proposer of the block
    if header.ProposerID != h.me.NodeID() {
        return fmt.Errorf("cannot broadcast proposal with non-local proposer (%x)", header.ProposerID)
    }

    // retrieve the payload for the block
    payload, err := h.payloads.ByBlockID(header.ID())
    if err != nil {
        return fmt.Errorf("could not retrieve payload for proposal: %w", err)
    }

    log := h.log.With().
        Str("chain_id", header.ChainID.String()).
        Uint64("block_height", header.Height).
        Uint64("block_view", header.View).
        Hex("block_id", logging.Entity(header)).
        Hex("parent_id", header.ParentID[:]).
        Hex("payload_hash", header.PayloadHash[:]).
        Int("guarantees_count", len(payload.Guarantees)).
        Int("seals_count", len(payload.Seals)).
        Int("receipts_count", len(payload.Receipts)).
        Time("timestamp", header.Timestamp).
        Hex("signers", header.ParentVoterIndices).
        //Dur("delay", delay).
        Logger()

    log.Debug().Msg("processing proposal broadcast request from hotstuff")

    // Retrieve all consensus nodes (excluding myself).
    // CAUTION: We must also include nodes with weight zero, because otherwise
    //          new consensus nodes for the next epoch are left out.
    // Note: retrieving identities from the protocol state requires a time-intensive database read.
    //       Therefore, we execute this on a worker routine, because `OnOwnProposal` is called
    //       directly by the consensus core logic.
    allIdentities, err := h.state.AtBlockID(header.ParentID).Identities(filter.And(
        filter.Not(filter.Ejected),
        filter.Not(filter.HasNodeID(h.me.NodeID())),
    ))
    if err != nil {
        return fmt.Errorf("could not get identities for broadcasting proposal: %w", err)
    }

    consRecipients := allIdentities.Filter(filter.HasRole(flow.RoleConsensus))

    // NOTE: some fields are not needed for the message
    //  - proposer ID is conveyed over the network message
    //  - the payload hash is deduced from the payload
    proposal := messages.NewBlockProposal(&flow.Block{
        Header:  header,
        Payload: payload,
    })

    // broadcast the proposal to consensus nodes
    err = h.con.Publish(proposal, consRecipients.NodeIDs()...)
    if err != nil {
        if !errors.Is(err, network.EmptyTargetList) {
            log.Err(err).Msg("could not send proposal message")
        }
        return nil
    }
    log.Info().Msg("block proposal was broadcast")

    // submit proposal to non-consensus nodes
    h.provideProposal(proposal, allIdentities.Filter(filter.Not(filter.HasRole(flow.RoleConsensus))))
    h.engineMetrics.MessageSent(metrics.EngineConsensusMessageHub, metrics.MessageBlockProposal)

    return nil
}

// provideProposal is used when we want to broadcast a local block to the rest of the
// network (non-consensus nodes).
func (h *MessageHub) provideProposal(proposal *messages.BlockProposal, recipients flow.IdentityList) {
    header := proposal.Block.Header
    blockID := header.ID()
    log := h.log.With().
        Uint64("block_view", header.View).
        Hex("block_id", blockID[:]).
        Hex("parent_id", header.ParentID[:]).
        Logger()
    log.Info().Msg("block proposal submitted for propagation")

    // submit the block to the targets
    err := h.pushBlocksCon.Publish(proposal, recipients.NodeIDs()...)
    if err != nil {
        log.Err(err).Msg("failed to broadcast block")
        return
    }

    log.Info().Msg("block proposal propagated to non-consensus nodes")
}

// OnOwnVote propagates the vote to the relevant recipient(s):
//   - [common case] the vote is queued and subsequently sent via unicast to the next leader by a worker
//   - [special case] this node is the next leader: the vote is forwarded directly to the node's internal `VoteAggregator`
func (h *MessageHub) OnOwnVote(blockID flow.Identifier, view uint64, sigData []byte, recipientID flow.Identifier) {
    vote := &messages.BlockVote{
        BlockID: blockID,
        View:    view,
        SigData: sigData,
    }

    // special case: I am the next leader
    if recipientID == h.me.NodeID() {
        h.forwardToOwnVoteAggregator(vote, h.me.NodeID()) // forward vote to my own `voteAggregator`
        return
    }

    // common case: someone else is the next leader
    packed := &packedVote{
        recipientID: recipientID,
        vote:        vote,
    }
    if ok := h.ownOutboundVotes.Push(packed); ok {
        h.ownOutboundMessageNotifier.Notify()
    } else {
        h.engineMetrics.OutboundMessageDropped(metrics.EngineConsensusMessageHub, metrics.MessageBlockVote)
    }
}

// OnOwnTimeout forwards the timeout to the node's internal `timeoutAggregator` and queues the timeout for
// subsequent propagation to all consensus participants (excluding this node).
func (h *MessageHub) OnOwnTimeout(timeout *model.TimeoutObject) {
    h.forwardToOwnTimeoutAggregator(timeout) // forward timeout to my own `timeoutAggregator`
    if ok := h.ownOutboundTimeouts.Push(timeout); ok {
        h.ownOutboundMessageNotifier.Notify()
    } else {
        h.engineMetrics.OutboundMessageDropped(metrics.EngineConsensusMessageHub, metrics.MessageTimeoutObject)
    }
}

// OnOwnProposal directly forwards the proposal to the HotStuff core logic (skipping the compliance engine, as we assume
// our own proposals to be correct) and queues the proposal for subsequent propagation to all consensus participants
// (excluding this node). The proposal is placed in the queue only after the specified delay has elapsed
// (or is dropped on the shutdown signal).
func (h *MessageHub) OnOwnProposal(proposal *flow.Header, targetPublicationTime time.Time) {
    go func() {
        select {
        case <-time.After(time.Until(targetPublicationTime)):
        case <-h.ShutdownSignal():
            return
        }

        hotstuffProposal := model.ProposalFromFlow(proposal)
        // notify the vote aggregator that a new block proposal is available, in case we are the next leader
        h.voteAggregator.AddBlock(hotstuffProposal) // non-blocking

        // TODO(active-pacemaker): replace with pub/sub?
        // submit proposal to our own processing pipeline
        h.hotstuff.SubmitProposal(hotstuffProposal) // non-blocking

        if ok := h.ownOutboundProposals.Push(proposal); ok {
            h.ownOutboundMessageNotifier.Notify()
        } else {
            h.engineMetrics.OutboundMessageDropped(metrics.EngineConsensusMessageHub, metrics.MessageBlockProposal)
        }
    }()
}

// Process handles incoming messages from the consensus channel. After matching the message by type, it forwards
// the message to the correct component for handling.
// No errors are expected during normal operations.
func (h *MessageHub) Process(channel channels.Channel, originID flow.Identifier, message interface{}) error {
    switch msg := message.(type) {
    case *messages.BlockProposal:
        h.compliance.OnBlockProposal(flow.Slashable[*messages.BlockProposal]{
            OriginID: originID,
            Message:  msg,
        })
    case *messages.BlockVote:
        h.forwardToOwnVoteAggregator(msg, originID)
    case *messages.TimeoutObject:
        t := &model.TimeoutObject{
            View:        msg.View,
            NewestQC:    msg.NewestQC,
            LastViewTC:  msg.LastViewTC,
            SignerID:    originID,
            SigData:     msg.SigData,
            TimeoutTick: msg.TimeoutTick,
        }
        h.forwardToOwnTimeoutAggregator(t)
    default:
        h.log.Warn().
            Bool(logging.KeySuspicious, true).
            Hex("origin_id", logging.ID(originID)).
            Str("message_type", fmt.Sprintf("%T", message)).
            Str("channel", channel.String()).
            Msgf("delivered unsupported message type")
    }
    return nil
}

// forwardToOwnVoteAggregator converts the vote to a generic `model.Vote`, logs the vote and forwards it to the node's
// own `voteAggregator`. Per API convention, `voteAggregator` is non-blocking, hence this call returns quickly.
func (h *MessageHub) forwardToOwnVoteAggregator(vote *messages.BlockVote, originID flow.Identifier) {
    h.engineMetrics.MessageReceived(metrics.EngineConsensusMessageHub, metrics.MessageBlockVote)
    v := &model.Vote{
        View:     vote.View,
        BlockID:  vote.BlockID,
        SignerID: originID,
        SigData:  vote.SigData,
    }
    h.log.Info().
        Uint64("block_view", v.View).
        Hex("block_id", v.BlockID[:]).
        Hex("voter", v.SignerID[:]).
        Str("vote_id", v.ID().String()).
        Msg("block vote received, forwarding block vote to hotstuff vote aggregator")
    h.voteAggregator.AddVote(v)
}

// forwardToOwnTimeoutAggregator logs the timeout and forwards it to the node's own `timeoutAggregator`.
// Per API convention, `timeoutAggregator` is non-blocking, hence this call returns quickly.
func (h *MessageHub) forwardToOwnTimeoutAggregator(t *model.TimeoutObject) {
    h.engineMetrics.MessageReceived(metrics.EngineConsensusMessageHub, metrics.MessageTimeoutObject)
    h.log.Info().
        Hex("origin_id", t.SignerID[:]).
        Uint64("view", t.View).
        Str("timeout_id", t.ID().String()).
        Msg("timeout received, forwarding timeout to hotstuff timeout aggregator")
    h.timeoutAggregator.AddTimeout(t)
}
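// The outbound half of the hub is driven entirely by the three CommunicatorConsumer callbacks
// defined above (OnOwnProposal, OnOwnVote, OnOwnTimeout). The sketch below shows how these events
// reach the hub from the HotStuff side; in production they arrive via the pub/sub distributor, and
// the surrounding variables (header, delay, blockID, view, sigData, nextLeaderID, timeout) are
// placeholders for illustration only.
//
//	// ask the hub to publish our proposal once the target publication time is reached
//	hub.OnOwnProposal(header, time.Now().Add(delay))
//
//	// ask the hub to unicast our vote to the next leader (or, if we are the next leader
//	// ourselves, to forward it straight to our own vote aggregator)
//	hub.OnOwnVote(blockID, view, sigData, nextLeaderID)
//
//	// ask the hub to broadcast our timeout object to the consensus committee
//	hub.OnOwnTimeout(timeout)
//
// All three calls only enqueue work (or spawn a short-lived goroutine in the proposal case) and
// return quickly, so the consensus core logic is never blocked by the networking layer.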