github.com/onflow/flow-go@v0.33.17/engine/collection/message_hub/message_hub.go

package message_hub

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/consensus/hotstuff"
	"github.com/onflow/flow-go/consensus/hotstuff/model"
	"github.com/onflow/flow-go/consensus/hotstuff/notifications"
	"github.com/onflow/flow-go/engine"
	"github.com/onflow/flow-go/engine/collection"
	"github.com/onflow/flow-go/engine/common/fifoqueue"
	"github.com/onflow/flow-go/model/cluster"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/model/flow/filter"
	"github.com/onflow/flow-go/model/messages"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/component"
	"github.com/onflow/flow-go/module/irrecoverable"
	"github.com/onflow/flow-go/module/metrics"
	"github.com/onflow/flow-go/network"
	"github.com/onflow/flow-go/network/channels"
	clusterkv "github.com/onflow/flow-go/state/cluster"
	"github.com/onflow/flow-go/state/protocol"
	"github.com/onflow/flow-go/storage"
	"github.com/onflow/flow-go/utils/logging"
)

// defaultMessageHubRequestsWorkers is the number of workers dispatching events for requests
const defaultMessageHubRequestsWorkers = 5

// defaultProposalQueueCapacity is the number of pending outgoing proposals stored in the queue
const defaultProposalQueueCapacity = 3

// defaultVoteQueueCapacity is the number of pending outgoing votes stored in the queue
const defaultVoteQueueCapacity = 20

// defaultTimeoutQueueCapacity is the number of pending outgoing timeouts stored in the queue
const defaultTimeoutQueueCapacity = 3

// packedVote is a helper structure that packs a recipientID and a vote into one value,
// so both can be passed through fifoqueue.FifoQueue together.
type packedVote struct {
	recipientID flow.Identifier
	vote        *messages.ClusterBlockVote
}

// MessageHub is a central module for handling incoming and outgoing messages via the cluster consensus channel.
// It performs message routing for incoming messages by matching them by type and forwarding them to the respective engine.
// For incoming messages, processing looks like this:
//
//	   +-------------------+      +------------+
//	-->|  Cluster-Channel  |----->| MessageHub |
//	   +-------------------+      +------+-----+
//	                         ------------|------------
//	   +------+---------+    |    +------+-----+     |    +------+------------+
//	   | VoteAggregator |----+    | Compliance |     +----| TimeoutAggregator |
//	   +----------------+         +------------+          +------+------------+
//	          vote                     block                  timeout object
//
// MessageHub acts as a communicator and handles hotstuff.Consumer communication events to send votes and to
// broadcast timeouts and proposals. It is responsible for communication between cluster consensus participants.
// It implements the hotstuff.Consumer interface and needs to be subscribed for notifications via pub/sub.
// All communicator events are handled on a worker thread to prevent the sender from blocking.
// For outgoing messages, the processing logic looks like this:
//
//	+-------------------+      +------------+      +----------+      +------------------------+
//	|  Cluster-Channel  |<-----| MessageHub |<-----| Consumer |<-----|        Hotstuff        |
//	+-------------------+      +------+-----+      +----------+      +------------------------+
//	                                          pub/sub      vote, timeout, proposal
//
// MessageHub is safe to use in a concurrent environment.
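//
// A minimal wiring sketch (illustrative only, not part of the original file): construction uses
// NewMessageHub below, and the hub is then subscribed to HotStuff notifications. The
// `notificationsDistributor` name and its AddConsumer method are assumptions about the
// surrounding node setup, not APIs confirmed by this file:
//
//	hub, err := NewMessageHub(log, engineMetrics, net, me, comp, hot,
//		voteAggregator, timeoutAggregator, state, clusterState, payloads)
//	if err != nil {
//		return fmt.Errorf("could not create cluster message hub: %w", err)
//	}
//	notificationsDistributor.AddConsumer(hub) // deliver CommunicatorConsumer events via pub/sub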
type MessageHub struct {
	*component.ComponentManager
	notifications.NoopConsumer
	log                        zerolog.Logger
	me                         module.Local
	engineMetrics              module.EngineMetrics
	state                      protocol.State
	payloads                   storage.ClusterPayloads
	con                        network.Conduit
	ownOutboundMessageNotifier engine.Notifier
	ownOutboundVotes           *fifoqueue.FifoQueue // queue for handling outgoing vote transmissions
	ownOutboundProposals       *fifoqueue.FifoQueue // queue for handling outgoing proposal transmissions
	ownOutboundTimeouts        *fifoqueue.FifoQueue // queue for handling outgoing timeout transmissions
	clusterIdentityFilter      flow.IdentityFilter

	// injected dependencies
	compliance        collection.Compliance      // handler of incoming block proposals
	hotstuff          module.HotStuff            // used to submit proposals that were previously broadcast
	voteAggregator    hotstuff.VoteAggregator    // handler of incoming votes
	timeoutAggregator hotstuff.TimeoutAggregator // handler of incoming timeouts
}

var _ network.MessageProcessor = (*MessageHub)(nil)
var _ hotstuff.CommunicatorConsumer = (*MessageHub)(nil)

// NewMessageHub constructs a new instance of the message hub.
// No errors are expected during normal operations.
func NewMessageHub(log zerolog.Logger,
	engineMetrics module.EngineMetrics,
	net network.EngineRegistry,
	me module.Local,
	compliance collection.Compliance,
	hotstuff module.HotStuff,
	voteAggregator hotstuff.VoteAggregator,
	timeoutAggregator hotstuff.TimeoutAggregator,
	state protocol.State,
	clusterState clusterkv.State,
	payloads storage.ClusterPayloads,
) (*MessageHub, error) {
	// find my cluster for the current epoch
	// TODO this should flow from cluster state as source of truth
	clusters, err := state.Final().Epochs().Current().Clustering()
	if err != nil {
		return nil, fmt.Errorf("could not get clusters: %w", err)
	}
	currentCluster, _, found := clusters.ByNodeID(me.NodeID())
	if !found {
		return nil, fmt.Errorf("could not find cluster for self")
	}

	ownOutboundVotes, err := fifoqueue.NewFifoQueue(defaultVoteQueueCapacity)
	if err != nil {
		return nil, fmt.Errorf("could not initialize votes queue: %w", err)
	}
	ownOutboundProposals, err := fifoqueue.NewFifoQueue(defaultProposalQueueCapacity)
	if err != nil {
		return nil, fmt.Errorf("could not initialize proposals queue: %w", err)
	}
	ownOutboundTimeouts, err := fifoqueue.NewFifoQueue(defaultTimeoutQueueCapacity)
	if err != nil {
		return nil, fmt.Errorf("could not initialize timeouts queue: %w", err)
	}
	hub := &MessageHub{
		log:                        log.With().Str("engine", "cluster_message_hub").Logger(),
		me:                         me,
		engineMetrics:              engineMetrics,
		state:                      state,
		payloads:                   payloads,
		compliance:                 compliance,
		hotstuff:                   hotstuff,
		voteAggregator:             voteAggregator,
		timeoutAggregator:          timeoutAggregator,
		ownOutboundMessageNotifier: engine.NewNotifier(),
		ownOutboundVotes:           ownOutboundVotes,
		ownOutboundProposals:       ownOutboundProposals,
		ownOutboundTimeouts:        ownOutboundTimeouts,
		clusterIdentityFilter: filter.And(
			filter.In(currentCluster),
			filter.Not(filter.HasNodeID(me.NodeID())),
		),
	}

	// register network conduit
	chainID, err := clusterState.Params().ChainID()
	if err != nil {
		return nil, fmt.Errorf("could not get chain ID: %w", err)
	}
	conduit, err := net.Register(channels.ConsensusCluster(chainID), hub)
	if err != nil {
		return nil, fmt.Errorf("could not register engine: %w", err)
	}
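	// Note: channels.ConsensusCluster(chainID) yields a cluster-specific channel, so this
	// conduit only reaches other members of our own cluster, and Process (below) only
	// receives messages sent on that channel.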
	hub.con = conduit

	componentBuilder := component.NewComponentManagerBuilder()
	// This implementation tolerates the networking layer occasionally blocking on send requests.
	// By default we use 5 goroutines here. This is fine, because outbound messages are temporally
	// sparse under normal operations. Hence, the goroutines should mostly be asleep waiting for work.
	for i := 0; i < defaultMessageHubRequestsWorkers; i++ {
		componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
			ready()
			hub.queuedMessagesProcessingLoop(ctx)
		})
	}
	hub.ComponentManager = componentBuilder.Build()
	return hub, nil
}

// queuedMessagesProcessingLoop orchestrates dispatching of previously queued messages
func (h *MessageHub) queuedMessagesProcessingLoop(ctx irrecoverable.SignalerContext) {
	notifier := h.ownOutboundMessageNotifier.Channel()
	for {
		select {
		case <-ctx.Done():
			return
		case <-notifier:
			err := h.sendOwnMessages(ctx)
			if err != nil {
				ctx.Throw(fmt.Errorf("internal error processing queued messages: %w", err))
				return
			}
		}
	}
}

// sendOwnMessages dispatches previously queued messages on the worker thread.
// This function is called whenever we have queued messages ready to be dispatched.
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnMessages(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			return nil
		default:
		}

		msg, ok := h.ownOutboundProposals.Pop()
		if ok {
			block := msg.(*flow.Header)
			err := h.sendOwnProposal(block)
			if err != nil {
				return fmt.Errorf("could not process queued block %v: %w", block.ID(), err)
			}
			continue
		}

		msg, ok = h.ownOutboundVotes.Pop()
		if ok {
			packed := msg.(*packedVote)
			err := h.sendOwnVote(packed)
			if err != nil {
				return fmt.Errorf("could not process queued vote: %w", err)
			}
			continue
		}

		msg, ok = h.ownOutboundTimeouts.Pop()
		if ok {
			err := h.sendOwnTimeout(msg.(*model.TimeoutObject))
			if err != nil {
				return fmt.Errorf("could not process queued timeout: %w", err)
			}
			continue
		}

		// when there are no more messages in the queue, return to the outer loop
		// and wait for the next notification to arrive.
		return nil
	}
}

// sendOwnTimeout propagates the timeout to the consensus committee (excluding myself).
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnTimeout(timeout *model.TimeoutObject) error {
	log := timeout.LogContext(h.log).Logger()
	log.Info().Msg("processing timeout broadcast request from hotstuff")

	// Retrieve all collection nodes in our cluster (excluding myself).
	recipients, err := h.state.Final().Identities(h.clusterIdentityFilter)
	if err != nil {
		return fmt.Errorf("could not get cluster members for broadcasting timeout: %w", err)
	}
	// create the timeout message
	msg := &messages.ClusterTimeoutObject{
		View:        timeout.View,
		NewestQC:    timeout.NewestQC,
		LastViewTC:  timeout.LastViewTC,
		SigData:     timeout.SigData,
		TimeoutTick: timeout.TimeoutTick,
	}
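
	// Publish is best-effort: sending failures are logged below rather than escalated, and
	// an EmptyTargetList error (e.g. when this node is the only cluster member) is
	// silently tolerated.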
	err = h.con.Publish(msg, recipients.NodeIDs()...)
	if err != nil {
		if !errors.Is(err, network.EmptyTargetList) {
			log.Err(err).Msg("could not broadcast timeout")
		}
		return nil
	}
	log.Info().Msg("cluster timeout was broadcast")
	h.engineMetrics.MessageSent(metrics.EngineCollectionMessageHub, metrics.MessageTimeoutObject)

	return nil
}

// sendOwnVote propagates the vote via unicast to another node that is the next leader.
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnVote(packed *packedVote) error {
	log := h.log.With().
		Hex("collection_id", packed.vote.BlockID[:]).
		Uint64("collection_view", packed.vote.View).
		Hex("recipient_id", packed.recipientID[:]).
		Logger()
	log.Info().Msg("processing vote transmission request from hotstuff")

	// send the vote to the desired recipient
	err := h.con.Unicast(packed.vote, packed.recipientID)
	if err != nil {
		log.Err(err).Msg("could not send vote")
		return nil
	}
	log.Info().Msg("collection vote transmitted")
	h.engineMetrics.MessageSent(metrics.EngineCollectionMessageHub, metrics.MessageBlockVote)

	return nil
}

// sendOwnProposal propagates the block proposal to the consensus committee by broadcasting to all other cluster participants (excluding myself).
// No errors are expected during normal operations.
func (h *MessageHub) sendOwnProposal(header *flow.Header) error {
	// first, check that we are the proposer of the block
	if header.ProposerID != h.me.NodeID() {
		return fmt.Errorf("cannot broadcast proposal with non-local proposer (%x)", header.ProposerID)
	}

	// retrieve the payload for the block
	payload, err := h.payloads.ByBlockID(header.ID())
	if err != nil {
		return fmt.Errorf("could not retrieve payload for proposal: %w", err)
	}

	log := h.log.With().
		Str("chain_id", header.ChainID.String()).
		Uint64("block_height", header.Height).
		Uint64("block_view", header.View).
		Hex("block_id", logging.ID(header.ID())).
		Hex("parent_id", header.ParentID[:]).
		Hex("ref_block", payload.ReferenceBlockID[:]).
		Int("transaction_count", payload.Collection.Len()).
		Hex("parent_signer_indices", header.ParentVoterIndices).
		Logger()

	log.Debug().Msg("processing cluster broadcast request from hotstuff")

	// retrieve all collection nodes in our cluster (excluding myself)
	recipients, err := h.state.Final().Identities(h.clusterIdentityFilter)
	if err != nil {
		return fmt.Errorf("could not get cluster members for broadcasting collection proposal: %w", err)
	}

	// create the proposal message for the collection
	proposal := messages.NewClusterBlockProposal(&cluster.Block{
		Header:  header,
		Payload: payload,
	})

	// broadcast the proposal to all other cluster participants
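	// As with timeouts, broadcast failures are tolerated here: by this point the proposal
	// has already been handed to our own HotStuff instance in OnOwnProposal, so a failed
	// broadcast is logged rather than escalated.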
	err = h.con.Publish(proposal, recipients.NodeIDs()...)
	if err != nil {
		if !errors.Is(err, network.EmptyTargetList) {
			log.Err(err).Msg("could not send proposal message")
		}
		return nil
	}
	log.Info().Msg("cluster proposal was broadcast")
	h.engineMetrics.MessageSent(metrics.EngineCollectionMessageHub, metrics.MessageBlockProposal)

	return nil
}

// OnOwnVote propagates the vote to the relevant recipient(s):
// - [common case] someone else is the next leader: the vote is queued and later sent by a worker via unicast
// - [special case] this node is the next leader: the vote is forwarded directly to the node's internal `VoteAggregator`
func (h *MessageHub) OnOwnVote(blockID flow.Identifier, view uint64, sigData []byte, recipientID flow.Identifier) {
	vote := &messages.ClusterBlockVote{
		BlockID: blockID,
		View:    view,
		SigData: sigData,
	}

	// special case: I am the next leader
	if recipientID == h.me.NodeID() {
		h.forwardToOwnVoteAggregator(vote, h.me.NodeID()) // forward vote to my own `voteAggregator`
		return
	}

	// common case: someone else is leader
	packed := &packedVote{
		recipientID: recipientID,
		vote:        vote,
	}
	if ok := h.ownOutboundVotes.Push(packed); ok {
		h.ownOutboundMessageNotifier.Notify()
	} else {
		h.engineMetrics.OutboundMessageDropped(metrics.EngineCollectionMessageHub, metrics.MessageBlockVote)
	}
}

// OnOwnTimeout forwards the timeout to the node's internal `timeoutAggregator` and queues the timeout for
// subsequent propagation to all consensus participants (excluding this node).
func (h *MessageHub) OnOwnTimeout(timeout *model.TimeoutObject) {
	h.forwardToOwnTimeoutAggregator(timeout) // forward timeout to my own `timeoutAggregator`
	if ok := h.ownOutboundTimeouts.Push(timeout); ok {
		h.ownOutboundMessageNotifier.Notify()
	} else {
		h.engineMetrics.OutboundMessageDropped(metrics.EngineCollectionMessageHub, metrics.MessageTimeoutObject)
	}
}

// OnOwnProposal directly forwards the proposal to the HotStuff core logic (skipping the compliance engine, as
// we assume our own proposals to be correct) and queues the proposal for subsequent propagation to all other
// cluster participants (excluding this node).
// The proposal is placed in the queue only after the targeted publication time has been reached (or it is
// dropped on shutdown signal).
func (h *MessageHub) OnOwnProposal(proposal *flow.Header, targetPublicationTime time.Time) {
	go func() {
		select {
		case <-time.After(time.Until(targetPublicationTime)):
		case <-h.ShutdownSignal():
			return
		}

		hotstuffProposal := model.ProposalFromFlow(proposal)
		// notify vote aggregator that a new block proposal is available, in case we are the next leader
		h.voteAggregator.AddBlock(hotstuffProposal) // non-blocking

		// TODO(active-pacemaker): replace with pub/sub?
		// submit proposal to our own processing pipeline
		h.hotstuff.SubmitProposal(hotstuffProposal) // non-blocking

		if ok := h.ownOutboundProposals.Push(proposal); ok {
			h.ownOutboundMessageNotifier.Notify()
		} else {
			h.engineMetrics.OutboundMessageDropped(metrics.EngineCollectionMessageHub, metrics.MessageBlockProposal)
		}
	}()
}

// Process handles incoming messages from the cluster consensus channel. After matching a message by type,
// it forwards the message to the correct component for handling.
// No errors are expected during normal operations.
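// Process implements the network.MessageProcessor interface (see the assertion near the top of this file)
// and is invoked by the networking layer for every message arriving on our registered channel.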
func (h *MessageHub) Process(channel channels.Channel, originID flow.Identifier, message interface{}) error {
	switch msg := message.(type) {
	case *messages.ClusterBlockProposal:
		h.compliance.OnClusterBlockProposal(flow.Slashable[*messages.ClusterBlockProposal]{
			OriginID: originID,
			Message:  msg,
		})
	case *messages.ClusterBlockVote:
		h.forwardToOwnVoteAggregator(msg, originID)
	case *messages.ClusterTimeoutObject:
		t := &model.TimeoutObject{
			View:        msg.View,
			NewestQC:    msg.NewestQC,
			LastViewTC:  msg.LastViewTC,
			SignerID:    originID,
			SigData:     msg.SigData,
			TimeoutTick: msg.TimeoutTick,
		}
		h.forwardToOwnTimeoutAggregator(t)
	default:
		h.log.Warn().
			Bool(logging.KeySuspicious, true).
			Hex("origin_id", logging.ID(originID)).
			Str("message_type", fmt.Sprintf("%T", message)).
			Str("channel", channel.String()).
			Msg("delivered unsupported message type")
	}
	return nil
}

// forwardToOwnVoteAggregator converts the vote to a generic `model.Vote`, logs the vote and forwards it to our own `voteAggregator`.
// Per API convention, `voteAggregator` is non-blocking, hence this call returns quickly.
func (h *MessageHub) forwardToOwnVoteAggregator(vote *messages.ClusterBlockVote, originID flow.Identifier) {
	h.engineMetrics.MessageReceived(metrics.EngineCollectionMessageHub, metrics.MessageBlockVote)
	v := &model.Vote{
		View:     vote.View,
		BlockID:  vote.BlockID,
		SignerID: originID,
		SigData:  vote.SigData,
	}
	h.log.Info().
		Uint64("block_view", v.View).
		Hex("block_id", v.BlockID[:]).
		Hex("voter", v.SignerID[:]).
		Str("vote_id", v.ID().String()).
		Msg("block vote received, forwarding block vote to hotstuff vote aggregator")
	h.voteAggregator.AddVote(v)
}

// forwardToOwnTimeoutAggregator logs the timeout and forwards it to our own `timeoutAggregator`.
// Per API convention, `timeoutAggregator` is non-blocking, hence this call returns quickly.
func (h *MessageHub) forwardToOwnTimeoutAggregator(t *model.TimeoutObject) {
	h.engineMetrics.MessageReceived(metrics.EngineCollectionMessageHub, metrics.MessageTimeoutObject)
	h.log.Info().
		Hex("origin_id", t.SignerID[:]).
		Uint64("view", t.View).
		Str("timeout_id", t.ID().String()).
		Msg("timeout received, forwarding timeout to hotstuff timeout aggregator")
	h.timeoutAggregator.AddTimeout(t)
}
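
// Example (illustrative sketch, not part of the original file): a caller-side view of the
// vote path described in OnOwnVote above. The identifiers `blockID`, `view`, `sigData` and
// `nextLeaderID` are hypothetical placeholders:
//
//	// next leader is another node: the vote is queued and later unicast by a worker
//	hub.OnOwnVote(blockID, view, sigData, nextLeaderID)
//
//	// next leader is this node: the vote goes straight to the internal VoteAggregator
//	hub.OnOwnVote(blockID, view, sigData, me.NodeID())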