github.com/koko1123/flow-go-1@v0.29.6/engine/consensus/compliance/engine.go (about) 1 package compliance 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 "github.com/rs/zerolog" 10 11 "github.com/koko1123/flow-go-1/consensus/hotstuff/model" 12 "github.com/koko1123/flow-go-1/engine" 13 "github.com/koko1123/flow-go-1/engine/common/fifoqueue" 14 "github.com/koko1123/flow-go-1/engine/consensus/sealing/counters" 15 "github.com/koko1123/flow-go-1/model/events" 16 "github.com/koko1123/flow-go-1/model/flow" 17 "github.com/koko1123/flow-go-1/model/flow/filter" 18 "github.com/koko1123/flow-go-1/model/messages" 19 "github.com/koko1123/flow-go-1/module" 20 "github.com/koko1123/flow-go-1/module/irrecoverable" 21 "github.com/koko1123/flow-go-1/module/lifecycle" 22 "github.com/koko1123/flow-go-1/module/metrics" 23 "github.com/koko1123/flow-go-1/network" 24 "github.com/koko1123/flow-go-1/network/channels" 25 "github.com/koko1123/flow-go-1/state/protocol" 26 "github.com/koko1123/flow-go-1/storage" 27 "github.com/koko1123/flow-go-1/utils/logging" 28 ) 29 30 // defaultRangeResponseQueueCapacity maximum capacity of block range responses queue 31 const defaultRangeResponseQueueCapacity = 100 32 33 // defaultBlockQueueCapacity maximum capacity of block proposals queue 34 const defaultBlockQueueCapacity = 10000 35 36 // defaultVoteQueueCapacity maximum capacity of block votes queue 37 const defaultVoteQueueCapacity = 1000 38 39 // Engine is a wrapper struct for `Core` which implements consensus algorithm. 40 // Engine is responsible for handling incoming messages, queueing for processing, broadcasting proposals. 41 type Engine struct { 42 unit *engine.Unit 43 lm *lifecycle.LifecycleManager 44 log zerolog.Logger 45 mempool module.MempoolMetrics 46 metrics module.EngineMetrics 47 me module.Local 48 headers storage.Headers 49 payloads storage.Payloads 50 tracer module.Tracer 51 state protocol.State 52 prov network.Engine 53 core *Core 54 pendingBlocks engine.MessageStore 55 pendingRangeResponses engine.MessageStore 56 pendingVotes engine.MessageStore 57 messageHandler *engine.MessageHandler 58 finalizedView counters.StrictMonotonousCounter 59 finalizationEventsNotifier engine.Notifier 60 con network.Conduit 61 stopHotstuff context.CancelFunc 62 } 63 64 func NewEngine( 65 log zerolog.Logger, 66 net network.Network, 67 me module.Local, 68 prov network.Engine, 69 core *Core) (*Engine, error) { 70 71 rangeResponseQueue, err := fifoqueue.NewFifoQueue( 72 defaultRangeResponseQueueCapacity, 73 fifoqueue.WithLengthObserver(func(len int) { core.mempool.MempoolEntries(metrics.ResourceBlockResponseQueue, uint(len)) }), 74 ) 75 76 if err != nil { 77 return nil, fmt.Errorf("failed to create queue for block responses: %w", err) 78 } 79 80 pendingRangeResponses := &engine.FifoMessageStore{ 81 FifoQueue: rangeResponseQueue, 82 } 83 84 // FIFO queue for block proposals 85 blocksQueue, err := fifoqueue.NewFifoQueue( 86 defaultBlockQueueCapacity, 87 fifoqueue.WithLengthObserver(func(len int) { core.mempool.MempoolEntries(metrics.ResourceBlockProposalQueue, uint(len)) }), 88 ) 89 if err != nil { 90 return nil, fmt.Errorf("failed to create queue for inbound receipts: %w", err) 91 } 92 93 pendingBlocks := &engine.FifoMessageStore{ 94 FifoQueue: blocksQueue, 95 } 96 97 // FIFO queue for block votes 98 votesQueue, err := fifoqueue.NewFifoQueue( 99 defaultVoteQueueCapacity, 100 fifoqueue.WithLengthObserver(func(len int) { core.mempool.MempoolEntries(metrics.ResourceBlockVoteQueue, uint(len)) }), 101 ) 102 if err != nil { 103 return nil, fmt.Errorf("failed to create queue for inbound approvals: %w", err) 104 } 105 pendingVotes := &engine.FifoMessageStore{FifoQueue: votesQueue} 106 107 // define message queueing behaviour 108 handler := engine.NewMessageHandler( 109 log.With().Str("compliance", "engine").Logger(), 110 engine.NewNotifier(), 111 engine.Pattern{ 112 Match: func(msg *engine.Message) bool { 113 _, ok := msg.Payload.(*messages.BlockResponse) 114 if ok { 115 core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageBlockResponse) 116 } 117 return ok 118 }, 119 Store: pendingRangeResponses, 120 }, 121 engine.Pattern{ 122 Match: func(msg *engine.Message) bool { 123 _, ok := msg.Payload.(*messages.BlockProposal) 124 if ok { 125 core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageBlockProposal) 126 } 127 return ok 128 }, 129 Store: pendingBlocks, 130 }, 131 engine.Pattern{ 132 Match: func(msg *engine.Message) bool { 133 _, ok := msg.Payload.(*events.SyncedBlock) 134 if ok { 135 core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageSyncedBlock) 136 } 137 return ok 138 }, 139 Map: func(msg *engine.Message) (*engine.Message, bool) { 140 syncedBlock := msg.Payload.(*events.SyncedBlock) 141 msg = &engine.Message{ 142 OriginID: msg.OriginID, 143 Payload: &messages.BlockProposal{ 144 Block: syncedBlock.Block, 145 }, 146 } 147 return msg, true 148 }, 149 Store: pendingBlocks, 150 }, 151 engine.Pattern{ 152 Match: func(msg *engine.Message) bool { 153 _, ok := msg.Payload.(*messages.BlockVote) 154 if ok { 155 core.metrics.MessageReceived(metrics.EngineCompliance, metrics.MessageBlockVote) 156 } 157 return ok 158 }, 159 Store: pendingVotes, 160 }, 161 ) 162 163 eng := &Engine{ 164 unit: engine.NewUnit(), 165 lm: lifecycle.NewLifecycleManager(), 166 log: log.With().Str("compliance", "engine").Logger(), 167 me: me, 168 mempool: core.mempool, 169 metrics: core.metrics, 170 headers: core.headers, 171 payloads: core.payloads, 172 pendingRangeResponses: pendingRangeResponses, 173 pendingBlocks: pendingBlocks, 174 pendingVotes: pendingVotes, 175 state: core.state, 176 tracer: core.tracer, 177 prov: prov, 178 core: core, 179 messageHandler: handler, 180 finalizationEventsNotifier: engine.NewNotifier(), 181 } 182 183 // register the core with the network layer and store the conduit 184 eng.con, err = net.Register(channels.ConsensusCommittee, eng) 185 if err != nil { 186 return nil, fmt.Errorf("could not register core: %w", err) 187 } 188 189 return eng, nil 190 } 191 192 // WithConsensus adds the consensus algorithm to the engine. This must be 193 // called before the engine can start. 194 func (e *Engine) WithConsensus(hot module.HotStuff) *Engine { 195 e.core.hotstuff = hot 196 return e 197 } 198 199 // Ready returns a ready channel that is closed once the engine has fully 200 // started. For consensus engine, this is true once the underlying consensus 201 // algorithm has started. 202 func (e *Engine) Ready() <-chan struct{} { 203 if e.core.hotstuff == nil { 204 panic("must initialize compliance engine with hotstuff engine") 205 } 206 e.lm.OnStart(func() { 207 e.unit.Launch(e.loop) 208 e.unit.Launch(e.finalizationProcessingLoop) 209 210 ctx, cancel := context.WithCancel(context.Background()) 211 signalerCtx, hotstuffErrChan := irrecoverable.WithSignaler(ctx) 212 e.stopHotstuff = cancel 213 214 // TODO: this workaround for handling fatal HotStuff errors is required only 215 // because this engine and epochmgr do not use the Component pattern yet 216 e.unit.Launch(func() { 217 e.handleHotStuffError(hotstuffErrChan) 218 }) 219 220 e.core.hotstuff.Start(signalerCtx) 221 // wait for request handler to startup 222 223 <-e.core.hotstuff.Ready() 224 }) 225 return e.lm.Started() 226 } 227 228 // Done returns a done channel that is closed once the engine has fully stopped. 229 // For the consensus engine, we wait for hotstuff to finish. 230 func (e *Engine) Done() <-chan struct{} { 231 e.lm.OnStop(func() { 232 e.log.Info().Msg("shutting down hotstuff eventloop") 233 e.stopHotstuff() 234 <-e.core.hotstuff.Done() 235 e.log.Info().Msg("all components have been shut down") 236 <-e.unit.Done() 237 }) 238 return e.lm.Stopped() 239 } 240 241 // SubmitLocal submits an event originating on the local node. 242 func (e *Engine) SubmitLocal(event interface{}) { 243 err := e.ProcessLocal(event) 244 if err != nil { 245 e.log.Fatal().Err(err).Msg("internal error processing event") 246 } 247 } 248 249 // Submit submits the given event from the node with the given origin ID 250 // for processing in a non-blocking manner. It returns instantly and logs 251 // a potential processing error internally when done. 252 func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) { 253 err := e.Process(channel, originID, event) 254 if err != nil { 255 e.log.Fatal().Err(err).Msg("internal error processing event") 256 } 257 } 258 259 // ProcessLocal processes an event originating on the local node. 260 func (e *Engine) ProcessLocal(event interface{}) error { 261 return e.messageHandler.Process(e.me.NodeID(), event) 262 } 263 264 // Process processes the given event from the node with the given origin ID in 265 // a blocking manner. It returns the potential processing error when done. 266 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 267 err := e.messageHandler.Process(originID, event) 268 if err != nil { 269 if engine.IsIncompatibleInputTypeError(err) { 270 e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel) 271 return nil 272 } 273 return fmt.Errorf("unexpected error while processing engine message: %w", err) 274 } 275 return nil 276 } 277 278 func (e *Engine) loop() { 279 for { 280 select { 281 case <-e.unit.Quit(): 282 return 283 case <-e.messageHandler.GetNotifier(): 284 err := e.processAvailableMessages() 285 if err != nil { 286 e.log.Fatal().Err(err).Msg("internal error processing queued message") 287 } 288 } 289 } 290 } 291 292 func (e *Engine) processAvailableMessages() error { 293 294 for { 295 // TODO prioritization 296 // eg: msg := engine.SelectNextMessage() 297 msg, ok := e.pendingRangeResponses.Get() 298 if ok { 299 blockResponse := msg.Payload.(*messages.BlockResponse) 300 for _, block := range blockResponse.Blocks { 301 // process each block and indicate it's from a range of blocks 302 err := e.core.OnBlockProposal(msg.OriginID, &messages.BlockProposal{ 303 Block: block, 304 }, true) 305 306 if err != nil { 307 return fmt.Errorf("could not handle block proposal: %w", err) 308 } 309 } 310 continue 311 } 312 313 msg, ok = e.pendingBlocks.Get() 314 if ok { 315 err := e.core.OnBlockProposal(msg.OriginID, msg.Payload.(*messages.BlockProposal), true) 316 if err != nil { 317 return fmt.Errorf("could not handle block proposal: %w", err) 318 } 319 continue 320 } 321 322 msg, ok = e.pendingVotes.Get() 323 if ok { 324 err := e.core.OnBlockVote(msg.OriginID, msg.Payload.(*messages.BlockVote)) 325 if err != nil { 326 return fmt.Errorf("could not handle block vote: %w", err) 327 } 328 continue 329 } 330 331 // when there is no more messages in the queue, back to the loop to wait 332 // for the next incoming message to arrive. 333 return nil 334 } 335 } 336 337 // SendVote will send a vote to the desired node. 338 func (e *Engine) SendVote(blockID flow.Identifier, view uint64, sigData []byte, recipientID flow.Identifier) error { 339 340 log := e.log.With(). 341 Hex("block_id", blockID[:]). 342 Uint64("block_view", view). 343 Hex("recipient_id", recipientID[:]). 344 Logger() 345 346 log.Info().Msg("processing vote transmission request from hotstuff") 347 348 // build the vote message 349 vote := &messages.BlockVote{ 350 BlockID: blockID, 351 View: view, 352 SigData: sigData, 353 } 354 355 // TODO: this is a hot-fix to mitigate the effects of the following Unicast call blocking occasionally 356 e.unit.Launch(func() { 357 // send the vote the desired recipient 358 err := e.con.Unicast(vote, recipientID) 359 if err != nil { 360 log.Warn().Err(err).Msg("could not send vote") 361 return 362 } 363 e.metrics.MessageSent(metrics.EngineCompliance, metrics.MessageBlockVote) 364 log.Info().Msg("block vote transmitted") 365 }) 366 367 return nil 368 } 369 370 // BroadcastProposalWithDelay will propagate a block proposal to all non-local consensus nodes. 371 // Note the header has incomplete fields, because it was converted from a hotstuff. 372 func (e *Engine) BroadcastProposalWithDelay(header *flow.Header, delay time.Duration) error { 373 374 // first, check that we are the proposer of the block 375 if header.ProposerID != e.me.NodeID() { 376 return fmt.Errorf("cannot broadcast proposal with non-local proposer (%x)", header.ProposerID) 377 } 378 379 // get the parent of the block 380 parent, err := e.headers.ByBlockID(header.ParentID) 381 if err != nil { 382 return fmt.Errorf("could not retrieve proposal parent: %w", err) 383 } 384 385 // fill in the fields that can't be populated by HotStuff 386 header.ChainID = parent.ChainID 387 header.Height = parent.Height + 1 388 389 // retrieve the payload for the block 390 payload, err := e.payloads.ByBlockID(header.ID()) 391 if err != nil { 392 return fmt.Errorf("could not retrieve payload for proposal: %w", err) 393 } 394 395 log := e.log.With(). 396 Str("chain_id", header.ChainID.String()). 397 Uint64("block_height", header.Height). 398 Uint64("block_view", header.View). 399 Hex("block_id", logging.Entity(header)). 400 Hex("parent_id", header.ParentID[:]). 401 Hex("payload_hash", header.PayloadHash[:]). 402 Int("gaurantees_count", len(payload.Guarantees)). 403 Int("seals_count", len(payload.Seals)). 404 Int("receipts_count", len(payload.Receipts)). 405 Time("timestamp", header.Timestamp). 406 Hex("signers", header.ParentVoterIndices). 407 Dur("delay", delay). 408 Logger() 409 410 log.Debug().Msg("processing proposal broadcast request from hotstuff") 411 412 // retrieve all consensus nodes without our ID 413 recipients, err := e.state.AtBlockID(header.ParentID).Identities(filter.And( 414 filter.HasRole(flow.RoleConsensus), 415 filter.Not(filter.HasNodeID(e.me.NodeID())), 416 )) 417 if err != nil { 418 return fmt.Errorf("could not get consensus recipients: %w", err) 419 } 420 421 e.unit.LaunchAfter(delay, func() { 422 423 go e.core.hotstuff.SubmitProposal(header, parent.View) 424 425 // NOTE: some fields are not needed for the message 426 // - proposer ID is conveyed over the network message 427 // - the payload hash is deduced from the payload 428 block := &flow.Block{ 429 Header: header, 430 Payload: payload, 431 } 432 proposal := messages.NewBlockProposal(block) 433 434 // broadcast the proposal to consensus nodes 435 err = e.con.Publish(proposal, recipients.NodeIDs()...) 436 if errors.Is(err, network.EmptyTargetList) { 437 return 438 } 439 if err != nil { 440 log.Error().Err(err).Msg("could not send proposal message") 441 } 442 443 e.metrics.MessageSent(metrics.EngineCompliance, metrics.MessageBlockProposal) 444 445 log.Info().Msg("block proposal broadcasted") 446 447 // submit the proposal to the provider engine to forward it to other 448 // node roles 449 e.prov.SubmitLocal(proposal) 450 }) 451 452 return nil 453 } 454 455 // BroadcastProposal will propagate a block proposal to all non-local consensus nodes. 456 // Note the header has incomplete fields, because it was converted from a hotstuff. 457 func (e *Engine) BroadcastProposal(header *flow.Header) error { 458 return e.BroadcastProposalWithDelay(header, 0) 459 } 460 461 // OnFinalizedBlock implements the `OnFinalizedBlock` callback from the `hotstuff.FinalizationConsumer` 462 // (1) Informs sealing.Core about finalization of respective block. 463 // 464 // CAUTION: the input to this callback is treated as trusted; precautions should be taken that messages 465 // from external nodes cannot be considered as inputs to this function 466 func (e *Engine) OnFinalizedBlock(block *model.Block) { 467 if e.finalizedView.Set(block.View) { 468 e.finalizationEventsNotifier.Notify() 469 } 470 } 471 472 // finalizationProcessingLoop is a separate goroutine that performs processing of finalization events 473 func (e *Engine) finalizationProcessingLoop() { 474 finalizationNotifier := e.finalizationEventsNotifier.Channel() 475 for { 476 select { 477 case <-e.unit.Quit(): 478 return 479 case <-finalizationNotifier: 480 e.core.ProcessFinalizedView(e.finalizedView.Value()) 481 } 482 } 483 } 484 485 // handleHotStuffError accepts the error channel from the HotStuff component and 486 // crashes the node if any error is detected. 487 // 488 // TODO: this function should be removed in favour of refactoring this engine and 489 // the epochmgr engine to use the Component pattern, so that irrecoverable errors 490 // can be bubbled all the way to the node scaffold 491 func (e *Engine) handleHotStuffError(hotstuffErrs <-chan error) { 492 for { 493 select { 494 case <-e.unit.Quit(): 495 return 496 case err := <-hotstuffErrs: 497 if err != nil { 498 e.log.Fatal().Err(err).Msg("encountered fatal error in HotStuff") 499 } 500 } 501 } 502 }