github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/collection/compliance/core.go (about) 1 package compliance 2 3 import ( 4 "errors" 5 "fmt" 6 "time" 7 8 "github.com/rs/zerolog" 9 10 "github.com/onflow/flow-go/consensus/hotstuff" 11 "github.com/onflow/flow-go/consensus/hotstuff/model" 12 "github.com/onflow/flow-go/engine" 13 "github.com/onflow/flow-go/model/cluster" 14 "github.com/onflow/flow-go/model/flow" 15 "github.com/onflow/flow-go/model/messages" 16 "github.com/onflow/flow-go/module" 17 "github.com/onflow/flow-go/module/compliance" 18 "github.com/onflow/flow-go/module/counters" 19 "github.com/onflow/flow-go/module/irrecoverable" 20 "github.com/onflow/flow-go/module/mempool" 21 "github.com/onflow/flow-go/module/metrics" 22 "github.com/onflow/flow-go/state" 23 clusterkv "github.com/onflow/flow-go/state/cluster" 24 "github.com/onflow/flow-go/storage" 25 "github.com/onflow/flow-go/utils/logging" 26 ) 27 28 // Core contains the central business logic for the collector clusters' compliance engine. 29 // It is responsible for handling communication for the embedded consensus algorithm. 30 // CAUTION with CONCURRENCY: 31 // - At the moment, compliance.Core _can not_ process blocks concurrently. Callers of `OnBlockProposal` 32 // need to ensure single-threaded access. 33 // - The only exception is calls to `ProcessFinalizedView`, which is the only concurrency-safe 34 // method of compliance.Core 35 type Core struct { 36 log zerolog.Logger // used to log relevant actions with context 37 config compliance.Config 38 engineMetrics module.EngineMetrics 39 mempoolMetrics module.MempoolMetrics 40 hotstuffMetrics module.HotstuffMetrics 41 collectionMetrics module.CollectionMetrics 42 proposalViolationNotifier hotstuff.ProposalViolationConsumer 43 headers storage.Headers 44 state clusterkv.MutableState 45 // track latest finalized view/height - used to efficiently drop outdated or too-far-ahead blocks 46 finalizedView counters.StrictMonotonousCounter 47 finalizedHeight counters.StrictMonotonousCounter 48 pending module.PendingClusterBlockBuffer // pending block cache 49 sync module.BlockRequester 50 hotstuff module.HotStuff 51 validator hotstuff.Validator 52 voteAggregator hotstuff.VoteAggregator 53 timeoutAggregator hotstuff.TimeoutAggregator 54 } 55 56 // NewCore instantiates the business logic for the collector clusters' compliance engine. 57 func NewCore( 58 log zerolog.Logger, 59 collector module.EngineMetrics, 60 mempool module.MempoolMetrics, 61 hotstuffMetrics module.HotstuffMetrics, 62 collectionMetrics module.CollectionMetrics, 63 proposalViolationNotifier hotstuff.ProposalViolationConsumer, 64 headers storage.Headers, 65 state clusterkv.MutableState, 66 pending module.PendingClusterBlockBuffer, 67 sync module.BlockRequester, 68 validator hotstuff.Validator, 69 hotstuff module.HotStuff, 70 voteAggregator hotstuff.VoteAggregator, 71 timeoutAggregator hotstuff.TimeoutAggregator, 72 config compliance.Config, 73 ) (*Core, error) { 74 c := &Core{ 75 log: log.With().Str("cluster_compliance", "core").Logger(), 76 config: config, 77 engineMetrics: collector, 78 mempoolMetrics: mempool, 79 hotstuffMetrics: hotstuffMetrics, 80 collectionMetrics: collectionMetrics, 81 proposalViolationNotifier: proposalViolationNotifier, 82 headers: headers, 83 state: state, 84 pending: pending, 85 sync: sync, 86 hotstuff: hotstuff, 87 validator: validator, 88 voteAggregator: voteAggregator, 89 timeoutAggregator: timeoutAggregator, 90 } 91 92 // initialize finalized boundary cache 93 final, err := c.state.Final().Head() 94 if err != nil { 95 return nil, fmt.Errorf("could not initialized finalized boundary cache: %w", err) 96 } 97 c.ProcessFinalizedBlock(final) 98 99 // log the mempool size off the bat 100 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 101 102 return c, nil 103 } 104 105 // OnBlockProposal handles incoming block proposals. 106 // No errors are expected during normal operation. 107 func (c *Core) OnBlockProposal(proposal flow.Slashable[*messages.ClusterBlockProposal]) error { 108 startTime := time.Now() 109 defer func() { 110 c.hotstuffMetrics.BlockProcessingDuration(time.Since(startTime)) 111 }() 112 113 block := flow.Slashable[*cluster.Block]{ 114 OriginID: proposal.OriginID, 115 Message: proposal.Message.Block.ToInternal(), 116 } 117 header := block.Message.Header 118 blockID := header.ID() 119 finalHeight := c.finalizedHeight.Value() 120 finalView := c.finalizedView.Value() 121 122 log := c.log.With(). 123 Hex("origin_id", proposal.OriginID[:]). 124 Str("chain_id", header.ChainID.String()). 125 Uint64("block_height", header.Height). 126 Uint64("block_view", header.View). 127 Hex("block_id", blockID[:]). 128 Hex("parent_id", header.ParentID[:]). 129 Hex("ref_block_id", block.Message.Payload.ReferenceBlockID[:]). 130 Hex("collection_id", logging.Entity(block.Message.Payload.Collection)). 131 Int("tx_count", block.Message.Payload.Collection.Len()). 132 Time("timestamp", header.Timestamp). 133 Hex("proposer", header.ProposerID[:]). 134 Hex("parent_signer_indices", header.ParentVoterIndices). 135 Uint64("finalized_height", finalHeight). 136 Uint64("finalized_view", finalView). 137 Logger() 138 if log.Debug().Enabled() { 139 log = log.With().Strs("tx_ids", 140 flow.IdentifierList(block.Message.Payload.Collection.Light().Transactions).Strings()).Logger() 141 } 142 log.Info().Msg("block proposal received") 143 144 // drop proposals below the finalized threshold 145 if header.Height <= finalHeight || header.View <= finalView { 146 log.Debug().Msg("dropping block below finalized boundary") 147 return nil 148 } 149 150 skipNewProposalsThreshold := c.config.GetSkipNewProposalsThreshold() 151 // ignore proposals which are too far ahead of our local finalized state 152 // instead, rely on sync engine to catch up finalization more effectively, and avoid 153 // large subtree of blocks to be cached. 154 if header.View > finalView+skipNewProposalsThreshold { 155 log.Debug(). 156 Uint64("skip_new_proposals_threshold", skipNewProposalsThreshold). 157 Msg("dropping block too far ahead of locally finalized view") 158 return nil 159 } 160 161 // first, we reject all blocks that we don't need to process: 162 // 1) blocks already in the cache; they will already be processed later 163 // 2) blocks already on disk; they were processed and await finalization 164 165 // ignore proposals that are already cached 166 _, cached := c.pending.ByID(blockID) 167 if cached { 168 log.Debug().Msg("skipping already cached proposal") 169 return nil 170 } 171 172 // ignore proposals that were already processed 173 _, err := c.headers.ByBlockID(blockID) 174 if err == nil { 175 log.Debug().Msg("skipping already processed proposal") 176 return nil 177 } 178 if !errors.Is(err, storage.ErrNotFound) { 179 return fmt.Errorf("could not check proposal: %w", err) 180 } 181 182 // there are two possibilities if the proposal is neither already pending 183 // processing in the cache, nor has already been processed: 184 // 1) the proposal is unverifiable because the parent is unknown 185 // => we cache the proposal 186 // 2) the proposal is connected to finalized state through an unbroken chain 187 // => we verify the proposal and forward it to hotstuff if valid 188 189 // if the parent is a pending block (disconnected from the incorporated state), we cache this block as well. 190 // we don't have to request its parent block or its ancestor again, because as a 191 // pending block, its parent block must have been requested. 192 // if there was problem requesting its parent or ancestors, the sync engine's forward 193 // syncing with range requests for finalized blocks will request for the blocks. 194 _, found := c.pending.ByID(header.ParentID) 195 if found { 196 // add the block to the cache 197 _ = c.pending.Add(block) 198 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 199 200 return nil 201 } 202 203 // if the proposal is connected to a block that is neither in the cache, nor 204 // in persistent storage, its direct parent is missing; cache the proposal 205 // and request the parent 206 exists, err := c.headers.Exists(header.ParentID) 207 if err != nil { 208 return fmt.Errorf("could not check parent exists: %w", err) 209 } 210 if !exists { 211 _ = c.pending.Add(block) 212 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 213 214 c.sync.RequestBlock(header.ParentID, header.Height-1) 215 log.Debug().Msg("requesting missing parent for proposal") 216 return nil 217 } 218 // At this point, we should be able to connect the proposal to the finalized 219 // state and should process it to see whether to forward to hotstuff or not. 220 // processBlockAndDescendants is a recursive function. Here we trace the 221 // execution of the entire recursion, which might include processing the 222 // proposal's pending children. There is another span within 223 // processBlockProposal that measures the time spent for a single proposal. 224 err = c.processBlockAndDescendants(block) 225 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 226 if err != nil { 227 return fmt.Errorf("could not process block proposal: %w", err) 228 } 229 230 return nil 231 } 232 233 // processBlockAndDescendants is a recursive function that processes a block and 234 // its pending descendants. By induction, any child block of a 235 // valid proposal is itself connected to the finalized state and can be 236 // processed as well. 237 func (c *Core) processBlockAndDescendants(proposal flow.Slashable[*cluster.Block]) error { 238 header := proposal.Message.Header 239 blockID := header.ID() 240 log := c.log.With(). 241 Str("block_id", blockID.String()). 242 Uint64("block_height", header.Height). 243 Uint64("block_view", header.View). 244 Uint64("parent_view", header.ParentView). 245 Logger() 246 247 // process block itself 248 err := c.processBlockProposal(proposal.Message) 249 if err != nil { 250 if checkForAndLogOutdatedInputError(err, log) || checkForAndLogUnverifiableInputError(err, log) { 251 return nil 252 } 253 if invalidBlockErr, ok := model.AsInvalidProposalError(err); ok { 254 log.Err(err).Msg("received invalid block from other node (potential slashing evidence?)") 255 256 // notify consumers about invalid block 257 c.proposalViolationNotifier.OnInvalidBlockDetected(flow.Slashable[model.InvalidProposalError]{ 258 OriginID: proposal.OriginID, 259 Message: *invalidBlockErr, 260 }) 261 262 // notify VoteAggregator about the invalid block 263 err = c.voteAggregator.InvalidBlock(model.ProposalFromFlow(header)) 264 if err != nil { 265 if mempool.IsBelowPrunedThresholdError(err) { 266 log.Warn().Msg("received invalid block, but is below pruned threshold") 267 return nil 268 } 269 return fmt.Errorf("unexpected error notifying vote aggregator about invalid block: %w", err) 270 } 271 return nil 272 } 273 // unexpected error: potentially corrupted internal state => abort processing and escalate error 274 return fmt.Errorf("failed to process block %x: %w", blockID, err) 275 } 276 277 // process all children 278 // do not break on invalid or outdated blocks as they should not prevent us 279 // from processing other valid children 280 children, has := c.pending.ByParentID(blockID) 281 if !has { 282 return nil 283 } 284 for _, child := range children { 285 cpr := c.processBlockAndDescendants(child) 286 if cpr != nil { 287 // unexpected error: potentially corrupted internal state => abort processing and escalate error 288 return cpr 289 } 290 } 291 292 // drop all the children that should have been processed now 293 c.pending.DropForParent(blockID) 294 295 return nil 296 } 297 298 // processBlockProposal processes the given block proposal. The proposal must connect to 299 // the finalized state. 300 // Expected errors during normal operations: 301 // - engine.OutdatedInputError if the block proposal is outdated (e.g. orphaned) 302 // - model.InvalidProposalError if the block proposal is invalid 303 // - engine.UnverifiableInputError if the proposal cannot be validated 304 func (c *Core) processBlockProposal(proposal *cluster.Block) error { 305 header := proposal.Header 306 blockID := header.ID() 307 log := c.log.With(). 308 Str("chain_id", header.ChainID.String()). 309 Uint64("block_height", header.Height). 310 Uint64("block_view", header.View). 311 Hex("block_id", blockID[:]). 312 Hex("parent_id", header.ParentID[:]). 313 Hex("payload_hash", header.PayloadHash[:]). 314 Time("timestamp", header.Timestamp). 315 Hex("proposer", header.ProposerID[:]). 316 Hex("parent_signer_indices", header.ParentVoterIndices). 317 Logger() 318 log.Info().Msg("processing block proposal") 319 320 hotstuffProposal := model.ProposalFromFlow(header) 321 err := c.validator.ValidateProposal(hotstuffProposal) 322 if err != nil { 323 if model.IsInvalidProposalError(err) { 324 return err 325 } 326 if errors.Is(err, model.ErrViewForUnknownEpoch) { 327 // The cluster committee never returns ErrViewForUnknownEpoch, therefore this case 328 // is an unexpected error in cluster consensus. 329 return fmt.Errorf("unexpected error: cluster committee reported unknown epoch : %w", irrecoverable.NewException(err)) 330 } 331 return fmt.Errorf("unexpected error validating proposal: %w", err) 332 } 333 334 // see if the block is a valid extension of the protocol state 335 err = c.state.Extend(proposal) 336 if err != nil { 337 if state.IsInvalidExtensionError(err) { 338 // if the block proposes an invalid extension of the cluster state, then the block is invalid 339 return model.NewInvalidProposalErrorf(hotstuffProposal, "invalid extension of cluster state (block: %x, height: %d): %w", blockID, header.Height, err) 340 } else if state.IsOutdatedExtensionError(err) { 341 // cluster state aborted processing of block as it is on an abandoned fork: block is outdated 342 return engine.NewOutdatedInputErrorf("outdated extension of cluster state: %w", err) 343 } else if state.IsUnverifiableExtensionError(err) { 344 return engine.NewUnverifiableInputError("unverifiable extension of cluster state (block_id: %x, height: %d): %w", 345 header.ID(), header.Height, err) 346 } else { 347 // unexpected error: potentially corrupted internal state => abort processing and escalate error 348 return fmt.Errorf("unexpected exception while extending cluster state with block %x at height %d: %w", blockID, header.Height, err) 349 } 350 } 351 352 // notify vote aggregator about a new block, so that it can start verifying 353 // votes for it. 354 c.voteAggregator.AddBlock(hotstuffProposal) 355 356 // submit the model to hotstuff for processing 357 // TODO replace with pubsub https://github.com/dapperlabs/flow-go/issues/6395 358 log.Info().Msg("forwarding block proposal to hotstuff") 359 c.hotstuff.SubmitProposal(hotstuffProposal) 360 361 return nil 362 } 363 364 // ProcessFinalizedBlock performs pruning of stale data based on finalization event 365 // removes pending blocks below the finalized view 366 func (c *Core) ProcessFinalizedBlock(finalized *flow.Header) { 367 // remove all pending blocks at or below the finalized view 368 c.pending.PruneByView(finalized.View) 369 c.finalizedHeight.Set(finalized.Height) 370 c.finalizedView.Set(finalized.View) 371 372 // always record the metric 373 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 374 } 375 376 // checkForAndLogOutdatedInputError checks whether error is an `engine.OutdatedInputError`. 377 // If this is the case, we emit a log message and return true. 378 // For any error other than `engine.OutdatedInputError`, this function is a no-op 379 // and returns false. 380 func checkForAndLogOutdatedInputError(err error, log zerolog.Logger) bool { 381 if engine.IsOutdatedInputError(err) { 382 // child is outdated by the time we started processing it 383 // => node was probably behind and is catching up. Log as warning 384 log.Info().Msg("dropped processing of abandoned fork; this might be an indicator that the node is slightly behind") 385 return true 386 } 387 return false 388 } 389 390 // checkForAndLogUnverifiableInputError checks whether error is an `engine.UnverifiableInputError`. 391 // If this is the case, we emit a log message and return true. 392 // For any error other than `engine.UnverifiableInputError`, this function is a no-op 393 // and returns false. 394 func checkForAndLogUnverifiableInputError(err error, log zerolog.Logger) bool { 395 if engine.IsUnverifiableInputError(err) { 396 // the block cannot be validated 397 log.Warn().Err(err).Msg("received collection proposal with unknown reference block; " + 398 "this might be an indicator that the node is slightly behind or the proposer published an invalid collection") 399 return true 400 } 401 return false 402 }