github.com/onflow/flow-go@v0.33.17/engine/collection/compliance/core.go (about) 1 // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED 2 3 package compliance 4 5 import ( 6 "errors" 7 "fmt" 8 "time" 9 10 "github.com/rs/zerolog" 11 12 "github.com/onflow/flow-go/consensus/hotstuff" 13 "github.com/onflow/flow-go/consensus/hotstuff/model" 14 "github.com/onflow/flow-go/engine" 15 "github.com/onflow/flow-go/model/cluster" 16 "github.com/onflow/flow-go/model/flow" 17 "github.com/onflow/flow-go/model/messages" 18 "github.com/onflow/flow-go/module" 19 "github.com/onflow/flow-go/module/compliance" 20 "github.com/onflow/flow-go/module/counters" 21 "github.com/onflow/flow-go/module/irrecoverable" 22 "github.com/onflow/flow-go/module/mempool" 23 "github.com/onflow/flow-go/module/metrics" 24 "github.com/onflow/flow-go/state" 25 clusterkv "github.com/onflow/flow-go/state/cluster" 26 "github.com/onflow/flow-go/storage" 27 "github.com/onflow/flow-go/utils/logging" 28 ) 29 30 // Core contains the central business logic for the collector clusters' compliance engine. 31 // It is responsible for handling communication for the embedded consensus algorithm. 32 // CAUTION with CONCURRENCY: 33 // - At the moment, compliance.Core _can not_ process blocks concurrently. Callers of `OnBlockProposal` 34 // need to ensure single-threaded access. 35 // - The only exception is calls to `ProcessFinalizedView`, which is the only concurrency-safe 36 // method of compliance.Core 37 type Core struct { 38 log zerolog.Logger // used to log relevant actions with context 39 config compliance.Config 40 engineMetrics module.EngineMetrics 41 mempoolMetrics module.MempoolMetrics 42 hotstuffMetrics module.HotstuffMetrics 43 collectionMetrics module.CollectionMetrics 44 proposalViolationNotifier hotstuff.ProposalViolationConsumer 45 headers storage.Headers 46 state clusterkv.MutableState 47 // track latest finalized view/height - used to efficiently drop outdated or too-far-ahead blocks 48 finalizedView counters.StrictMonotonousCounter 49 finalizedHeight counters.StrictMonotonousCounter 50 pending module.PendingClusterBlockBuffer // pending block cache 51 sync module.BlockRequester 52 hotstuff module.HotStuff 53 validator hotstuff.Validator 54 voteAggregator hotstuff.VoteAggregator 55 timeoutAggregator hotstuff.TimeoutAggregator 56 } 57 58 // NewCore instantiates the business logic for the collector clusters' compliance engine. 59 func NewCore( 60 log zerolog.Logger, 61 collector module.EngineMetrics, 62 mempool module.MempoolMetrics, 63 hotstuffMetrics module.HotstuffMetrics, 64 collectionMetrics module.CollectionMetrics, 65 proposalViolationNotifier hotstuff.ProposalViolationConsumer, 66 headers storage.Headers, 67 state clusterkv.MutableState, 68 pending module.PendingClusterBlockBuffer, 69 sync module.BlockRequester, 70 validator hotstuff.Validator, 71 hotstuff module.HotStuff, 72 voteAggregator hotstuff.VoteAggregator, 73 timeoutAggregator hotstuff.TimeoutAggregator, 74 config compliance.Config, 75 ) (*Core, error) { 76 c := &Core{ 77 log: log.With().Str("cluster_compliance", "core").Logger(), 78 config: config, 79 engineMetrics: collector, 80 mempoolMetrics: mempool, 81 hotstuffMetrics: hotstuffMetrics, 82 collectionMetrics: collectionMetrics, 83 proposalViolationNotifier: proposalViolationNotifier, 84 headers: headers, 85 state: state, 86 pending: pending, 87 sync: sync, 88 hotstuff: hotstuff, 89 validator: validator, 90 voteAggregator: voteAggregator, 91 timeoutAggregator: timeoutAggregator, 92 } 93 94 // initialize finalized boundary cache 95 final, err := c.state.Final().Head() 96 if err != nil { 97 return nil, fmt.Errorf("could not initialized finalized boundary cache: %w", err) 98 } 99 c.ProcessFinalizedBlock(final) 100 101 // log the mempool size off the bat 102 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 103 104 return c, nil 105 } 106 107 // OnBlockProposal handles incoming block proposals. 108 // No errors are expected during normal operation. 109 func (c *Core) OnBlockProposal(proposal flow.Slashable[*messages.ClusterBlockProposal]) error { 110 startTime := time.Now() 111 defer func() { 112 c.hotstuffMetrics.BlockProcessingDuration(time.Since(startTime)) 113 }() 114 115 block := flow.Slashable[*cluster.Block]{ 116 OriginID: proposal.OriginID, 117 Message: proposal.Message.Block.ToInternal(), 118 } 119 header := block.Message.Header 120 blockID := header.ID() 121 finalHeight := c.finalizedHeight.Value() 122 finalView := c.finalizedView.Value() 123 124 log := c.log.With(). 125 Hex("origin_id", proposal.OriginID[:]). 126 Str("chain_id", header.ChainID.String()). 127 Uint64("block_height", header.Height). 128 Uint64("block_view", header.View). 129 Hex("block_id", blockID[:]). 130 Hex("parent_id", header.ParentID[:]). 131 Hex("ref_block_id", block.Message.Payload.ReferenceBlockID[:]). 132 Hex("collection_id", logging.Entity(block.Message.Payload.Collection)). 133 Int("tx_count", block.Message.Payload.Collection.Len()). 134 Time("timestamp", header.Timestamp). 135 Hex("proposer", header.ProposerID[:]). 136 Hex("parent_signer_indices", header.ParentVoterIndices). 137 Uint64("finalized_height", finalHeight). 138 Uint64("finalized_view", finalView). 139 Logger() 140 if log.Debug().Enabled() { 141 log = log.With().Strs("tx_ids", 142 flow.IdentifierList(block.Message.Payload.Collection.Light().Transactions).Strings()).Logger() 143 } 144 log.Info().Msg("block proposal received") 145 146 // drop proposals below the finalized threshold 147 if header.Height <= finalHeight || header.View <= finalView { 148 log.Debug().Msg("dropping block below finalized boundary") 149 return nil 150 } 151 152 skipNewProposalsThreshold := c.config.GetSkipNewProposalsThreshold() 153 // ignore proposals which are too far ahead of our local finalized state 154 // instead, rely on sync engine to catch up finalization more effectively, and avoid 155 // large subtree of blocks to be cached. 156 if header.View > finalView+skipNewProposalsThreshold { 157 log.Debug(). 158 Uint64("skip_new_proposals_threshold", skipNewProposalsThreshold). 159 Msg("dropping block too far ahead of locally finalized view") 160 return nil 161 } 162 163 // first, we reject all blocks that we don't need to process: 164 // 1) blocks already in the cache; they will already be processed later 165 // 2) blocks already on disk; they were processed and await finalization 166 167 // ignore proposals that are already cached 168 _, cached := c.pending.ByID(blockID) 169 if cached { 170 log.Debug().Msg("skipping already cached proposal") 171 return nil 172 } 173 174 // ignore proposals that were already processed 175 _, err := c.headers.ByBlockID(blockID) 176 if err == nil { 177 log.Debug().Msg("skipping already processed proposal") 178 return nil 179 } 180 if !errors.Is(err, storage.ErrNotFound) { 181 return fmt.Errorf("could not check proposal: %w", err) 182 } 183 184 // there are two possibilities if the proposal is neither already pending 185 // processing in the cache, nor has already been processed: 186 // 1) the proposal is unverifiable because the parent is unknown 187 // => we cache the proposal 188 // 2) the proposal is connected to finalized state through an unbroken chain 189 // => we verify the proposal and forward it to hotstuff if valid 190 191 // if the parent is a pending block (disconnected from the incorporated state), we cache this block as well. 192 // we don't have to request its parent block or its ancestor again, because as a 193 // pending block, its parent block must have been requested. 194 // if there was problem requesting its parent or ancestors, the sync engine's forward 195 // syncing with range requests for finalized blocks will request for the blocks. 196 _, found := c.pending.ByID(header.ParentID) 197 if found { 198 // add the block to the cache 199 _ = c.pending.Add(block) 200 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 201 202 return nil 203 } 204 205 // if the proposal is connected to a block that is neither in the cache, nor 206 // in persistent storage, its direct parent is missing; cache the proposal 207 // and request the parent 208 exists, err := c.headers.Exists(header.ParentID) 209 if err != nil { 210 return fmt.Errorf("could not check parent exists: %w", err) 211 } 212 if !exists { 213 _ = c.pending.Add(block) 214 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 215 216 c.sync.RequestBlock(header.ParentID, header.Height-1) 217 log.Debug().Msg("requesting missing parent for proposal") 218 return nil 219 } 220 // At this point, we should be able to connect the proposal to the finalized 221 // state and should process it to see whether to forward to hotstuff or not. 222 // processBlockAndDescendants is a recursive function. Here we trace the 223 // execution of the entire recursion, which might include processing the 224 // proposal's pending children. There is another span within 225 // processBlockProposal that measures the time spent for a single proposal. 226 err = c.processBlockAndDescendants(block) 227 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 228 if err != nil { 229 return fmt.Errorf("could not process block proposal: %w", err) 230 } 231 232 return nil 233 } 234 235 // processBlockAndDescendants is a recursive function that processes a block and 236 // its pending descendants. By induction, any child block of a 237 // valid proposal is itself connected to the finalized state and can be 238 // processed as well. 239 func (c *Core) processBlockAndDescendants(proposal flow.Slashable[*cluster.Block]) error { 240 header := proposal.Message.Header 241 blockID := header.ID() 242 log := c.log.With(). 243 Str("block_id", blockID.String()). 244 Uint64("block_height", header.Height). 245 Uint64("block_view", header.View). 246 Uint64("parent_view", header.ParentView). 247 Logger() 248 249 // process block itself 250 err := c.processBlockProposal(proposal.Message) 251 if err != nil { 252 if checkForAndLogOutdatedInputError(err, log) || checkForAndLogUnverifiableInputError(err, log) { 253 return nil 254 } 255 if invalidBlockErr, ok := model.AsInvalidProposalError(err); ok { 256 log.Err(err).Msg("received invalid block from other node (potential slashing evidence?)") 257 258 // notify consumers about invalid block 259 c.proposalViolationNotifier.OnInvalidBlockDetected(flow.Slashable[model.InvalidProposalError]{ 260 OriginID: proposal.OriginID, 261 Message: *invalidBlockErr, 262 }) 263 264 // notify VoteAggregator about the invalid block 265 err = c.voteAggregator.InvalidBlock(model.ProposalFromFlow(header)) 266 if err != nil { 267 if mempool.IsBelowPrunedThresholdError(err) { 268 log.Warn().Msg("received invalid block, but is below pruned threshold") 269 return nil 270 } 271 return fmt.Errorf("unexpected error notifying vote aggregator about invalid block: %w", err) 272 } 273 return nil 274 } 275 // unexpected error: potentially corrupted internal state => abort processing and escalate error 276 return fmt.Errorf("failed to process block %x: %w", blockID, err) 277 } 278 279 // process all children 280 // do not break on invalid or outdated blocks as they should not prevent us 281 // from processing other valid children 282 children, has := c.pending.ByParentID(blockID) 283 if !has { 284 return nil 285 } 286 for _, child := range children { 287 cpr := c.processBlockAndDescendants(child) 288 if cpr != nil { 289 // unexpected error: potentially corrupted internal state => abort processing and escalate error 290 return cpr 291 } 292 } 293 294 // drop all the children that should have been processed now 295 c.pending.DropForParent(blockID) 296 297 return nil 298 } 299 300 // processBlockProposal processes the given block proposal. The proposal must connect to 301 // the finalized state. 302 // Expected errors during normal operations: 303 // - engine.OutdatedInputError if the block proposal is outdated (e.g. orphaned) 304 // - model.InvalidProposalError if the block proposal is invalid 305 // - engine.UnverifiableInputError if the proposal cannot be validated 306 func (c *Core) processBlockProposal(proposal *cluster.Block) error { 307 header := proposal.Header 308 blockID := header.ID() 309 log := c.log.With(). 310 Str("chain_id", header.ChainID.String()). 311 Uint64("block_height", header.Height). 312 Uint64("block_view", header.View). 313 Hex("block_id", blockID[:]). 314 Hex("parent_id", header.ParentID[:]). 315 Hex("payload_hash", header.PayloadHash[:]). 316 Time("timestamp", header.Timestamp). 317 Hex("proposer", header.ProposerID[:]). 318 Hex("parent_signer_indices", header.ParentVoterIndices). 319 Logger() 320 log.Info().Msg("processing block proposal") 321 322 hotstuffProposal := model.ProposalFromFlow(header) 323 err := c.validator.ValidateProposal(hotstuffProposal) 324 if err != nil { 325 if model.IsInvalidProposalError(err) { 326 return err 327 } 328 if errors.Is(err, model.ErrViewForUnknownEpoch) { 329 // The cluster committee never returns ErrViewForUnknownEpoch, therefore this case 330 // is an unexpected error in cluster consensus. 331 return fmt.Errorf("unexpected error: cluster committee reported unknown epoch : %w", irrecoverable.NewException(err)) 332 } 333 return fmt.Errorf("unexpected error validating proposal: %w", err) 334 } 335 336 // see if the block is a valid extension of the protocol state 337 err = c.state.Extend(proposal) 338 if err != nil { 339 if state.IsInvalidExtensionError(err) { 340 // if the block proposes an invalid extension of the cluster state, then the block is invalid 341 return model.NewInvalidProposalErrorf(hotstuffProposal, "invalid extension of cluster state (block: %x, height: %d): %w", blockID, header.Height, err) 342 } else if state.IsOutdatedExtensionError(err) { 343 // cluster state aborted processing of block as it is on an abandoned fork: block is outdated 344 return engine.NewOutdatedInputErrorf("outdated extension of cluster state: %w", err) 345 } else if state.IsUnverifiableExtensionError(err) { 346 return engine.NewUnverifiableInputError("unverifiable extension of cluster state (block_id: %x, height: %d): %w", 347 header.ID(), header.Height, err) 348 } else { 349 // unexpected error: potentially corrupted internal state => abort processing and escalate error 350 return fmt.Errorf("unexpected exception while extending cluster state with block %x at height %d: %w", blockID, header.Height, err) 351 } 352 } 353 354 // notify vote aggregator about a new block, so that it can start verifying 355 // votes for it. 356 c.voteAggregator.AddBlock(hotstuffProposal) 357 358 // submit the model to hotstuff for processing 359 // TODO replace with pubsub https://github.com/dapperlabs/flow-go/issues/6395 360 log.Info().Msg("forwarding block proposal to hotstuff") 361 c.hotstuff.SubmitProposal(hotstuffProposal) 362 363 return nil 364 } 365 366 // ProcessFinalizedBlock performs pruning of stale data based on finalization event 367 // removes pending blocks below the finalized view 368 func (c *Core) ProcessFinalizedBlock(finalized *flow.Header) { 369 // remove all pending blocks at or below the finalized view 370 c.pending.PruneByView(finalized.View) 371 c.finalizedHeight.Set(finalized.Height) 372 c.finalizedView.Set(finalized.View) 373 374 // always record the metric 375 c.mempoolMetrics.MempoolEntries(metrics.ResourceClusterProposal, c.pending.Size()) 376 } 377 378 // checkForAndLogOutdatedInputError checks whether error is an `engine.OutdatedInputError`. 379 // If this is the case, we emit a log message and return true. 380 // For any error other than `engine.OutdatedInputError`, this function is a no-op 381 // and returns false. 382 func checkForAndLogOutdatedInputError(err error, log zerolog.Logger) bool { 383 if engine.IsOutdatedInputError(err) { 384 // child is outdated by the time we started processing it 385 // => node was probably behind and is catching up. Log as warning 386 log.Info().Msg("dropped processing of abandoned fork; this might be an indicator that the node is slightly behind") 387 return true 388 } 389 return false 390 } 391 392 // checkForAndLogUnverifiableInputError checks whether error is an `engine.UnverifiableInputError`. 393 // If this is the case, we emit a log message and return true. 394 // For any error other than `engine.UnverifiableInputError`, this function is a no-op 395 // and returns false. 396 func checkForAndLogUnverifiableInputError(err error, log zerolog.Logger) bool { 397 if engine.IsUnverifiableInputError(err) { 398 // the block cannot be validated 399 log.Warn().Err(err).Msg("received collection proposal with unknown reference block; " + 400 "this might be an indicator that the node is slightly behind or the proposer published an invalid collection") 401 return true 402 } 403 return false 404 }