github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/follower/cache/cache.go

package cache

import (
	"errors"
	"sync"

	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/consensus/hotstuff"
	"github.com/onflow/flow-go/consensus/hotstuff/model"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/counters"
	herocache "github.com/onflow/flow-go/module/mempool/herocache/backdata"
	"github.com/onflow/flow-go/module/mempool/herocache/backdata/heropool"
)

var (
	ErrDisconnectedBatch = errors.New("batch must be a sequence of connected blocks")
)

// BlocksByID is a lookup of blocks by their block ID.
type BlocksByID map[flow.Identifier]*flow.Block

// batchContext contains contextual data for a batch of blocks. Per convention, a batch is
// a continuous sequence of blocks, i.e. `batch[k]` is the parent block of `batch[k+1]`.
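// For example (illustrative): given a batch [A, B, C] with A ← B ← C (arrows pointing from
// parent to child), `batchParent` would be A's parent and `batchChild` any cached child of C.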
type batchContext struct {
	batchParent *flow.Block // immediate parent of the first block in the batch, i.e. `batch[0]`
	batchChild  *flow.Block // immediate child of the last block in the batch, i.e. `batch[len(batch)-1]`

	// equivocatingBlocks holds the list of equivocations that the batch contained when compared
	// to the cached blocks. An equivocation is a pair of blocks for the same view that have
	// different block IDs.
	equivocatingBlocks [][2]*flow.Block

	// redundant marks whether ALL blocks in the batch are already stored in the cache, meaning
	// that the input is identical to previously processed input.
	redundant bool
}

// Cache stores pending blocks received from other replicas, caches blocks by blockID, and
// maintains secondary indices to look up blocks by view or by parent ID. Additional indices
// are used to track proposal equivocation (multiple distinct proposals for the same view) and
// to find blocks not only by parent but also by child. The cache resolves certified blocks
// when processing incoming batches.
// Concurrency safe.
type Cache struct {
	backend *herocache.Cache // cache with random ejection
	lock    sync.RWMutex

	// secondary indices
	byView   map[uint64]BlocksByID          // lookup of blocks by their respective view; used to detect equivocation
	byParent map[flow.Identifier]BlocksByID // lookup of blocks by their parentID, for finding a block's known children

	notifier   hotstuff.ProposalViolationConsumer // equivocations will be reported using this notifier
	lowestView counters.StrictMonotonousCounter   // lowest view that the cache accepts blocks for
}

// Peek performs a lookup of a cached block by blockID.
// Concurrency safe.
func (c *Cache) Peek(blockID flow.Identifier) *flow.Block {
	c.lock.RLock()
	defer c.lock.RUnlock()
	if block, found := c.backend.ByID(blockID); found {
		return block.(*flow.Block)
	}
	return nil
}

// NewCache creates a new instance of Cache.
func NewCache(log zerolog.Logger, limit uint32, collector module.HeroCacheMetrics, notifier hotstuff.ProposalViolationConsumer) *Cache {
	// We consume ejection events from HeroCache to drop ejected blocks from our secondary indices.
	distributor := NewDistributor()
	cache := &Cache{
		backend: herocache.NewCache(
			limit,
			herocache.DefaultOversizeFactor,
			heropool.RandomEjection,
			log.With().Str("component", "follower.cache").Logger(),
			collector,
			herocache.WithTracer(distributor),
		),
		byView:   make(map[uint64]BlocksByID),
		byParent: make(map[flow.Identifier]BlocksByID),
		notifier: notifier,
	}
	distributor.AddConsumer(cache.handleEjectedEntity)
	return cache
}
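
// Usage sketch (illustrative only, not part of this file): constructing a cache with a
// no-op metrics collector. `metrics.NewNoopCollector` is assumed to be available from
// flow-go's module/metrics package; `violationConsumer` stands in for any concrete
// hotstuff.ProposalViolationConsumer implementation.
//
//	cache := NewCache(zerolog.Nop(), 1000, metrics.NewNoopCollector(), violationConsumer)
//	if block := cache.Peek(blockID); block != nil {
//		// block is currently cached
//	}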

// handleEjectedEntity performs cleanup of the secondary indices to prevent memory leaks.
// WARNING: Concurrency safety of this function is guaranteed by `c.lock`. This method is only
// called by `herocache.Cache.Add`, and we perform this call while holding `c.lock`.
func (c *Cache) handleEjectedEntity(entity flow.Entity) {
	block := entity.(*flow.Block)
	blockID := block.ID()

	// remove block from the set of blocks for this view
	blocksForView := c.byView[block.Header.View]
	delete(blocksForView, blockID)
	if len(blocksForView) == 0 {
		delete(c.byView, block.Header.View)
	}

	// remove block from the parent's set of its children
	siblings := c.byParent[block.Header.ParentID]
	delete(siblings, blockID)
	if len(siblings) == 0 {
		delete(c.byParent, block.Header.ParentID)
	}
}

// AddBlocks atomically adds the given batch of blocks to the cache.
// We require that the incoming batch is sorted in ascending height order and contains no gaps;
// otherwise the cache returns an `ErrDisconnectedBatch` error. When receiving a batch
// [first, ..., last], we are only interested in the first and last blocks. All blocks before
// `last` are certified by construction (by the QC included in `last`). The following two cases
// are possible:
// - for the first block:
//   - no parent is available for the first block.
//   - the parent of the first block is in the cache, so the first block's QC certifies it;
//     we can certify one extra block (the parent).
//
// - for the last block:
//   - no child is available for the last block; we need to wait for a child to certify it.
//   - a child of the last block is in the cache; its QC certifies the last block, so we can
//     certify one extra block (the last block itself).
//
// All blocks from the batch are stored in the cache to provide deduplication.
// The function returns any new certified chain of blocks created by the addition of the batch.
// It returns `certifiedBatch, certifyingQC` if the input batch has more than one block, and/or
// if either a child or parent of the batch is in the cache. The implementation correctly
// handles the cases `len(batch) == 1` and `len(batch) == 0`, returning `nil, nil` when:
//   - the input batch has exactly one block and neither its parent nor child is in the cache.
//   - the input batch is empty.
//
// If an equivocation is detected, it is reported via the notifier.
// Concurrency safe.
//
// Expected errors during normal operations:
//   - ErrDisconnectedBatch
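//
// For example (illustrative): adding the batch [B1 ← B2 ← B3] certifies B1 and B2 via the QCs
// embedded in B2 and B3. If a child B4 of B3 is already cached, B3 is certified as well,
// yielding certifiedBatch = [B1, B2, B3] with certifyingQC taken from B4. If, additionally,
// B1's parent B0 is cached, B0 is prepended: certifiedBatch = [B0, B1, B2, B3].
//
// Caller sketch (illustrative; `forwardCertified` is a hypothetical downstream handler):
//
//	certified, qc, err := cache.AddBlocks(batch)
//	if err != nil { // only expected error is ErrDisconnectedBatch
//		return fmt.Errorf("rejecting malformed batch: %w", err)
//	}
//	if len(certified) > 0 {
//		forwardCertified(certified, qc)
//	}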
func (c *Cache) AddBlocks(batch []*flow.Block) (certifiedBatch []*flow.Block, certifyingQC *flow.QuorumCertificate, err error) {
	batch = c.trimLeadingBlocksBelowPruningThreshold(batch)

	batchSize := len(batch)
	if batchSize < 1 { // empty batch is a no-op
		return nil, nil, nil
	}

	// precompute block IDs (outside of the lock) and sanity-check that the batch forms a connected sequence
	blockIDs, err := enforceSequentialBlocks(batch)
	if err != nil {
		return nil, nil, err
	}

	// Single atomic operation (main logic), with the result returned as `batchContext`:
	//  * add the given batch of blocks to the cache
	//  * check for equivocating blocks (result stored in `batchContext.equivocatingBlocks`)
	//  * check whether the first block in the batch (index 0) has a parent already in the cache
	//    (result stored in `batchContext.batchParent`)
	//  * check whether the last block in the batch has a child already in the cache
	//    (result stored in `batchContext.batchChild`)
	//  * check whether the input is redundant (indicated by `batchContext.redundant`), i.e. ALL
	//    blocks are already known: then skip further processing
	bc := c.unsafeAtomicAdd(blockIDs, batch)
	if bc.redundant {
		return nil, nil, nil
	}

	// If there exists a child of the last block in the batch, then the entire batch is certified.
	// Otherwise, all blocks in the batch _except_ for the last one are certified.
	if bc.batchChild != nil {
		certifiedBatch = batch
		certifyingQC = bc.batchChild.Header.QuorumCertificate()
	} else {
		certifiedBatch = batch[:batchSize-1]
		certifyingQC = batch[batchSize-1].Header.QuorumCertificate()
	}
	// caution: in the case `len(batch) == 1`, the `certifiedBatch` might be empty now (else-case)

	// If there exists a parent for the batch's first block, then this parent is certified
	// by the batch. Hence, we prepend the parent to certifiedBatch.
	if bc.batchParent != nil {
		s := make([]*flow.Block, 0, 1+len(certifiedBatch))
		s = append(s, bc.batchParent)
		certifiedBatch = append(s, certifiedBatch...)
	}

	// report equivocations
	for _, pair := range bc.equivocatingBlocks {
		c.notifier.OnDoubleProposeDetected(model.BlockFromFlow(pair[0].Header), model.BlockFromFlow(pair[1].Header))
	}

	if len(certifiedBatch) < 1 {
		return nil, nil, nil
	}

	return certifiedBatch, certifyingQC, nil
}

// PruneUpToView sets the lowest view that we are accepting blocks for. Any blocks
// with views _strictly smaller_ than the given threshold are removed from the cache.
// Concurrency safe.
func (c *Cache) PruneUpToView(view uint64) {
	previousPruningThreshold := c.lowestView.Value()
	if previousPruningThreshold >= view {
		return // removing all entries up to view was already done in an earlier call
	}

	c.lock.Lock()
	defer c.lock.Unlock()
	if !c.lowestView.Set(view) {
		return // some other concurrent call to `PruneUpToView` did the work already
	}
	if len(c.byView) == 0 {
		return // empty, nothing to prune
	}

	// Optimization: if there are fewer elements in the `byView` map than views in the range
	// to prune, inspect each map element. Otherwise, iterate over each view to prune.
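	// For example (illustrative): pruning a range of 1000 views while `byView` holds only 10
	// entries touches 10 map elements instead of probing 1000 candidate views, and vice versa.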
	if uint64(len(c.byView)) < view-previousPruningThreshold {
		for v, blocks := range c.byView {
			if v < view {
				c.removeByView(v, blocks)
			}
		}
	} else {
		for v := previousPruningThreshold; v < view; v++ {
			if blocks, found := c.byView[v]; found {
				c.removeByView(v, blocks)
			}
		}
	}
}

// removeByView removes all blocks for the given view.
// NOT concurrency safe: execute within the Cache's lock.
func (c *Cache) removeByView(view uint64, blocks BlocksByID) {
	for blockID, block := range blocks {
		c.backend.Remove(blockID)

		siblings := c.byParent[block.Header.ParentID]
		delete(siblings, blockID)
		if len(siblings) == 0 {
			delete(c.byParent, block.Header.ParentID)
		}
	}

	delete(c.byView, view)
}

// unsafeAtomicAdd does the following within a single atomic operation:
//   - add the given batch of blocks to the cache
//   - check for equivocating blocks
//   - check whether the first block in the batch (index 0) has a parent already in the cache
//   - check whether the last block in the batch has a child already in the cache
//   - check whether all blocks were previously stored in the cache
//
// Concurrency SAFE.
//
// For internal use only; unsafe in the following aspects:
//   - assumes the batch is _not empty_
//   - the batch must form a sequence of sequential blocks, i.e. `batch[k]` is the parent of `batch[k+1]`
//   - requires pre-computed blockIDs in the same order as fullBlocks
//
// Any errors are symptoms of internal state corruption.
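// For example (illustrative): calling unsafeAtomicAdd twice with the same batch yields
// `bc.redundant == false` on the first call (at least one block was newly stored) and
// `bc.redundant == true` on the second (no block in the batch was newly stored).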
func (c *Cache) unsafeAtomicAdd(blockIDs []flow.Identifier, fullBlocks []*flow.Block) (bc batchContext) {
	c.lock.Lock()
	defer c.lock.Unlock()

	// check whether we have the parent of the first block already in our cache:
	if parent, ok := c.backend.ByID(fullBlocks[0].Header.ParentID); ok {
		bc.batchParent = parent.(*flow.Block)
	}

	// check whether we have a child of the last block already in our cache:
	lastBlockID := blockIDs[len(blockIDs)-1]
	if children, ok := c.byParent[lastBlockID]; ok {
		// Due to forks, it is possible that we have multiple children for the same parent. Conceptually, we
		// only care about the QC contained in the child, which serves as proof that the parent has been
		// certified. Therefore, we don't care which child we find here, as long as we find one at all.
		for _, child := range children {
			bc.batchChild = child
			break
		}
	}

	// add blocks to the underlying cache and check for equivocation (collected in `bc.equivocatingBlocks`)
	storedBlocks := uint64(0)
	for i, block := range fullBlocks {
		equivocation, cached := c.cache(blockIDs[i], block)
		if equivocation != nil {
			bc.equivocatingBlocks = append(bc.equivocatingBlocks, [2]*flow.Block{equivocation, block})
		}
		if cached {
			storedBlocks++
		}
	}
	bc.redundant = storedBlocks < 1

	return bc
}

// cache adds the given block to the underlying block cache. By indexing blocks by view, we can detect
// equivocation. The first return value contains the already-cached equivocating block, or `nil` otherwise.
// Repeated calls with the same block are no-ops.
// CAUTION: not concurrency safe: execute within the Cache's lock.
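// For example (illustrative): after caching block X at view 10, caching a different block Y
// for the same view 10 returns (X, true) — X is the equivocating counterpart, while Y is
// still stored, as explained below.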
func (c *Cache) cache(blockID flow.Identifier, block *flow.Block) (equivocation *flow.Block, stored bool) {
	cachedBlocksAtView, haveCachedBlocksAtView := c.byView[block.Header.View]
	// Check whether there is a block with the same view already in the cache.
	// During happy-path operations, `cachedBlocksAtView` usually contains zero blocks or exactly one block,
	// which is our input `block` (duplicate). Larger sets of blocks can only be caused by slashable byzantine actions.
	for otherBlockID, otherBlock := range cachedBlocksAtView {
		if otherBlockID == blockID {
			return nil, false // already stored
		}
		// we have two blocks for the same view but with different IDs => equivocation!
		equivocation = otherBlock
		break // we care whether we find an equivocation, but don't need to enumerate all equivocations
	}
	// Note: Even if this node detects an equivocation, we still have to process the block. This is because
	// the node might be the only one seeing the equivocation, and other nodes might certify the block,
	// in which case this node also needs to process the block to continue following consensus.

	// block is not a duplicate: store it in the underlying HeroCache and add it to the secondary indices
	stored = c.backend.Add(blockID, block)
	if !stored { // future-proofing: we allow an overflowing HeroCache to potentially eject the newly added element.
		return
	}

	// populate `byView` index
	if !haveCachedBlocksAtView {
		cachedBlocksAtView = make(BlocksByID)
		c.byView[block.Header.View] = cachedBlocksAtView
	}
	cachedBlocksAtView[blockID] = block

	// populate `byParent` index
	siblings, ok := c.byParent[block.Header.ParentID]
	if !ok {
		siblings = make(BlocksByID)
		c.byParent[block.Header.ParentID] = siblings
	}
	siblings[blockID] = block

	return
}

// enforceSequentialBlocks enforces that the batch is a continuous sequence of blocks, i.e. `batch[k]`
// is the parent block of `batch[k+1]`. Returns a slice with the IDs of the blocks in the same order
// as the batch. Returns `ErrDisconnectedBatch` if the blocks are not a continuous sequence.
// Pure function, hence concurrency safe.
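// For example (illustrative): for a batch [A, B, C] with B.Header.ParentID == A.ID() and
// C.Header.ParentID == B.ID(), the function returns [A.ID(), B.ID(), C.ID()]. If any parent
// link is broken, it returns ErrDisconnectedBatch instead.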
func enforceSequentialBlocks(batch []*flow.Block) ([]flow.Identifier, error) {
	blockIDs := make([]flow.Identifier, 0, len(batch))
	parentID := batch[0].ID()
	blockIDs = append(blockIDs, parentID)
	for _, b := range batch[1:] {
		if b.Header.ParentID != parentID {
			return nil, ErrDisconnectedBatch
		}
		parentID = b.ID()
		blockIDs = append(blockIDs, parentID)
	}
	return blockIDs, nil
}

// trimLeadingBlocksBelowPruningThreshold trims the blocks at the _beginning_ of the batch whose
// views are strictly below the pruning threshold `lowestView`. Formally, let i be the _smallest_
// index such that
//
//	batch[i].View ≥ lowestView
//
// Hence, for all k < i: batch[k].View < lowestView (otherwise, a smaller value for i would exist).
// Note:
//   - For this method, we do _not_ assume any specific ordering of the blocks.
//   - We drop all blocks at the _beginning_ that we would not want to cache anyway.
//   - The returned slice of blocks could still contain blocks with views below the cutoff.
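// For example (illustrative): given batch views [3, 4, 7, 2, 8] and lowestView = 5, the smallest
// qualifying index is i = 2 (view 7), so the blocks with views [7, 2, 8] are returned. Note that
// the trailing view 2 is below the cutoff but retained, illustrating the last bullet above.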
func (c *Cache) trimLeadingBlocksBelowPruningThreshold(batch []*flow.Block) []*flow.Block {
	lowestView := c.lowestView.Value()
	for i, block := range batch {
		if block.Header.View >= lowestView {
			return batch[i:]
		}
	}
	return nil
}