github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/chainsync/core.go (about)

     1  package chainsync
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/rs/zerolog"
    10  
    11  	"github.com/onflow/flow-go/model/chainsync"
    12  	"github.com/onflow/flow-go/model/flow"
    13  	"github.com/onflow/flow-go/module"
    14  )
    15  
const (
	// DefaultPollNodes is the default number of nodes we send a message to on
	// each poll interval.
	DefaultPollNodes uint = 3

	// DefaultBlockRequestNodes is the default number of nodes we request a
	// block resource from.
	DefaultBlockRequestNodes uint = 3

	// DefaultQueuedHeightMultiplicity limits the number of heights we queue
	// above the current finalized height. HandleHeight queues at most
	// DefaultQueuedHeightMultiplicity * Config.MaxRequests * Config.MaxSize
	// heights beyond the finalized height it is given.
	DefaultQueuedHeightMultiplicity uint = 4
)
    29  
// Config holds the tunable parameters of the sync Core.
type Config struct {
	// RetryInterval is the initial interval before we retry a request. The
	// interval is doubled for every attempt already made (exponential
	// backoff).
	RetryInterval time.Duration

	// Tolerance determines how big a difference in block heights we tolerate
	// before actively syncing with range requests.
	Tolerance uint

	// MaxAttempts is the maximum number of attempts we make for each requested
	// block/height before discarding the request.
	MaxAttempts uint

	// MaxSize is the maximum number of blocks we request in the same request
	// message; it bounds the size of every range and every batch.
	MaxSize uint

	// MaxRequests is the maximum number of requests (ranges plus batches) we
	// send during each scanning period.
	MaxRequests uint
}
    37  
    38  func DefaultConfig() Config {
    39  	return Config{
    40  		RetryInterval: 4 * time.Second,
    41  		Tolerance:     10,
    42  		MaxAttempts:   5,
    43  		MaxSize:       64,
    44  		MaxRequests:   3,
    45  	}
    46  }
    47  
// Core contains core logic, configuration, and state for chain state
// synchronization. It is generic to chain type, so it works for both consensus
// and collection nodes.
//
// Core should be wrapped by a type-aware engine that manages the specifics of
// each chain. Example: https://github.com/onflow/flow-go/blob/master/engine/common/synchronization/engine.go
//
// Core is safe for concurrent use by multiple goroutines.
type Core struct {
	// log is the logger, tagged with the chain ID under the "sync_core" key.
	log zerolog.Logger

	// Config holds the tunable parameters. It is read by the methods of Core
	// without additional locking (e.g. in WithinTolerance).
	Config Config

	// mu is acquired by the exported methods before they touch heights,
	// blockIDs or localFinalizedHeight.
	mu sync.Mutex

	// heights tracks the status of requests queued by block height.
	heights map[uint64]*chainsync.Status

	// blockIDs tracks the status of requests queued by block ID.
	blockIDs map[flow.Identifier]*chainsync.Status

	// metrics receives notifications about pruned and requested items.
	metrics module.ChainSyncMetrics

	// localFinalizedHeight is the highest finalized height passed to prune so
	// far. Once it is non-zero, requests at or below it are not queued.
	localFinalizedHeight uint64
}
    65  
    66  func New(log zerolog.Logger, config Config, metrics module.ChainSyncMetrics, chainID flow.ChainID) (*Core, error) {
    67  	core := &Core{
    68  		log:                  log.With().Str("sync_core", chainID.String()).Logger(),
    69  		Config:               config,
    70  		heights:              make(map[uint64]*chainsync.Status),
    71  		blockIDs:             make(map[flow.Identifier]*chainsync.Status),
    72  		metrics:              metrics,
    73  		localFinalizedHeight: 0,
    74  	}
    75  	return core, nil
    76  }
    77  
    78  // HandleBlock handles receiving a new block from another node. It returns
    79  // true if the block should be processed by the compliance layer and false
    80  // if it should be ignored.
    81  func (c *Core) HandleBlock(header *flow.Header) bool {
    82  	log := c.log
    83  	if c.log.Debug().Enabled() {
    84  		log = c.log.With().Str("block_id", header.ID().String()).Uint64("block_height", header.Height).Logger()
    85  	}
    86  	c.mu.Lock()
    87  	defer c.mu.Unlock()
    88  
    89  	status := c.getRequestStatus(header.Height, header.ID())
    90  
    91  	// if we never asked for this block, discard it
    92  	if !status.WasQueued() {
    93  		log.Debug().Msg("discarding not queued block")
    94  		return false
    95  	}
    96  	// if we have already received this block, exit
    97  	if status.WasReceived() {
    98  		log.Debug().Msg("discarding not received block")
    99  		return false
   100  	}
   101  
   102  	// this is a new block, remember that we've seen it
   103  	status.Header = header
   104  	status.Received = time.Now()
   105  
   106  	// track it by ID and by height so we don't accidentally request it again
   107  	c.blockIDs[header.ID()] = status
   108  	c.heights[header.Height] = status
   109  
   110  	log.Debug().Msg("handled block")
   111  	return true
   112  }
   113  
   114  // HandleHeight handles receiving a new highest finalized height from another node.
   115  // If the height difference between local and the reported height is outside tolerance, we do nothing.
   116  // Otherwise, we queue each missing height.
   117  func (c *Core) HandleHeight(final *flow.Header, height uint64) {
   118  	log := c.log.With().Uint64("final_height", final.Height).Uint64("recv_height", height).Logger()
   119  	log.Debug().Msg("received height")
   120  	// don't bother queueing anything if we're within tolerance
   121  	if c.WithinTolerance(final, height) {
   122  		log.Debug().Msg("height within tolerance - discarding")
   123  		return
   124  	}
   125  
   126  	// if we are sufficiently behind, we want to sync the missing blocks
   127  	if height > final.Height {
   128  		c.mu.Lock()
   129  		defer c.mu.Unlock()
   130  
   131  		// limit to request up to DefaultQueuedHeightMultiplicity*MaxRequests*MaxSize blocks from the peer.
   132  		// without this limit, then if we are falling far behind,
   133  		// we would queue up too many heights.
   134  		heightLimit := final.Height + uint64(DefaultQueuedHeightMultiplicity*c.Config.MaxRequests*c.Config.MaxSize)
   135  		if height > heightLimit {
   136  			height = heightLimit
   137  		}
   138  
   139  		for h := final.Height + 1; h <= height; h++ {
   140  			c.requeueHeight(h)
   141  		}
   142  		log.Debug().Msgf("requeued heights [%d-%d]", final.Height+1, height)
   143  	}
   144  }
   145  
   146  func (c *Core) RequestBlock(blockID flow.Identifier, height uint64) {
   147  	log := c.log.With().Str("block_id", blockID.String()).Uint64("height", height).Logger()
   148  	// requesting a block by its ID storing the height to prune more efficiently
   149  	c.mu.Lock()
   150  	defer c.mu.Unlock()
   151  
   152  	// if we already received this block, reset the status so we can re-queue
   153  	status := c.blockIDs[blockID]
   154  	if status.WasReceived() {
   155  		log.Debug().Msgf("requested block was already received")
   156  		delete(c.blockIDs, status.Header.ID())
   157  		delete(c.heights, status.Header.Height)
   158  	}
   159  
   160  	c.queueByBlockID(blockID, height)
   161  	log.Debug().Msgf("enqueued requested block")
   162  }
   163  
// RequestHeight queues a request for the block at the given height. If a block
// at that height was already received, its recorded status is discarded first
// so that the height can be requested again (see requeueHeight).
func (c *Core) RequestHeight(height uint64) {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.requeueHeight(height)
	// note: this is logged even when queueByHeight ignored the request
	c.log.Debug().Uint64("height", height).Msg("enqueued requested height")
}
   171  
   172  // requeueHeight queues the given height, ignoring any previously received
   173  // blocks at that height
   174  func (c *Core) requeueHeight(height uint64) {
   175  	// if we already received this block, reset the status so we can re-queue
   176  	status := c.heights[height]
   177  	if status.WasReceived() {
   178  		delete(c.blockIDs, status.Header.ID())
   179  		delete(c.heights, status.Header.Height)
   180  	}
   181  
   182  	c.queueByHeight(height)
   183  }
   184  
   185  // ScanPending scans all pending block statuses for blocks that should be
   186  // requested. It apportions requestable items into range and batch requests
   187  // according to configured maximums, giving precedence to range requests.
   188  func (c *Core) ScanPending(final *flow.Header) ([]chainsync.Range, []chainsync.Batch) {
   189  	c.mu.Lock()
   190  	defer c.mu.Unlock()
   191  
   192  	log := c.log.With().Uint64("final_height", final.Height).Logger()
   193  
   194  	// prune if the current height is less than the new height
   195  	c.prune(final)
   196  
   197  	// get all items that are eligible for initial or re-requesting
   198  	heights, blockIDs := c.getRequestableItems()
   199  	c.log.Debug().Msgf("scan found %d requestable heights, %d requestable block IDs", len(heights), len(blockIDs))
   200  
   201  	// convert to valid range and batch requests
   202  	ranges := c.getRanges(heights)
   203  	batches := c.getBatches(blockIDs)
   204  	log.Debug().Str("ranges", fmt.Sprintf("%v", ranges)).Str("batches", fmt.Sprintf("%v", batches)).Msg("compiled range and batch requests")
   205  
   206  	return c.selectRequests(ranges, batches)
   207  }
   208  
   209  // WithinTolerance returns whether or not the given height is within configured
   210  // height tolerance, wrt the given local finalized header.
   211  func (c *Core) WithinTolerance(final *flow.Header, height uint64) bool {
   212  
   213  	lower := final.Height - uint64(c.Config.Tolerance)
   214  	if lower > final.Height { // underflow check
   215  		lower = 0
   216  	}
   217  	upper := final.Height + uint64(c.Config.Tolerance)
   218  
   219  	return height >= lower && height <= upper
   220  }
   221  
   222  // queueByHeight queues a request for the finalized block at the given height,
   223  // only if no equivalent request has been queued before.
   224  func (c *Core) queueByHeight(height uint64) {
   225  	// do not queue the block if the height is lower or the same as the local finalized height
   226  	// the check != 0 is necessary or we will never queue blocks at height 0
   227  	if height <= c.localFinalizedHeight && c.localFinalizedHeight != 0 {
   228  		return
   229  	}
   230  
   231  	// only queue the request if have never queued it before
   232  	if c.heights[height].WasQueued() {
   233  		return
   234  	}
   235  
   236  	// queue the request
   237  	c.heights[height] = chainsync.NewQueuedStatus(height)
   238  }
   239  
   240  // queueByBlockID queues a request for a block by block ID, only if no
   241  // equivalent request has been queued before.
   242  func (c *Core) queueByBlockID(blockID flow.Identifier, height uint64) {
   243  	// do not queue the block if the height is lower or the same as the local finalized height
   244  	// the check != 0 is necessary or we will never queue blocks at height 0
   245  	if height <= c.localFinalizedHeight && c.localFinalizedHeight != 0 {
   246  		return
   247  	}
   248  
   249  	// only queue the request if have never queued it before
   250  	if c.blockIDs[blockID].WasQueued() {
   251  		return
   252  	}
   253  
   254  	// queue the request
   255  	c.blockIDs[blockID] = chainsync.NewQueuedStatus(height)
   256  }
   257  
   258  // getRequestStatus retrieves a request status for a block, regardless of
   259  // whether it was queued by height or by block ID.
   260  func (c *Core) getRequestStatus(height uint64, blockID flow.Identifier) *chainsync.Status {
   261  	heightStatus := c.heights[height]
   262  	idStatus := c.blockIDs[blockID]
   263  
   264  	if idStatus.WasQueued() {
   265  		return idStatus
   266  	}
   267  	// Only return the height status if there is no matching status for the ID
   268  	if heightStatus.WasQueued() {
   269  		return heightStatus
   270  	}
   271  
   272  	return nil
   273  }
   274  
   275  // prune removes any pending requests which we have received and which is below
   276  // the finalized height, or which we received sufficiently long ago.
   277  func (c *Core) prune(final *flow.Header) {
   278  	if c.localFinalizedHeight >= final.Height {
   279  		return
   280  	}
   281  
   282  	c.localFinalizedHeight = final.Height
   283  
   284  	// track how many statuses we are pruning
   285  	initialHeights := len(c.heights)
   286  	initialBlockIDs := len(c.blockIDs)
   287  
   288  	for height, status := range c.heights {
   289  		if height <= final.Height {
   290  			delete(c.heights, height)
   291  			c.metrics.PrunedBlockByHeight(status)
   292  		}
   293  	}
   294  
   295  	for blockID, status := range c.blockIDs {
   296  		if status.BlockHeight <= final.Height {
   297  			delete(c.blockIDs, blockID)
   298  			c.metrics.PrunedBlockById(status)
   299  		}
   300  	}
   301  
   302  	currentHeights := len(c.heights)
   303  	currentBlockIDs := len(c.blockIDs)
   304  
   305  	prunedHeights := initialHeights - currentHeights
   306  	prunedBlockIDs := initialBlockIDs - currentBlockIDs
   307  
   308  	c.metrics.PrunedBlocks(prunedHeights, prunedBlockIDs, currentHeights, currentBlockIDs)
   309  
   310  	c.log.Debug().
   311  		Uint64("final_height", final.Height).
   312  		Msgf("pruned %d heights, %d block IDs", prunedHeights, prunedBlockIDs)
   313  }
   314  
// Prune removes all tracked requests at or below the height of the given
// finalized header. It acquires the mutex and delegates to prune, which does
// nothing unless final.Height is above the local finalized height.
func (c *Core) Prune(final *flow.Header) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.prune(final)
}
   320  
   321  // getRequestableItems will find all block IDs and heights that are eligible
   322  // to be requested.
   323  func (c *Core) getRequestableItems() ([]uint64, []flow.Identifier) {
   324  
   325  	// TODO: we will probably want to limit the maximum amount of in-flight
   326  	// requests and maximum amount of blocks requested at the same time here;
   327  	// for now, we just ignore that problem, but once we do, we should always
   328  	// prioritize range requests over batch requests
   329  
   330  	now := time.Now()
   331  
   332  	// create a list of all height requests that should be sent
   333  	var heights []uint64
   334  	for height, status := range c.heights {
   335  
   336  		// if the last request is young enough, skip
   337  		retryAfter := status.Requested.Add(c.Config.RetryInterval << status.Attempts)
   338  		if now.Before(retryAfter) {
   339  			continue
   340  		}
   341  
   342  		// if we've already received this block, skip
   343  		if status.WasReceived() {
   344  			continue
   345  		}
   346  
   347  		// if we reached maximum number of attempts, delete
   348  		if status.Attempts >= c.Config.MaxAttempts {
   349  			delete(c.heights, height)
   350  			continue
   351  		}
   352  
   353  		// otherwise, append to heights to be requested
   354  		heights = append(heights, height)
   355  	}
   356  
   357  	// create list of all the block IDs blocks that are missing
   358  	var blockIDs []flow.Identifier
   359  	for blockID, status := range c.blockIDs {
   360  
   361  		// if the last request is young enough, skip
   362  		retryAfter := status.Requested.Add(c.Config.RetryInterval << status.Attempts)
   363  		if now.Before(retryAfter) {
   364  			continue
   365  		}
   366  
   367  		// if we've already received this block, skip
   368  		if status.WasReceived() {
   369  			continue
   370  		}
   371  
   372  		// if we reached the maximum number of attempts for a queue item, drop
   373  		if status.Attempts >= c.Config.MaxAttempts {
   374  			delete(c.blockIDs, blockID)
   375  			continue
   376  		}
   377  
   378  		// otherwise, append to blockIDs to be requested
   379  		blockIDs = append(blockIDs, blockID)
   380  	}
   381  
   382  	return heights, blockIDs
   383  }
   384  
   385  // RangeRequested updates status state for a range of block heights that has
   386  // been successfully requested. Must be called when a range request is submitted.
   387  func (c *Core) RangeRequested(ran chainsync.Range) {
   388  	c.mu.Lock()
   389  	defer c.mu.Unlock()
   390  	c.metrics.RangeRequested(ran)
   391  
   392  	for height := ran.From; height <= ran.To; height++ {
   393  		status, exists := c.heights[height]
   394  		if !exists {
   395  			return
   396  		}
   397  		status.Requested = time.Now()
   398  		status.Attempts++
   399  	}
   400  }
   401  
   402  // BatchRequested updates status state for a batch of block IDs that has been
   403  // successfully requested. Must be called when a batch request is submitted.
   404  func (c *Core) BatchRequested(batch chainsync.Batch) {
   405  	c.mu.Lock()
   406  	defer c.mu.Unlock()
   407  	c.metrics.BatchRequested(batch)
   408  
   409  	for _, blockID := range batch.BlockIDs {
   410  		status, exists := c.blockIDs[blockID]
   411  		if !exists {
   412  			return
   413  		}
   414  		status.Requested = time.Now()
   415  		status.Attempts++
   416  	}
   417  }
   418  
   419  // getRanges returns a set of ranges of heights that can be used as range
   420  // requests.
   421  func (c *Core) getRanges(heights []uint64) []chainsync.Range {
   422  
   423  	// sort the heights so we can build contiguous ranges more easily
   424  	sort.Slice(heights, func(i int, j int) bool {
   425  		return heights[i] < heights[j]
   426  	})
   427  
   428  	// build contiguous height ranges with maximum batch size
   429  	start := uint64(0)
   430  	end := uint64(0)
   431  	var ranges []chainsync.Range
   432  	for index, height := range heights {
   433  
   434  		// on the first iteration, we set the start pointer, so we don't need to
   435  		// guard the for loop when heights is empty
   436  		if index == 0 {
   437  			start = height
   438  		}
   439  
   440  		// we always forward the end pointer to the new height
   441  		end = height
   442  
   443  		// if we have the end of the loop, we always create one final range
   444  		if index >= len(heights)-1 {
   445  			r := chainsync.Range{From: start, To: end}
   446  			ranges = append(ranges, r)
   447  			break
   448  		}
   449  
   450  		// at this point, we will have a next height as iteration will continue
   451  		nextHeight := heights[index+1]
   452  
   453  		// if we have reached the maximum size for a range, we create the range
   454  		// and forward the start pointer to the next height
   455  		rangeSize := end - start + 1
   456  		if rangeSize >= uint64(c.Config.MaxSize) {
   457  			r := chainsync.Range{From: start, To: end}
   458  			ranges = append(ranges, r)
   459  			start = nextHeight
   460  			continue
   461  		}
   462  
   463  		// if end is more than one smaller than the next height, we have a gap
   464  		// next, so we create a range and forward the start pointer
   465  		if nextHeight > end+1 {
   466  			r := chainsync.Range{From: start, To: end}
   467  			ranges = append(ranges, r)
   468  			start = nextHeight
   469  			continue
   470  		}
   471  	}
   472  
   473  	return ranges
   474  }
   475  
   476  // getBatches returns a set of batches that can be used in batch requests.
   477  func (c *Core) getBatches(blockIDs []flow.Identifier) []chainsync.Batch {
   478  
   479  	var batches []chainsync.Batch
   480  	// split the block IDs into maximum sized requests
   481  	for from := 0; from < len(blockIDs); from += int(c.Config.MaxSize) {
   482  
   483  		// make sure last range is not out of bounds
   484  		to := from + int(c.Config.MaxSize)
   485  		if to > len(blockIDs) {
   486  			to = len(blockIDs)
   487  		}
   488  
   489  		// create the block IDs slice
   490  		requestIDs := blockIDs[from:to]
   491  		batch := chainsync.Batch{
   492  			BlockIDs: requestIDs,
   493  		}
   494  		batches = append(batches, batch)
   495  	}
   496  
   497  	return batches
   498  }
   499  
   500  // selectRequests selects which requests should be submitted, given a set of
   501  // candidate range and batch requests. Range requests are given precedence and
   502  // the total number of requests does not exceed the configured request maximum.
   503  func (c *Core) selectRequests(ranges []chainsync.Range, batches []chainsync.Batch) ([]chainsync.Range, []chainsync.Batch) {
   504  	max := int(c.Config.MaxRequests)
   505  
   506  	if len(ranges) >= max {
   507  		return ranges[:max], nil
   508  	}
   509  	if len(ranges)+len(batches) >= max {
   510  		return ranges, batches[:max-len(ranges)]
   511  	}
   512  	return ranges, batches
   513  }