github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/chainsync/core.go (about) 1 package chainsync 2 3 import ( 4 "fmt" 5 "sort" 6 "sync" 7 "time" 8 9 "github.com/rs/zerolog" 10 11 "github.com/onflow/flow-go/model/chainsync" 12 "github.com/onflow/flow-go/model/flow" 13 "github.com/onflow/flow-go/module" 14 ) 15 16 const ( 17 // DefaultPollNodes is the default number of nodes we send a message to on 18 // each poll interval. 19 DefaultPollNodes uint = 3 20 21 // DefaultBlockRequestNodes is the default number of nodes we request a 22 // block resource from. 23 DefaultBlockRequestNodes uint = 3 24 25 // DefaultQueuedHeightMultiplicity limits the number of heights we queue 26 // above the current finalized height. 27 DefaultQueuedHeightMultiplicity uint = 4 28 ) 29 30 type Config struct { 31 RetryInterval time.Duration // the initial interval before we retry a request, uses exponential backoff 32 Tolerance uint // determines how big of a difference in block heights we tolerated before actively syncing with range requests 33 MaxAttempts uint // the maximum number of attempts we make for each requested block/height before discarding 34 MaxSize uint // the maximum number of blocks we request in the same block request message 35 MaxRequests uint // the maximum number of requests we send during each scanning period 36 } 37 38 func DefaultConfig() Config { 39 return Config{ 40 RetryInterval: 4 * time.Second, 41 Tolerance: 10, 42 MaxAttempts: 5, 43 MaxSize: 64, 44 MaxRequests: 3, 45 } 46 } 47 48 // Core contains core logic, configuration, and state for chain state 49 // synchronization. It is generic to chain type, so it works for both consensus 50 // and collection nodes. 51 // 52 // Core should be wrapped by a type-aware engine that manages the specifics of 53 // each chain. Example: https://github.com/onflow/flow-go/blob/master/engine/common/synchronization/engine.go 54 // 55 // Core is safe for concurrent use by multiple goroutines. 56 type Core struct { 57 log zerolog.Logger 58 Config Config 59 mu sync.Mutex 60 heights map[uint64]*chainsync.Status 61 blockIDs map[flow.Identifier]*chainsync.Status 62 metrics module.ChainSyncMetrics 63 localFinalizedHeight uint64 64 } 65 66 func New(log zerolog.Logger, config Config, metrics module.ChainSyncMetrics, chainID flow.ChainID) (*Core, error) { 67 core := &Core{ 68 log: log.With().Str("sync_core", chainID.String()).Logger(), 69 Config: config, 70 heights: make(map[uint64]*chainsync.Status), 71 blockIDs: make(map[flow.Identifier]*chainsync.Status), 72 metrics: metrics, 73 localFinalizedHeight: 0, 74 } 75 return core, nil 76 } 77 78 // HandleBlock handles receiving a new block from another node. It returns 79 // true if the block should be processed by the compliance layer and false 80 // if it should be ignored. 81 func (c *Core) HandleBlock(header *flow.Header) bool { 82 log := c.log 83 if c.log.Debug().Enabled() { 84 log = c.log.With().Str("block_id", header.ID().String()).Uint64("block_height", header.Height).Logger() 85 } 86 c.mu.Lock() 87 defer c.mu.Unlock() 88 89 status := c.getRequestStatus(header.Height, header.ID()) 90 91 // if we never asked for this block, discard it 92 if !status.WasQueued() { 93 log.Debug().Msg("discarding not queued block") 94 return false 95 } 96 // if we have already received this block, exit 97 if status.WasReceived() { 98 log.Debug().Msg("discarding not received block") 99 return false 100 } 101 102 // this is a new block, remember that we've seen it 103 status.Header = header 104 status.Received = time.Now() 105 106 // track it by ID and by height so we don't accidentally request it again 107 c.blockIDs[header.ID()] = status 108 c.heights[header.Height] = status 109 110 log.Debug().Msg("handled block") 111 return true 112 } 113 114 // HandleHeight handles receiving a new highest finalized height from another node. 115 // If the height difference between local and the reported height is outside tolerance, we do nothing. 116 // Otherwise, we queue each missing height. 117 func (c *Core) HandleHeight(final *flow.Header, height uint64) { 118 log := c.log.With().Uint64("final_height", final.Height).Uint64("recv_height", height).Logger() 119 log.Debug().Msg("received height") 120 // don't bother queueing anything if we're within tolerance 121 if c.WithinTolerance(final, height) { 122 log.Debug().Msg("height within tolerance - discarding") 123 return 124 } 125 126 // if we are sufficiently behind, we want to sync the missing blocks 127 if height > final.Height { 128 c.mu.Lock() 129 defer c.mu.Unlock() 130 131 // limit to request up to DefaultQueuedHeightMultiplicity*MaxRequests*MaxSize blocks from the peer. 132 // without this limit, then if we are falling far behind, 133 // we would queue up too many heights. 134 heightLimit := final.Height + uint64(DefaultQueuedHeightMultiplicity*c.Config.MaxRequests*c.Config.MaxSize) 135 if height > heightLimit { 136 height = heightLimit 137 } 138 139 for h := final.Height + 1; h <= height; h++ { 140 c.requeueHeight(h) 141 } 142 log.Debug().Msgf("requeued heights [%d-%d]", final.Height+1, height) 143 } 144 } 145 146 func (c *Core) RequestBlock(blockID flow.Identifier, height uint64) { 147 log := c.log.With().Str("block_id", blockID.String()).Uint64("height", height).Logger() 148 // requesting a block by its ID storing the height to prune more efficiently 149 c.mu.Lock() 150 defer c.mu.Unlock() 151 152 // if we already received this block, reset the status so we can re-queue 153 status := c.blockIDs[blockID] 154 if status.WasReceived() { 155 log.Debug().Msgf("requested block was already received") 156 delete(c.blockIDs, status.Header.ID()) 157 delete(c.heights, status.Header.Height) 158 } 159 160 c.queueByBlockID(blockID, height) 161 log.Debug().Msgf("enqueued requested block") 162 } 163 164 func (c *Core) RequestHeight(height uint64) { 165 c.mu.Lock() 166 defer c.mu.Unlock() 167 168 c.requeueHeight(height) 169 c.log.Debug().Uint64("height", height).Msg("enqueued requested height") 170 } 171 172 // requeueHeight queues the given height, ignoring any previously received 173 // blocks at that height 174 func (c *Core) requeueHeight(height uint64) { 175 // if we already received this block, reset the status so we can re-queue 176 status := c.heights[height] 177 if status.WasReceived() { 178 delete(c.blockIDs, status.Header.ID()) 179 delete(c.heights, status.Header.Height) 180 } 181 182 c.queueByHeight(height) 183 } 184 185 // ScanPending scans all pending block statuses for blocks that should be 186 // requested. It apportions requestable items into range and batch requests 187 // according to configured maximums, giving precedence to range requests. 188 func (c *Core) ScanPending(final *flow.Header) ([]chainsync.Range, []chainsync.Batch) { 189 c.mu.Lock() 190 defer c.mu.Unlock() 191 192 log := c.log.With().Uint64("final_height", final.Height).Logger() 193 194 // prune if the current height is less than the new height 195 c.prune(final) 196 197 // get all items that are eligible for initial or re-requesting 198 heights, blockIDs := c.getRequestableItems() 199 c.log.Debug().Msgf("scan found %d requestable heights, %d requestable block IDs", len(heights), len(blockIDs)) 200 201 // convert to valid range and batch requests 202 ranges := c.getRanges(heights) 203 batches := c.getBatches(blockIDs) 204 log.Debug().Str("ranges", fmt.Sprintf("%v", ranges)).Str("batches", fmt.Sprintf("%v", batches)).Msg("compiled range and batch requests") 205 206 return c.selectRequests(ranges, batches) 207 } 208 209 // WithinTolerance returns whether or not the given height is within configured 210 // height tolerance, wrt the given local finalized header. 211 func (c *Core) WithinTolerance(final *flow.Header, height uint64) bool { 212 213 lower := final.Height - uint64(c.Config.Tolerance) 214 if lower > final.Height { // underflow check 215 lower = 0 216 } 217 upper := final.Height + uint64(c.Config.Tolerance) 218 219 return height >= lower && height <= upper 220 } 221 222 // queueByHeight queues a request for the finalized block at the given height, 223 // only if no equivalent request has been queued before. 224 func (c *Core) queueByHeight(height uint64) { 225 // do not queue the block if the height is lower or the same as the local finalized height 226 // the check != 0 is necessary or we will never queue blocks at height 0 227 if height <= c.localFinalizedHeight && c.localFinalizedHeight != 0 { 228 return 229 } 230 231 // only queue the request if have never queued it before 232 if c.heights[height].WasQueued() { 233 return 234 } 235 236 // queue the request 237 c.heights[height] = chainsync.NewQueuedStatus(height) 238 } 239 240 // queueByBlockID queues a request for a block by block ID, only if no 241 // equivalent request has been queued before. 242 func (c *Core) queueByBlockID(blockID flow.Identifier, height uint64) { 243 // do not queue the block if the height is lower or the same as the local finalized height 244 // the check != 0 is necessary or we will never queue blocks at height 0 245 if height <= c.localFinalizedHeight && c.localFinalizedHeight != 0 { 246 return 247 } 248 249 // only queue the request if have never queued it before 250 if c.blockIDs[blockID].WasQueued() { 251 return 252 } 253 254 // queue the request 255 c.blockIDs[blockID] = chainsync.NewQueuedStatus(height) 256 } 257 258 // getRequestStatus retrieves a request status for a block, regardless of 259 // whether it was queued by height or by block ID. 260 func (c *Core) getRequestStatus(height uint64, blockID flow.Identifier) *chainsync.Status { 261 heightStatus := c.heights[height] 262 idStatus := c.blockIDs[blockID] 263 264 if idStatus.WasQueued() { 265 return idStatus 266 } 267 // Only return the height status if there is no matching status for the ID 268 if heightStatus.WasQueued() { 269 return heightStatus 270 } 271 272 return nil 273 } 274 275 // prune removes any pending requests which we have received and which is below 276 // the finalized height, or which we received sufficiently long ago. 277 func (c *Core) prune(final *flow.Header) { 278 if c.localFinalizedHeight >= final.Height { 279 return 280 } 281 282 c.localFinalizedHeight = final.Height 283 284 // track how many statuses we are pruning 285 initialHeights := len(c.heights) 286 initialBlockIDs := len(c.blockIDs) 287 288 for height, status := range c.heights { 289 if height <= final.Height { 290 delete(c.heights, height) 291 c.metrics.PrunedBlockByHeight(status) 292 } 293 } 294 295 for blockID, status := range c.blockIDs { 296 if status.BlockHeight <= final.Height { 297 delete(c.blockIDs, blockID) 298 c.metrics.PrunedBlockById(status) 299 } 300 } 301 302 currentHeights := len(c.heights) 303 currentBlockIDs := len(c.blockIDs) 304 305 prunedHeights := initialHeights - currentHeights 306 prunedBlockIDs := initialBlockIDs - currentBlockIDs 307 308 c.metrics.PrunedBlocks(prunedHeights, prunedBlockIDs, currentHeights, currentBlockIDs) 309 310 c.log.Debug(). 311 Uint64("final_height", final.Height). 312 Msgf("pruned %d heights, %d block IDs", prunedHeights, prunedBlockIDs) 313 } 314 315 func (c *Core) Prune(final *flow.Header) { 316 c.mu.Lock() 317 defer c.mu.Unlock() 318 c.prune(final) 319 } 320 321 // getRequestableItems will find all block IDs and heights that are eligible 322 // to be requested. 323 func (c *Core) getRequestableItems() ([]uint64, []flow.Identifier) { 324 325 // TODO: we will probably want to limit the maximum amount of in-flight 326 // requests and maximum amount of blocks requested at the same time here; 327 // for now, we just ignore that problem, but once we do, we should always 328 // prioritize range requests over batch requests 329 330 now := time.Now() 331 332 // create a list of all height requests that should be sent 333 var heights []uint64 334 for height, status := range c.heights { 335 336 // if the last request is young enough, skip 337 retryAfter := status.Requested.Add(c.Config.RetryInterval << status.Attempts) 338 if now.Before(retryAfter) { 339 continue 340 } 341 342 // if we've already received this block, skip 343 if status.WasReceived() { 344 continue 345 } 346 347 // if we reached maximum number of attempts, delete 348 if status.Attempts >= c.Config.MaxAttempts { 349 delete(c.heights, height) 350 continue 351 } 352 353 // otherwise, append to heights to be requested 354 heights = append(heights, height) 355 } 356 357 // create list of all the block IDs blocks that are missing 358 var blockIDs []flow.Identifier 359 for blockID, status := range c.blockIDs { 360 361 // if the last request is young enough, skip 362 retryAfter := status.Requested.Add(c.Config.RetryInterval << status.Attempts) 363 if now.Before(retryAfter) { 364 continue 365 } 366 367 // if we've already received this block, skip 368 if status.WasReceived() { 369 continue 370 } 371 372 // if we reached the maximum number of attempts for a queue item, drop 373 if status.Attempts >= c.Config.MaxAttempts { 374 delete(c.blockIDs, blockID) 375 continue 376 } 377 378 // otherwise, append to blockIDs to be requested 379 blockIDs = append(blockIDs, blockID) 380 } 381 382 return heights, blockIDs 383 } 384 385 // RangeRequested updates status state for a range of block heights that has 386 // been successfully requested. Must be called when a range request is submitted. 387 func (c *Core) RangeRequested(ran chainsync.Range) { 388 c.mu.Lock() 389 defer c.mu.Unlock() 390 c.metrics.RangeRequested(ran) 391 392 for height := ran.From; height <= ran.To; height++ { 393 status, exists := c.heights[height] 394 if !exists { 395 return 396 } 397 status.Requested = time.Now() 398 status.Attempts++ 399 } 400 } 401 402 // BatchRequested updates status state for a batch of block IDs that has been 403 // successfully requested. Must be called when a batch request is submitted. 404 func (c *Core) BatchRequested(batch chainsync.Batch) { 405 c.mu.Lock() 406 defer c.mu.Unlock() 407 c.metrics.BatchRequested(batch) 408 409 for _, blockID := range batch.BlockIDs { 410 status, exists := c.blockIDs[blockID] 411 if !exists { 412 return 413 } 414 status.Requested = time.Now() 415 status.Attempts++ 416 } 417 } 418 419 // getRanges returns a set of ranges of heights that can be used as range 420 // requests. 421 func (c *Core) getRanges(heights []uint64) []chainsync.Range { 422 423 // sort the heights so we can build contiguous ranges more easily 424 sort.Slice(heights, func(i int, j int) bool { 425 return heights[i] < heights[j] 426 }) 427 428 // build contiguous height ranges with maximum batch size 429 start := uint64(0) 430 end := uint64(0) 431 var ranges []chainsync.Range 432 for index, height := range heights { 433 434 // on the first iteration, we set the start pointer, so we don't need to 435 // guard the for loop when heights is empty 436 if index == 0 { 437 start = height 438 } 439 440 // we always forward the end pointer to the new height 441 end = height 442 443 // if we have the end of the loop, we always create one final range 444 if index >= len(heights)-1 { 445 r := chainsync.Range{From: start, To: end} 446 ranges = append(ranges, r) 447 break 448 } 449 450 // at this point, we will have a next height as iteration will continue 451 nextHeight := heights[index+1] 452 453 // if we have reached the maximum size for a range, we create the range 454 // and forward the start pointer to the next height 455 rangeSize := end - start + 1 456 if rangeSize >= uint64(c.Config.MaxSize) { 457 r := chainsync.Range{From: start, To: end} 458 ranges = append(ranges, r) 459 start = nextHeight 460 continue 461 } 462 463 // if end is more than one smaller than the next height, we have a gap 464 // next, so we create a range and forward the start pointer 465 if nextHeight > end+1 { 466 r := chainsync.Range{From: start, To: end} 467 ranges = append(ranges, r) 468 start = nextHeight 469 continue 470 } 471 } 472 473 return ranges 474 } 475 476 // getBatches returns a set of batches that can be used in batch requests. 477 func (c *Core) getBatches(blockIDs []flow.Identifier) []chainsync.Batch { 478 479 var batches []chainsync.Batch 480 // split the block IDs into maximum sized requests 481 for from := 0; from < len(blockIDs); from += int(c.Config.MaxSize) { 482 483 // make sure last range is not out of bounds 484 to := from + int(c.Config.MaxSize) 485 if to > len(blockIDs) { 486 to = len(blockIDs) 487 } 488 489 // create the block IDs slice 490 requestIDs := blockIDs[from:to] 491 batch := chainsync.Batch{ 492 BlockIDs: requestIDs, 493 } 494 batches = append(batches, batch) 495 } 496 497 return batches 498 } 499 500 // selectRequests selects which requests should be submitted, given a set of 501 // candidate range and batch requests. Range requests are given precedence and 502 // the total number of requests does not exceed the configured request maximum. 503 func (c *Core) selectRequests(ranges []chainsync.Range, batches []chainsync.Batch) ([]chainsync.Range, []chainsync.Batch) { 504 max := int(c.Config.MaxRequests) 505 506 if len(ranges) >= max { 507 return ranges[:max], nil 508 } 509 if len(ranges)+len(batches) >= max { 510 return ranges, batches[:max-len(ranges)] 511 } 512 return ranges, batches 513 }