github.com/prysmaticlabs/prysm@v1.4.4/beacon-chain/sync/initial-sync/blocks_fetcher.go

package initialsync

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/kevinms/leakybucket-go"
	"github.com/libp2p/go-libp2p-core/peer"
	"github.com/pkg/errors"
	types "github.com/prysmaticlabs/eth2-types"
	"github.com/prysmaticlabs/prysm/beacon-chain/db"
	"github.com/prysmaticlabs/prysm/beacon-chain/p2p"
	p2pTypes "github.com/prysmaticlabs/prysm/beacon-chain/p2p/types"
	prysmsync "github.com/prysmaticlabs/prysm/beacon-chain/sync"
	"github.com/prysmaticlabs/prysm/cmd/beacon-chain/flags"
	p2ppb "github.com/prysmaticlabs/prysm/proto/beacon/p2p/v1"
	"github.com/prysmaticlabs/prysm/proto/interfaces"
	"github.com/prysmaticlabs/prysm/shared/params"
	"github.com/prysmaticlabs/prysm/shared/rand"
	"github.com/sirupsen/logrus"
	"go.opencensus.io/trace"
)

const (
	// maxPendingRequests limits how many concurrent fetch requests one can initiate.
	maxPendingRequests = 64
	// peersPercentagePerRequest caps the percentage of peers to be used in a request.
	peersPercentagePerRequest = 0.75
	// handshakePollingInterval is a polling interval for checking the number of received handshakes.
	handshakePollingInterval = 5 * time.Second
	// peerLocksPollingInterval is a polling interval for checking if there are stale peer locks.
	peerLocksPollingInterval = 5 * time.Minute
	// peerLockMaxAge is the maximum time before a stale lock is purged.
	peerLockMaxAge = 60 * time.Minute
	// nonSkippedSlotsFullSearchEpochs defines how many epochs to check in full, before resorting to
	// random sampling of slots once per epoch.
	nonSkippedSlotsFullSearchEpochs = 10
	// peerFilterCapacityWeight defines how a peer's capacity affects its score. Provided as a
	// percentage, i.e. 0.3 means capacity will determine 30% of a peer's score.
	peerFilterCapacityWeight = 0.2
	// backtrackingMaxHops defines how many hops (during search for common ancestor in backtracking)
	// to do before giving up.
	backtrackingMaxHops = 128
)

var (
	errNoPeersAvailable      = errors.New("no peers available, waiting for reconnect")
	errFetcherCtxIsDone      = errors.New("fetcher's context is done, reinitialize")
	errSlotIsTooHigh         = errors.New("slot is higher than the finalized slot")
	errBlockAlreadyProcessed = errors.New("block is already processed")
	errParentDoesNotExist    = errors.New("beacon node doesn't have a parent in db with root")
	errNoPeersWithAltBlocks  = errors.New("no peers with alternative blocks found")
)

// blocksFetcherConfig is a config to set up the block fetcher.
type blocksFetcherConfig struct {
	chain                    blockchainService
	p2p                      p2p.P2P
	db                       db.ReadOnlyDatabase
	peerFilterCapacityWeight float64
	mode                     syncMode
}

// blocksFetcher is a service to fetch chain data from peers.
// On an incoming request, the requested block range is evenly divided
// among available peers (for fair network load distribution).
type blocksFetcher struct {
	sync.Mutex
	ctx             context.Context
	cancel          context.CancelFunc
	rand            *rand.Rand
	chain           blockchainService
	p2p             p2p.P2P
	db              db.ReadOnlyDatabase
	blocksPerSecond uint64
	rateLimiter     *leakybucket.Collector
	peerLocks       map[peer.ID]*peerLock
	fetchRequests   chan *fetchRequestParams
	fetchResponses  chan *fetchRequestResponse
	capacityWeight  float64       // how remaining capacity affects peer selection
	mode            syncMode      // allows using the fetcher in different sync scenarios
	quit            chan struct{} // termination notifier
}

// peerLock restricts fetcher actions on a per-peer basis. Currently used for rate limiting.
type peerLock struct {
	sync.Mutex
	accessed time.Time
}

// fetchRequestParams holds parameters necessary to schedule a fetch request.
type fetchRequestParams struct {
	ctx   context.Context // if provided, it is used instead of the fetcher's global context
	start types.Slot      // starting slot
	count uint64          // how many slots to receive (fetcher may return fewer slots)
}

// fetchRequestResponse is a combined type to hold results of both successful executions and errors.
// The valid usage pattern is to check whether the result's `err` is nil before using `blocks`.
type fetchRequestResponse struct {
	pid    peer.ID
	start  types.Slot
	count  uint64
	blocks []interfaces.SignedBeaconBlock
	err    error
}

// newBlocksFetcher creates a ready-to-use fetcher.
func newBlocksFetcher(ctx context.Context, cfg *blocksFetcherConfig) *blocksFetcher {
	blocksPerSecond := flags.Get().BlockBatchLimit
	allowedBlocksBurst := flags.Get().BlockBatchLimitBurstFactor * flags.Get().BlockBatchLimit
	// Allow fetcher to go almost to the full burst capacity (less a single batch).
	rateLimiter := leakybucket.NewCollector(
		float64(blocksPerSecond), int64(allowedBlocksBurst-blocksPerSecond),
		false /* deleteEmptyBuckets */)

	capacityWeight := cfg.peerFilterCapacityWeight
	if capacityWeight >= 1 {
		capacityWeight = peerFilterCapacityWeight
	}

	ctx, cancel := context.WithCancel(ctx)
	return &blocksFetcher{
		ctx:             ctx,
		cancel:          cancel,
		rand:            rand.NewGenerator(),
		chain:           cfg.chain,
		p2p:             cfg.p2p,
		db:              cfg.db,
		blocksPerSecond: uint64(blocksPerSecond),
		rateLimiter:     rateLimiter,
		peerLocks:       make(map[peer.ID]*peerLock),
		fetchRequests:   make(chan *fetchRequestParams, maxPendingRequests),
		fetchResponses:  make(chan *fetchRequestResponse, maxPendingRequests),
		capacityWeight:  capacityWeight,
		mode:            cfg.mode,
		quit:            make(chan struct{}),
	}
}

// start boots up the fetcher, which starts listening for incoming fetch requests.
func (f *blocksFetcher) start() error {
	select {
	case <-f.ctx.Done():
		return errFetcherCtxIsDone
	default:
		go f.loop()
		return nil
	}
}

// stop terminates all fetcher operations.
func (f *blocksFetcher) stop() {
	defer func() {
		if f.rateLimiter != nil {
			f.rateLimiter.Free()
			f.rateLimiter = nil
		}
	}()
	f.cancel()
	<-f.quit // make sure that loop() is done
}

// requestResponses exposes a channel into which the fetcher pushes generated request responses.
func (f *blocksFetcher) requestResponses() <-chan *fetchRequestResponse {
	return f.fetchResponses
}
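// A minimal usage sketch of the fetcher's lifecycle (hypothetical caller code: the
// chainService, p2pService, beaconDB, startSlot, batchSize, and process identifiers are
// assumptions for illustration and are not defined in this file):
//
//	fetcher := newBlocksFetcher(ctx, &blocksFetcherConfig{
//		chain: chainService,
//		p2p:   p2pService,
//		db:    beaconDB,
//		mode:  modeStopOnFinalizedEpoch,
//	})
//	if err := fetcher.start(); err != nil {
//		return err
//	}
//	defer fetcher.stop()
//
//	if err := fetcher.scheduleRequest(ctx, startSlot, batchSize); err != nil {
//		return err
//	}
//	// Each scheduled request is handled in its own goroutine, so responses may arrive
//	// out of order; resp.start identifies which batch a response belongs to.
//	resp := <-fetcher.requestResponses()
//	if resp.err == nil {
//		process(resp.blocks) // hypothetical handler
//	}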
// loop is the main fetcher loop: it listens for incoming requests/cancellations and forwards outgoing responses.
func (f *blocksFetcher) loop() {
	defer close(f.quit)

	// Wait for all loop's goroutines to finish, and safely release resources.
	wg := &sync.WaitGroup{}
	defer func() {
		wg.Wait()
		close(f.fetchResponses)
	}()

	// Periodically remove stale peer locks.
	go func() {
		ticker := time.NewTicker(peerLocksPollingInterval)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				f.removeStalePeerLocks(peerLockMaxAge)
			case <-f.ctx.Done():
				return
			}
		}
	}()

	// Main loop.
	for {
		// Make sure there are available peers before processing requests.
		if _, err := f.waitForMinimumPeers(f.ctx); err != nil {
			log.Error(err)
		}

		select {
		case <-f.ctx.Done():
			log.Debug("Context closed, exiting goroutine (blocks fetcher)")
			return
		case req := <-f.fetchRequests:
			wg.Add(1)
			go func() {
				defer wg.Done()
				select {
				case <-f.ctx.Done():
				case f.fetchResponses <- f.handleRequest(req.ctx, req.start, req.count):
				}
			}()
		}
	}
}

// scheduleRequest adds a request to the incoming queue.
func (f *blocksFetcher) scheduleRequest(ctx context.Context, start types.Slot, count uint64) error {
	if ctx.Err() != nil {
		return ctx.Err()
	}

	request := &fetchRequestParams{
		ctx:   ctx,
		start: start,
		count: count,
	}
	select {
	case <-f.ctx.Done():
		return errFetcherCtxIsDone
	case f.fetchRequests <- request:
	}
	return nil
}

// handleRequest parses a fetch request and forwards it to the response builder.
func (f *blocksFetcher) handleRequest(ctx context.Context, start types.Slot, count uint64) *fetchRequestResponse {
	ctx, span := trace.StartSpan(ctx, "initialsync.handleRequest")
	defer span.End()

	response := &fetchRequestResponse{
		start:  start,
		count:  count,
		blocks: []interfaces.SignedBeaconBlock{},
		err:    nil,
	}

	if ctx.Err() != nil {
		response.err = ctx.Err()
		return response
	}

	_, targetEpoch, peers := f.calculateHeadAndTargetEpochs()
	if len(peers) == 0 {
		response.err = errNoPeersAvailable
		return response
	}

	// Short circuit requests whose start slot is far beyond the highest finalized slot,
	// to avoid spinning in an infinite loop.
	if f.mode == modeStopOnFinalizedEpoch {
		highestFinalizedSlot := params.BeaconConfig().SlotsPerEpoch.Mul(uint64(targetEpoch + 1))
		if start > highestFinalizedSlot {
			response.err = fmt.Errorf("%w, slot: %d, highest finalized slot: %d",
				errSlotIsTooHigh, start, highestFinalizedSlot)
			return response
		}
	}

	response.blocks, response.pid, response.err = f.fetchBlocksFromPeer(ctx, start, count, peers)
	return response
}

// fetchBlocksFromPeer fetches blocks from a single randomly selected peer.
func (f *blocksFetcher) fetchBlocksFromPeer(
	ctx context.Context,
	start types.Slot, count uint64,
	peers []peer.ID,
) ([]interfaces.SignedBeaconBlock, peer.ID, error) {
	ctx, span := trace.StartSpan(ctx, "initialsync.fetchBlocksFromPeer")
	defer span.End()

	peers = f.filterPeers(ctx, peers, peersPercentagePerRequest)
	req := &p2ppb.BeaconBlocksByRangeRequest{
		StartSlot: start,
		Count:     count,
		Step:      1,
	}
	for i := 0; i < len(peers); i++ {
		if blocks, err := f.requestBlocks(ctx, req, peers[i]); err == nil {
			f.p2p.Peers().Scorers().BlockProviderScorer().Touch(peers[i])
			return blocks, peers[i], err
		}
	}
	return nil, "", errNoPeersAvailable
}

// requestBlocks is a wrapper for handling BeaconBlocksByRangeRequest requests/streams.
func (f *blocksFetcher) requestBlocks(
	ctx context.Context,
	req *p2ppb.BeaconBlocksByRangeRequest,
	pid peer.ID,
) ([]interfaces.SignedBeaconBlock, error) {
	if ctx.Err() != nil {
		return nil, ctx.Err()
	}
	l := f.peerLock(pid)
	l.Lock()
	log.WithFields(logrus.Fields{
		"peer":     pid,
		"start":    req.StartSlot,
		"count":    req.Count,
		"step":     req.Step,
		"capacity": f.rateLimiter.Remaining(pid.String()),
		"score":    f.p2p.Peers().Scorers().BlockProviderScorer().FormatScorePretty(pid),
	}).Debug("Requesting blocks")
	if f.rateLimiter.Remaining(pid.String()) < int64(req.Count) {
		if err := f.waitForBandwidth(pid); err != nil {
			return nil, err
		}
	}
	f.rateLimiter.Add(pid.String(), int64(req.Count))
	l.Unlock()
	return prysmsync.SendBeaconBlocksByRangeRequest(ctx, f.chain, f.p2p, pid, req, nil)
}

// requestBlocksByRoot is a wrapper for handling BeaconBlockByRootsReq requests/streams.
func (f *blocksFetcher) requestBlocksByRoot(
	ctx context.Context,
	req *p2pTypes.BeaconBlockByRootsReq,
	pid peer.ID,
) ([]interfaces.SignedBeaconBlock, error) {
	if ctx.Err() != nil {
		return nil, ctx.Err()
	}
	l := f.peerLock(pid)
	l.Lock()
	log.WithFields(logrus.Fields{
		"peer":     pid,
		"numRoots": len(*req),
		"capacity": f.rateLimiter.Remaining(pid.String()),
		"score":    f.p2p.Peers().Scorers().BlockProviderScorer().FormatScorePretty(pid),
	}).Debug("Requesting blocks (by roots)")
	if f.rateLimiter.Remaining(pid.String()) < int64(len(*req)) {
		if err := f.waitForBandwidth(pid); err != nil {
			return nil, err
		}
	}
	f.rateLimiter.Add(pid.String(), int64(len(*req)))
	l.Unlock()

	return prysmsync.SendBeaconBlocksByRootRequest(ctx, f.chain, f.p2p, pid, req, nil)
}

// waitForBandwidth blocks until the peer's bandwidth is restored.
func (f *blocksFetcher) waitForBandwidth(pid peer.ID) error {
	log.WithField("peer", pid).Debug("Slowing down for rate limit")
	timer := time.NewTimer(f.rateLimiter.TillEmpty(pid.String()))
	defer timer.Stop()
	select {
	case <-f.ctx.Done():
		return errFetcherCtxIsDone
	case <-timer.C:
		// Peer has gathered enough capacity to be polled again.
	}
	return nil
}
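// requestBlocks and requestBlocksByRoot share the same leaky-bucket pattern: check the
// peer's Remaining capacity, wait via TillEmpty if the bucket cannot absorb the request,
// then Add the consumed amount. A standalone sketch of that pattern, using only the
// leakybucket.Collector methods seen above (the consumeCapacity helper is hypothetical
// and not part of this file):
//
//	func consumeCapacity(c *leakybucket.Collector, key string, amount int64, done <-chan struct{}) error {
//		if c.Remaining(key) < amount {
//			timer := time.NewTimer(c.TillEmpty(key))
//			defer timer.Stop()
//			select {
//			case <-done:
//				return errFetcherCtxIsDone
//			case <-timer.C: // bucket has drained enough to accept the request
//			}
//		}
//		c.Add(key, amount)
//		return nil
//	}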