// Copyright 2021 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package downloader

import (
	"errors"
	"sort"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/common/prque"
	"github.com/ethereum/go-ethereum/eth/protocols/eth"
	"github.com/ethereum/go-ethereum/log"
)

// timeoutGracePeriod is the amount of time to allow for a peer to deliver a
// response to a locally already timed out request. Timeouts are not penalized
// as a peer might be temporarily overloaded, however, they still must reply
// to each request. Failing to do so is considered a protocol violation.
var timeoutGracePeriod = 2 * time.Minute

// typedQueue is an interface defining the adaptor needed to translate the type
// specific downloader/queue schedulers into the type-agnostic general concurrent
// fetcher algorithm calls.
type typedQueue interface {
	// waker returns a notification channel that gets pinged in case more fetches
	// have been queued up, so the fetcher might assign it to idle peers.
	waker() chan bool

	// pending returns the number of wrapped items that are currently queued for
	// fetching by the concurrent downloader.
	pending() int

	// capacity is responsible for calculating how many items of the abstracted
	// type a particular peer is estimated to be able to retrieve within the
	// allotted round trip time.
	capacity(peer *peerConnection, rtt time.Duration) int

	// updateCapacity is responsible for updating how many items of the abstracted
	// type a particular peer is estimated to be able to retrieve in a unit time.
	updateCapacity(peer *peerConnection, items int, elapsed time.Duration)

	// reserve is responsible for allocating a requested number of pending items
	// from the download queue to the specified peer.
	reserve(peer *peerConnection, items int) (*fetchRequest, bool, bool)

	// unreserve is responsible for removing the current retrieval allocation
	// assigned to a specific peer and placing it back into the pool to allow
	// reassigning to some other peer.
	unreserve(peer string) int

	// request is responsible for converting a generic fetch request into a typed
	// one and sending it to the remote peer for fulfillment.
	request(peer *peerConnection, req *fetchRequest, resCh chan *eth.Response) (*eth.Request, error)

	// deliver is responsible for taking a generic response packet from the
	// concurrent fetcher, unpacking the type specific data and delivering
	// it to the downloader's queue.
	deliver(peer *peerConnection, packet *eth.Response) (int, error)
}

// concurrentFetch iteratively downloads scheduled block parts, taking available
// peers, reserving a chunk of fetch requests for each and waiting for delivery
// or timeouts.
func (d *Downloader) concurrentFetch(queue typedQueue) error {
	// Create a delivery channel to accept responses from all peers
	responses := make(chan *eth.Response)

	// Track the currently active requests and their timeout order
	pending := make(map[string]*eth.Request)
	defer func() {
		// Abort all requests on sync cycle cancellation. The requests may still
		// be fulfilled by the remote side, but the dispatcher will not wait to
		// deliver them since nobody's going to be listening.
		for _, req := range pending {
			req.Close()
		}
	}()
	// ordering tracks each live request's current index inside the timeout
	// priority queue; prque keeps it in sync via the index callback below so
	// that requests can be removed from the middle of the queue by index.
	ordering := make(map[*eth.Request]int)
	timeouts := prque.New[int64, *eth.Request](func(data *eth.Request, index int) {
		ordering[data] = index
	})

	// Create the timer stopped and drained, so the first Reset arms it cleanly.
	timeout := time.NewTimer(0)
	if !timeout.Stop() {
		<-timeout.C
	}
	defer timeout.Stop()

	// Track the timed-out but not-yet-answered requests separately. We want to
	// keep tracking which peers are busy (potentially overloaded), so removing
	// all trace of a timed out request is not good. We also can't just cancel
	// the pending request altogether as that would prevent a late response from
	// being delivered, thus never unblocking the peer.
	stales := make(map[string]*eth.Request)
	defer func() {
		// Abort all requests on sync cycle cancellation. The requests may still
		// be fulfilled by the remote side, but the dispatcher will not wait to
		// deliver them since nobody's going to be listening.
		for _, req := range stales {
			req.Close()
		}
	}()
	// Subscribe to peer lifecycle events to schedule tasks to new joiners and
	// reschedule tasks upon disconnections. We don't care which event happened
	// for simplicity, so just use a single channel.
	peering := make(chan *peeringEvent, 64) // arbitrary buffer, just some burst protection

	peeringSub := d.peers.SubscribeEvents(peering)
	defer peeringSub.Unsubscribe()

	// Prepare the queue and fetch block parts until the block header fetcher's done
	finished := false
	for {
		// If there's nothing more to fetch, wait or terminate
		if queue.pending() == 0 {
			if len(pending) == 0 && finished {
				return nil
			}
		} else {
			// Send a download request to all idle peers, until throttled
			var (
				idles []*peerConnection
				caps  []int
			)
			for _, peer := range d.peers.AllPeers() {
				pending, stale := pending[peer.id], stales[peer.id]
				if pending == nil && stale == nil {
					idles = append(idles, peer)
					caps = append(caps, queue.capacity(peer, time.Second))
				} else if stale != nil {
					if waited := time.Since(stale.Sent); waited > timeoutGracePeriod {
						// Request has been in flight longer than the grace period
						// permitted it, consider the peer malicious attempting to
						// stall the sync.
						peer.log.Warn("Peer stalling, dropping", "waited", common.PrettyDuration(waited))
						d.dropPeer(peer.id)
					}
				}
			}
			sort.Sort(&peerCapacitySort{idles, caps})

			var throttled bool
			for _, peer := range idles {
				// Short circuit if throttling activated or there are no more
				// queued tasks to be retrieved
				if throttled {
					break
				}
				if queued := queue.pending(); queued == 0 {
					break
				}
				// Reserve a chunk of fetches for a peer. A nil can mean either that
				// no more headers are available, or that the peer is known not to
				// have them.
				request, _, throttle := queue.reserve(peer, queue.capacity(peer, d.peers.rates.TargetRoundTrip()))
				if throttle {
					throttled = true
					throttleCounter.Inc(1)
				}
				if request == nil {
					continue
				}
				// Fetch the chunk and make sure any errors return the hashes to the queue
				req, err := queue.request(peer, request, responses)
				if err != nil {
					// Sending the request failed, which generally means the peer
					// was disconnected in between assignment and network send.
					// Although all peer removal operations return allocated tasks
					// to the queue, that is async, and we can do better here by
					// immediately pushing the unfulfilled requests.
					queue.unreserve(peer.id) // TODO(karalabe): This needs a non-expiration method
					continue
				}
				pending[peer.id] = req

				ttl := d.peers.rates.TargetTimeout()
				ordering[req] = timeouts.Size()

				// Priorities are negated expiry timestamps, so the request
				// expiring soonest sits at the top of the queue.
				timeouts.Push(req, -time.Now().Add(ttl).UnixNano())
				if timeouts.Size() == 1 {
					timeout.Reset(ttl)
				}
			}
		}
		// Wait for something to happen
		select {
		case <-d.cancelCh:
			// If sync was cancelled, tear down the parallel retriever. Pending
			// requests will be cancelled locally, and the remote responses will
			// be dropped when they arrive
			return errCanceled

		case event := <-peering:
			// A peer joined or left, the tasks queue and allocations need to be
			// checked for potential assignment or reassignment
			peerid := event.peer.id

			if event.join {
				// Sanity check the internal state; this can be dropped later
				if _, ok := pending[peerid]; ok {
					event.peer.log.Error("Pending request exists for joining peer")
				}
				if _, ok := stales[peerid]; ok {
					event.peer.log.Error("Stale request exists for joining peer")
				}
				// Loop back to the entry point for task assignment
				continue
			}
			// A peer left, any existing requests need to be untracked, pending
			// tasks returned and possible reassignment checked
			if req, ok := pending[peerid]; ok {
				queue.unreserve(peerid) // TODO(karalabe): This needs a non-expiration method
				delete(pending, peerid)
				req.Close()

				if index, live := ordering[req]; live {
					timeouts.Remove(index)
					if index == 0 {
						// Removed the queue head: the timer was armed for this
						// request, so drain it and re-arm for the next expiry.
						if !timeout.Stop() {
							<-timeout.C
						}
						if timeouts.Size() > 0 {
							_, exp := timeouts.Peek()
							timeout.Reset(time.Until(time.Unix(0, -exp)))
						}
					}
					delete(ordering, req)
				}
			}
			if req, ok := stales[peerid]; ok {
				delete(stales, peerid)
				req.Close()
			}

		case <-timeout.C:
			// Retrieve the next request which should have timed out. The check
			// below is purely to catch programming errors, given the correct
			// code, there's no possible order of events that should result in a
			// timeout firing for a non-existent event.
			req, exp := timeouts.Peek()
			if now, at := time.Now(), time.Unix(0, -exp); now.Before(at) {
				log.Error("Timeout triggered but not reached", "left", at.Sub(now))
				timeout.Reset(at.Sub(now))
				continue
			}
			// Stop tracking the timed out request from a timing perspective,
			// cancel it, so it's not considered in-flight anymore, but keep
			// the peer marked busy to prevent assigning a second request and
			// overloading it further.
			delete(pending, req.Peer)
			stales[req.Peer] = req

			timeouts.Pop() // Popping an item will reorder indices in `ordering`, delete after, otherwise will resurrect!
			if timeouts.Size() > 0 {
				_, exp := timeouts.Peek()
				timeout.Reset(time.Until(time.Unix(0, -exp)))
			}
			delete(ordering, req)

			// New timeout potentially set if there are more requests pending,
			// reschedule the failed one to a free peer
			fails := queue.unreserve(req.Peer)

			// Finally, update the peer's retrieval capacity, or if it's already
			// below the minimum allowance, drop the peer. If a lot of retrieval
			// elements expired, we might have overestimated the remote peer or
			// perhaps ourselves. Only reset to minimal throughput but don't drop
			// just yet.
			//
			// The reason the minimum threshold is 2 is that the downloader tries
			// to estimate the bandwidth and latency of a peer separately, which
			// requires pushing the measured capacity a bit and seeing how response
			// times react, so it always requests one more than the minimum (i.e.
			// min 2).
			peer := d.peers.Peer(req.Peer)
			if peer == nil {
				// If the peer got disconnected in between, we should really have
				// short-circuited it already. Just in case there's some strange
				// codepath, leave this check in not to crash.
				log.Error("Delivery timeout from unknown peer", "peer", req.Peer)
				continue
			}
			if fails > 2 {
				queue.updateCapacity(peer, 0, 0)
			} else {
				d.dropPeer(peer.id)
			}

		case res := <-responses:
			// Response arrived, it may be for an existing or an already timed
			// out request. If the former, update the timeout heap and perhaps
			// reschedule the timeout timer.
			index, live := ordering[res.Req]
			if live {
				timeouts.Remove(index)
				if index == 0 {
					// Removed the queue head: the timer was armed for this
					// request, so drain it and re-arm for the next expiry.
					if !timeout.Stop() {
						<-timeout.C
					}
					if timeouts.Size() > 0 {
						_, exp := timeouts.Peek()
						timeout.Reset(time.Until(time.Unix(0, -exp)))
					}
				}
				delete(ordering, res.Req)
			}
			// Delete the pending request (if it still exists) and mark the peer idle
			delete(pending, res.Req.Peer)
			delete(stales, res.Req.Peer)

			// Signal the dispatcher that the round trip is done. We'll drop the
			// peer if the data turns out to be junk.
			res.Done <- nil
			res.Req.Close()

			// If the peer was previously banned and failed to deliver its pack
			// in a reasonable time frame, ignore its message.
			if peer := d.peers.Peer(res.Req.Peer); peer != nil {
				// Deliver the received chunk of data and check chain validity
				accepted, err := queue.deliver(peer, res)
				if errors.Is(err, errInvalidChain) {
					return err
				}
				// Unless a peer delivered something completely different than requested
				// (usually caused by a timed out request which came through in the end),
				// set it to idle. If the delivery's stale, the peer should have already
				// been idled.
				if !errors.Is(err, errStaleDelivery) {
					queue.updateCapacity(peer, accepted, res.Time)
				}
			}

		case cont := <-queue.waker():
			// The header fetcher sent a continuation flag, check if it's done
			if !cont {
				finished = true
			}
		}
	}
}