github.com/palisadeinc/bor@v0.0.0-20230615125219-ab7196213d15/eth/fetcher/tx_fetcher.go

// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package fetcher

import (
	"bytes"
	"errors"
	"fmt"
	mrand "math/rand"
	"sort"
	"time"

	mapset "github.com/deckarep/golang-set"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/common/mclock"
	"github.com/ethereum/go-ethereum/core"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
)

const (
	// maxTxAnnounces is the maximum number of unique transactions a peer
	// can announce in a short time.
	maxTxAnnounces = 4096

	// maxTxRetrievals is the maximum number of transactions that can be fetched
	// in one request. The rationale for picking 256 is:
	//   - In the eth protocol, the softResponseLimit is 2MB. Nowadays, according
	//     to Etherscan, the average transaction size is around 200B, so in theory
	//     we can include lots of transactions in a single protocol packet.
	//   - However, the maximum size of a single transaction is raised to 128KB,
	//     so pick a middle value here to ensure we can maximize the efficiency
	//     of the retrieval and response size overflow won't happen in most cases.
	maxTxRetrievals = 256

	// maxTxUnderpricedSetSize is the size of the underpriced transaction set that
	// is used to track recent transactions that have been dropped so we don't
	// re-request them.
	maxTxUnderpricedSetSize = 32768

	// txGatherSlack is the interval used to collate almost-expired announces
	// with network fetches.
	txGatherSlack = 100 * time.Millisecond

	// maxTxArrivalWait is the longest acceptable duration for the txArrivalWait
	// configuration value. Longer config values will default to this.
	maxTxArrivalWait = 500 * time.Millisecond
)

var (
	// txFetchTimeout is the maximum allotted time to return an explicitly
	// requested transaction.
	txFetchTimeout = 5 * time.Second
)

var (
	txAnnounceInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/in", nil)
	txAnnounceKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/known", nil)
	txAnnounceUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/underpriced", nil)
	txAnnounceDOSMeter         = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/dos", nil)

	txBroadcastInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/in", nil)
	txBroadcastKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/known", nil)
	txBroadcastUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/underpriced", nil)
	txBroadcastOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/otherreject", nil)

	txRequestOutMeter     = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/out", nil)
	txRequestFailMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/fail", nil)
	txRequestDoneMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/done", nil)
	txRequestTimeoutMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/timeout", nil)

	txReplyInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/in", nil)
	txReplyKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/known", nil)
	txReplyUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/underpriced", nil)
	txReplyOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/otherreject", nil)

	txFetcherWaitingPeers   = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/peers", nil)
	txFetcherWaitingHashes  = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/hashes", nil)
	txFetcherQueueingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/peers", nil)
	txFetcherQueueingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/hashes", nil)
	txFetcherFetchingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/peers", nil)
	txFetcherFetchingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/hashes", nil)
)

// txAnnounce is the notification of the availability of a batch
// of new transactions in the network.
type txAnnounce struct {
	origin string        // Identifier of the peer originating the notification
	hashes []common.Hash // Batch of transaction hashes being announced
}

// txRequest represents an in-flight transaction retrieval request destined to
// a specific peer.
type txRequest struct {
	hashes []common.Hash            // Transactions having been requested
	stolen map[common.Hash]struct{} // Deliveries by someone else (don't re-request)
	time   mclock.AbsTime           // Timestamp of the request
}

// txDelivery is the notification that a batch of transactions have been added
// to the pool and should be untracked.
type txDelivery struct {
	origin string        // Identifier of the peer originating the notification
	hashes []common.Hash // Batch of transaction hashes having been delivered
	direct bool          // Whether this is a direct reply or a broadcast
}

// txDrop is the notification that a peer has disconnected.
type txDrop struct {
	peer string
}

// TxFetcher is responsible for retrieving new transactions based on announcements.
//
// The fetcher operates in 3 stages:
//   - Transactions that are newly discovered are moved into a wait list.
//   - After ~500ms passes, transactions from the wait list that have not been
//     broadcast to us in whole are moved into a queueing area.
//   - When a connected peer doesn't have in-flight retrieval requests, any
//     transaction queued up (and announced by the peer) are allocated to the
//     peer and moved into a fetching status until it's fulfilled or fails.
//
// The invariants of the fetcher are:
//   - Each tracked transaction (hash) must only be present in one of the
//     three stages. This ensures that the fetcher operates akin to a finite
//     state automaton and there's no data leak.
//   - Each peer that announced transactions may be scheduled retrievals, but
//     only ever one concurrently. This ensures we can immediately know what is
//     missing from a reply and reschedule it.
type TxFetcher struct {
	notify  chan *txAnnounce
	cleanup chan *txDelivery
	drop    chan *txDrop
	quit    chan struct{}

	underpriced mapset.Set // Transactions discarded as too cheap (don't re-fetch)

	// Stage 1: Waiting lists for newly discovered transactions that might be
	// broadcast without needing explicit request/reply round trips.
	waitlist  map[common.Hash]map[string]struct{} // Transactions waiting for a potential broadcast
	waittime  map[common.Hash]mclock.AbsTime      // Timestamps when transactions were added to the waitlist
	waitslots map[string]map[common.Hash]struct{} // Waiting announcements grouped by peer (DoS protection)

	// Stage 2: Queue of transactions that are waiting to be allocated to some
	// peer to be retrieved directly.
	announces map[string]map[common.Hash]struct{} // Set of announced transactions, grouped by origin peer
	announced map[common.Hash]map[string]struct{} // Set of download locations, grouped by transaction hash

	// Stage 3: Set of transactions currently being retrieved, some of which may
	// be fulfilled and some rescheduled. Note, this step shares 'announces' from
	// the previous stage to avoid having to duplicate (need it for DoS checks).
	fetching   map[common.Hash]string              // Transaction set currently being retrieved
	requests   map[string]*txRequest               // In-flight transaction retrievals
	alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails

	// Callbacks
	hasTx    func(common.Hash) bool             // Retrieves a tx from the local txpool
	addTxs   func([]*types.Transaction) []error // Insert a batch of transactions into local txpool
	fetchTxs func(string, []common.Hash) error  // Retrieves a set of txs from a remote peer

	step  chan struct{} // Notification channel when the fetcher loop iterates
	clock mclock.Clock  // Time wrapper to simulate in tests
	rand  *mrand.Rand   // Randomizer to use in tests instead of map range loops (soft-random)

	txArrivalWait time.Duration // txArrivalWait is the time allowance before an announced transaction is explicitly requested.
}

// NewTxFetcher creates a transaction fetcher to retrieve transactions
// based on hash announcements.
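//
// As a rough wiring sketch (the pool and peer handles here are hypothetical,
// not part of this file), the three callbacks are expected to bridge to the
// local transaction pool and to a per-peer retrieval request:
//
//	f := fetcher.NewTxFetcher(
//		pool.Has,        // assumed: reports whether a hash is already pooled
//		pool.AddRemotes, // assumed: imports remote txs, returning one error per tx
//		func(peer string, hashes []common.Hash) error {
//			return peers.Peer(peer).RequestTxs(hashes) // assumed peer registry
//		},
//		500*time.Millisecond, // txArrivalWait, clamped by Start to [txGatherSlack, maxTxArrivalWait]
//	)
//	f.Start()
//	defer f.Stop()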
func NewTxFetcher(hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, txArrivalWait time.Duration) *TxFetcher {
	return NewTxFetcherForTests(hasTx, addTxs, fetchTxs, mclock.System{}, nil, txArrivalWait)
}

// NewTxFetcherForTests is a testing method to mock out the realtime clock with
// a simulated version and the internal randomness with a deterministic one.
func NewTxFetcherForTests(
	hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error,
	clock mclock.Clock, rand *mrand.Rand, txArrivalWait time.Duration) *TxFetcher {
	return &TxFetcher{
		notify:        make(chan *txAnnounce),
		cleanup:       make(chan *txDelivery),
		drop:          make(chan *txDrop),
		quit:          make(chan struct{}),
		waitlist:      make(map[common.Hash]map[string]struct{}),
		waittime:      make(map[common.Hash]mclock.AbsTime),
		waitslots:     make(map[string]map[common.Hash]struct{}),
		announces:     make(map[string]map[common.Hash]struct{}),
		announced:     make(map[common.Hash]map[string]struct{}),
		fetching:      make(map[common.Hash]string),
		requests:      make(map[string]*txRequest),
		alternates:    make(map[common.Hash]map[string]struct{}),
		underpriced:   mapset.NewSet(),
		hasTx:         hasTx,
		addTxs:        addTxs,
		fetchTxs:      fetchTxs,
		clock:         clock,
		rand:          rand,
		txArrivalWait: txArrivalWait,
	}
}

// Notify announces the fetcher of the potential availability of a new batch of
// transactions in the network.
func (f *TxFetcher) Notify(peer string, hashes []common.Hash) error {
	// Keep track of all the announced transactions
	txAnnounceInMeter.Mark(int64(len(hashes)))

	// Skip any transaction announcements that we already know of, or that we've
	// previously marked as cheap and discarded. This check is of course racy,
	// because multiple concurrent notifies will still manage to pass it, but it's
	// still valuable to check here because it runs concurrently with the internal
	// loop, so anything caught here is time saved internally.
	var (
		unknowns               = make([]common.Hash, 0, len(hashes))
		duplicate, underpriced int64
	)
	for _, hash := range hashes {
		switch {
		case f.hasTx(hash):
			duplicate++

		case f.underpriced.Contains(hash):
			underpriced++

		default:
			unknowns = append(unknowns, hash)
		}
	}
	txAnnounceKnownMeter.Mark(duplicate)
	txAnnounceUnderpricedMeter.Mark(underpriced)

	// If anything's left to announce, push it into the internal loop
	if len(unknowns) == 0 {
		return nil
	}
	announce := &txAnnounce{
		origin: peer,
		hashes: unknowns,
	}
	select {
	case f.notify <- announce:
		return nil
	case <-f.quit:
		return errTerminated
	}
}

// Enqueue imports a batch of received transactions into the transaction pool
// and the fetcher. This method may be called by both transaction broadcasts and
// direct request replies. The differentiation is important so the fetcher can
// re-schedule missing transactions as soon as possible.
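//
// As a hedged illustration of the call sites (the peerID and message routing
// are assumptions, not part of this file), a protocol handler would forward
// the two inbound paths as:
//
//	f.Enqueue(peerID, txs, false) // transactions received via an unsolicited broadcast
//	f.Enqueue(peerID, txs, true)  // transactions received as a reply to our own request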
func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) error {
	// Keep track of all the propagated transactions
	if direct {
		txReplyInMeter.Mark(int64(len(txs)))
	} else {
		txBroadcastInMeter.Mark(int64(len(txs)))
	}
	// Push all the transactions into the pool, tracking underpriced ones to avoid
	// re-requesting them and dropping the peer in case of malicious transfers.
	var (
		added       = make([]common.Hash, 0, len(txs))
		duplicate   int64
		underpriced int64
		otherreject int64
	)
	errs := f.addTxs(txs)
	for i, err := range errs {
		// Track the transaction hash if the price is too low for us.
		// Avoid re-requesting this transaction when we receive another
		// announcement.
		if errors.Is(err, core.ErrUnderpriced) || errors.Is(err, core.ErrReplaceUnderpriced) {
			for f.underpriced.Cardinality() >= maxTxUnderpricedSetSize {
				f.underpriced.Pop()
			}
			f.underpriced.Add(txs[i].Hash())
		}
		// Track a few interesting failure types
		switch {
		case err == nil: // Noop, but need to handle to not count these

		case errors.Is(err, core.ErrAlreadyKnown):
			duplicate++

		case errors.Is(err, core.ErrUnderpriced) || errors.Is(err, core.ErrReplaceUnderpriced):
			underpriced++

		default:
			otherreject++
		}
		added = append(added, txs[i].Hash())
	}
	if direct {
		txReplyKnownMeter.Mark(duplicate)
		txReplyUnderpricedMeter.Mark(underpriced)
		txReplyOtherRejectMeter.Mark(otherreject)
	} else {
		txBroadcastKnownMeter.Mark(duplicate)
		txBroadcastUnderpricedMeter.Mark(underpriced)
		txBroadcastOtherRejectMeter.Mark(otherreject)
	}
	select {
	case f.cleanup <- &txDelivery{origin: peer, hashes: added, direct: direct}:
		return nil
	case <-f.quit:
		return errTerminated
	}
}

// Drop should be called when a peer disconnects. It cleans up all the internal
// data structures of the given node.
func (f *TxFetcher) Drop(peer string) error {
	select {
	case f.drop <- &txDrop{peer: peer}:
		return nil
	case <-f.quit:
		return errTerminated
	}
}

// Start boots up the announcement based synchroniser, accepting and processing
// hash notifications and block fetches until termination is requested.
func (f *TxFetcher) Start() {
	// the txArrivalWait duration should not be less than the txGatherSlack duration
	if f.txArrivalWait < txGatherSlack {
		f.txArrivalWait = txGatherSlack
	}

	// the txArrivalWait duration should not be greater than the maxTxArrivalWait duration
	if f.txArrivalWait > maxTxArrivalWait {
		f.txArrivalWait = maxTxArrivalWait
	}

	go f.loop()
}

// Stop terminates the announcement based synchroniser, canceling all pending
// operations.
func (f *TxFetcher) Stop() {
	close(f.quit)
}

func (f *TxFetcher) loop() {
	var (
		waitTimer    = new(mclock.Timer)
		timeoutTimer = new(mclock.Timer)

		waitTrigger    = make(chan struct{}, 1)
		timeoutTrigger = make(chan struct{}, 1)
	)

	log.Info("TxFetcher", "txArrivalWait", f.txArrivalWait.String())

	for {
		select {
		case ann := <-f.notify:
			// Drop part of the new announcements if there are too many accumulated.
			// Note, we could but do not filter already known transactions here as
			// the probability of something arriving between this call and the pre-
			// filter outside is essentially zero.
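			// The cap below is enforced against everything this peer already occupies
			// across the waiting (stage 1) and queued/fetching (stage 2/3) trackers.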
			used := len(f.waitslots[ann.origin]) + len(f.announces[ann.origin])
			if used >= maxTxAnnounces {
				// This can happen if a set of transactions are requested but not
				// all fulfilled, so the remainder are rescheduled without the cap
				// check. Should be fine as the limit is in the thousands and the
				// request size in the hundreds.
				txAnnounceDOSMeter.Mark(int64(len(ann.hashes)))
				break
			}
			want := used + len(ann.hashes)
			if want > maxTxAnnounces {
				txAnnounceDOSMeter.Mark(int64(want - maxTxAnnounces))
				ann.hashes = ann.hashes[:want-maxTxAnnounces]
			}
			// All is well, schedule the remainder of the transactions
			idleWait := len(f.waittime) == 0
			_, oldPeer := f.announces[ann.origin]

			for _, hash := range ann.hashes {
				// If the transaction is already downloading, add it to the list
				// of possible alternates (in case the current retrieval fails) and
				// also account it for the peer.
				if f.alternates[hash] != nil {
					f.alternates[hash][ann.origin] = struct{}{}

					// Stage 2 and 3 share the set of origins per tx
					if announces := f.announces[ann.origin]; announces != nil {
						announces[hash] = struct{}{}
					} else {
						f.announces[ann.origin] = map[common.Hash]struct{}{hash: {}}
					}
					continue
				}
				// If the transaction is not downloading, but is already queued
				// from a different peer, track it for the new peer too.
				if f.announced[hash] != nil {
					f.announced[hash][ann.origin] = struct{}{}

					// Stage 2 and 3 share the set of origins per tx
					if announces := f.announces[ann.origin]; announces != nil {
						announces[hash] = struct{}{}
					} else {
						f.announces[ann.origin] = map[common.Hash]struct{}{hash: {}}
					}
					continue
				}
				// If the transaction is already known to the fetcher, but not
				// yet downloading, add the peer as an alternate origin in the
				// waiting list.
				if f.waitlist[hash] != nil {
					f.waitlist[hash][ann.origin] = struct{}{}

					if waitslots := f.waitslots[ann.origin]; waitslots != nil {
						waitslots[hash] = struct{}{}
					} else {
						f.waitslots[ann.origin] = map[common.Hash]struct{}{hash: {}}
					}
					continue
				}
				// Transaction unknown to the fetcher, insert it into the waiting list
				f.waitlist[hash] = map[string]struct{}{ann.origin: {}}
				f.waittime[hash] = f.clock.Now()

				if waitslots := f.waitslots[ann.origin]; waitslots != nil {
					waitslots[hash] = struct{}{}
				} else {
					f.waitslots[ann.origin] = map[common.Hash]struct{}{hash: {}}
				}
			}
			// If a new item was added to the waitlist, schedule it into the fetcher
			if idleWait && len(f.waittime) > 0 {
				f.rescheduleWait(waitTimer, waitTrigger)
			}
			// If this peer is new and announced something already queued, maybe
			// request transactions from them
			if !oldPeer && len(f.announces[ann.origin]) > 0 {
				f.scheduleFetches(timeoutTimer, timeoutTrigger, map[string]struct{}{ann.origin: {}})
			}

		case <-waitTrigger:
			// At least one transaction's waiting time ran out, push all expired
			// ones into the retrieval queues
			actives := make(map[string]struct{})
			for hash, instance := range f.waittime {
				if time.Duration(f.clock.Now()-instance)+txGatherSlack > f.txArrivalWait {
					// Transaction expired without propagation, schedule for retrieval
					if f.announced[hash] != nil {
						panic("announce tracker already contains waitlist item")
					}
					f.announced[hash] = f.waitlist[hash]
					for peer := range f.waitlist[hash] {
						if announces := f.announces[peer]; announces != nil {
							announces[hash] = struct{}{}
						} else {
							f.announces[peer] = map[common.Hash]struct{}{hash: {}}
						}
						delete(f.waitslots[peer], hash)
						if len(f.waitslots[peer]) == 0 {
							delete(f.waitslots, peer)
						}
						actives[peer] = struct{}{}
					}
					delete(f.waittime, hash)
					delete(f.waitlist, hash)
				}
			}
			// If transactions are still waiting for propagation, reschedule the wait timer
			if len(f.waittime) > 0 {
				f.rescheduleWait(waitTimer, waitTrigger)
			}
			// If any peers became active and are idle, request transactions from them
			if len(actives) > 0 {
				f.scheduleFetches(timeoutTimer, timeoutTrigger, actives)
			}

		case <-timeoutTrigger:
			// Clean up any expired retrievals and avoid re-requesting them from the
			// same peer (either overloaded or malicious, useless in both cases). We
			// could also penalize (Drop), but there's nothing to gain, and it could
			// possibly further increase the load on it.
			for peer, req := range f.requests {
				if time.Duration(f.clock.Now()-req.time)+txGatherSlack > txFetchTimeout {
					txRequestTimeoutMeter.Mark(int64(len(req.hashes)))

					// Reschedule all the not-yet-delivered fetches to alternate peers
					for _, hash := range req.hashes {
						// Skip rescheduling hashes already delivered by someone else
						if req.stolen != nil {
							if _, ok := req.stolen[hash]; ok {
								continue
							}
						}
						// Move the delivery back from fetching to queued
						if _, ok := f.announced[hash]; ok {
							panic("announced tracker already contains alternate item")
						}
						if f.alternates[hash] != nil { // nil if tx was broadcast during fetch
							f.announced[hash] = f.alternates[hash]
						}
						delete(f.announced[hash], peer)
						if len(f.announced[hash]) == 0 {
							delete(f.announced, hash)
						}
						delete(f.announces[peer], hash)
						delete(f.alternates, hash)
						delete(f.fetching, hash)
					}
					if len(f.announces[peer]) == 0 {
						delete(f.announces, peer)
					}
					// Keep track of the request as dangling, but never expire
					f.requests[peer].hashes = nil
				}
			}
			// Schedule a new transaction retrieval
			f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)

			// No idea if we scheduled something or not, trigger the timer if needed
			// TODO(karalabe): this is kind of lame, can't we dump it into scheduleFetches somehow?
			f.rescheduleTimeout(timeoutTimer, timeoutTrigger)

		case delivery := <-f.cleanup:
			// Regardless of whether the delivery was direct or broadcast, remove
			// all traces of the hash from internal trackers
			for _, hash := range delivery.hashes {
				if _, ok := f.waitlist[hash]; ok {
					for peer, txset := range f.waitslots {
						delete(txset, hash)
						if len(txset) == 0 {
							delete(f.waitslots, peer)
						}
					}
					delete(f.waitlist, hash)
					delete(f.waittime, hash)
				} else {
					for peer, txset := range f.announces {
						delete(txset, hash)
						if len(txset) == 0 {
							delete(f.announces, peer)
						}
					}
					delete(f.announced, hash)
					delete(f.alternates, hash)

					// If a transaction currently being fetched from a different
					// origin was delivered (delivery stolen), mark it so the
					// actual delivery won't double schedule it.
					if origin, ok := f.fetching[hash]; ok && (origin != delivery.origin || !delivery.direct) {
						stolen := f.requests[origin].stolen
						if stolen == nil {
							f.requests[origin].stolen = make(map[common.Hash]struct{})
							stolen = f.requests[origin].stolen
						}
						stolen[hash] = struct{}{}
					}
					delete(f.fetching, hash)
				}
			}
			// In case of a direct delivery, also reschedule anything missing
			// from the original query
			if delivery.direct {
				// Mark the request successful (independent of individual status)
				txRequestDoneMeter.Mark(int64(len(delivery.hashes)))

				// Make sure something was pending, nuke it
				req := f.requests[delivery.origin]
				if req == nil {
					log.Warn("Unexpected transaction delivery", "peer", delivery.origin)
					break
				}
				delete(f.requests, delivery.origin)

				// Anything not delivered should be re-scheduled (with or without
				// this peer, depending on the response cutoff)
				delivered := make(map[common.Hash]struct{})
				for _, hash := range delivery.hashes {
					delivered[hash] = struct{}{}
				}
				cutoff := len(req.hashes) // If nothing is delivered, assume everything is missing, don't retry!!!
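				// Locate the cutoff: the index of the last hash this peer actually
				// delivered. Undelivered hashes before it are treated as deliberately
				// skipped by the peer, while hashes after it may simply not have fit
				// into the reply and keep this peer as a viable alternate.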
				for i, hash := range req.hashes {
					if _, ok := delivered[hash]; ok {
						cutoff = i
					}
				}
				// Reschedule missing hashes from alternates, not-fulfilled from alt+self
				for i, hash := range req.hashes {
					// Skip rescheduling hashes already delivered by someone else
					if req.stolen != nil {
						if _, ok := req.stolen[hash]; ok {
							continue
						}
					}
					if _, ok := delivered[hash]; !ok {
						if i < cutoff {
							delete(f.alternates[hash], delivery.origin)
							delete(f.announces[delivery.origin], hash)
							if len(f.announces[delivery.origin]) == 0 {
								delete(f.announces, delivery.origin)
							}
						}
						if len(f.alternates[hash]) > 0 {
							if _, ok := f.announced[hash]; ok {
								panic(fmt.Sprintf("announced tracker already contains alternate item: %v", f.announced[hash]))
							}
							f.announced[hash] = f.alternates[hash]
						}
					}
					delete(f.alternates, hash)
					delete(f.fetching, hash)
				}
				// Something was delivered, try to reschedule requests
				f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) // Partial delivery may enable others to deliver too
			}

		case drop := <-f.drop:
			// A peer was dropped, remove all traces of it
			if _, ok := f.waitslots[drop.peer]; ok {
				for hash := range f.waitslots[drop.peer] {
					delete(f.waitlist[hash], drop.peer)
					if len(f.waitlist[hash]) == 0 {
						delete(f.waitlist, hash)
						delete(f.waittime, hash)
					}
				}
				delete(f.waitslots, drop.peer)
				if len(f.waitlist) > 0 {
					f.rescheduleWait(waitTimer, waitTrigger)
				}
			}
			// Clean up any active requests
			var request *txRequest
			if request = f.requests[drop.peer]; request != nil {
				for _, hash := range request.hashes {
					// Skip rescheduling hashes already delivered by someone else
					if request.stolen != nil {
						if _, ok := request.stolen[hash]; ok {
							continue
						}
					}
					// Undelivered hash, reschedule if there's an alternative origin available
					delete(f.alternates[hash], drop.peer)
					if len(f.alternates[hash]) == 0 {
						delete(f.alternates, hash)
					} else {
						f.announced[hash] = f.alternates[hash]
						delete(f.alternates, hash)
					}
					delete(f.fetching, hash)
				}
				delete(f.requests, drop.peer)
			}
			// Clean up general announcement tracking
			if _, ok := f.announces[drop.peer]; ok {
				for hash := range f.announces[drop.peer] {
					delete(f.announced[hash], drop.peer)
					if len(f.announced[hash]) == 0 {
						delete(f.announced, hash)
					}
				}
				delete(f.announces, drop.peer)
			}
			// If a request was cancelled, check if anything needs to be rescheduled
			if request != nil {
				f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)
				f.rescheduleTimeout(timeoutTimer, timeoutTrigger)
			}

		case <-f.quit:
			return
		}
		// No idea what happened, but bump some sanity metrics
		txFetcherWaitingPeers.Update(int64(len(f.waitslots)))
		txFetcherWaitingHashes.Update(int64(len(f.waitlist)))
		txFetcherQueueingPeers.Update(int64(len(f.announces) - len(f.requests)))
		txFetcherQueueingHashes.Update(int64(len(f.announced)))
		txFetcherFetchingPeers.Update(int64(len(f.requests)))
		txFetcherFetchingHashes.Update(int64(len(f.fetching)))

		// Loop did something, ping the step notifier if needed (tests)
		if f.step != nil {
			f.step <- struct{}{}
		}
	}
}

// rescheduleWait iterates over all the transactions currently in the waitlist
// and schedules the movement into the fetcher for the earliest.
//
// The method has a granularity of 'gatherSlack', since there's not much point in
// spinning over all the transactions just to maybe find one that should trigger
// a few ms earlier.
func (f *TxFetcher) rescheduleWait(timer *mclock.Timer, trigger chan struct{}) {
	if *timer != nil {
		(*timer).Stop()
	}
	now := f.clock.Now()

	earliest := now
	for _, instance := range f.waittime {
		if earliest > instance {
			earliest = instance
			if f.txArrivalWait-time.Duration(now-earliest) < gatherSlack {
				break
			}
		}
	}

	*timer = f.clock.AfterFunc(
		f.txArrivalWait-time.Duration(now-earliest),
		func() { trigger <- struct{}{} },
	)
}

// rescheduleTimeout iterates over all the transactions currently in flight and
// schedules a cleanup run when the first would trigger.
//
// The method has a granularity of 'gatherSlack', since there's not much point in
// spinning over all the transactions just to maybe find one that should trigger
// a few ms earlier.
//
// This method is a bit "flaky" "by design". In theory the timeout timer only ever
// should be rescheduled if some request is pending. In practice, a timeout will
// cause the timer to be rescheduled every 5 secs (until the peer comes through or
// disconnects). This is a limitation of the fetcher code because we don't track
// pending requests and timed out requests separately. Without double tracking, if
// we simply didn't reschedule the timer on all-timeout then the timer would never
// be set again since len(request) > 0 => something's running.
func (f *TxFetcher) rescheduleTimeout(timer *mclock.Timer, trigger chan struct{}) {
	if *timer != nil {
		(*timer).Stop()
	}
	now := f.clock.Now()

	earliest := now
	for _, req := range f.requests {
		// If this request already timed out, skip it altogether
		if req.hashes == nil {
			continue
		}
		if earliest > req.time {
			earliest = req.time
			if txFetchTimeout-time.Duration(now-earliest) < gatherSlack {
				break
			}
		}
	}
	*timer = f.clock.AfterFunc(txFetchTimeout-time.Duration(now-earliest), func() {
		trigger <- struct{}{}
	})
}

// scheduleFetches starts a batch of retrievals for all available idle peers.
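// A nil whitelist means every peer with pending announcements is considered;
// a non-nil whitelist restricts scheduling to just those peers.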
func (f *TxFetcher) scheduleFetches(timer *mclock.Timer, timeout chan struct{}, whitelist map[string]struct{}) {
	// Gather the set of peers we want to retrieve from (default to all)
	actives := whitelist
	if actives == nil {
		actives = make(map[string]struct{})
		for peer := range f.announces {
			actives[peer] = struct{}{}
		}
	}
	if len(actives) == 0 {
		return
	}
	// For each active peer, try to schedule some transaction fetches
	idle := len(f.requests) == 0

	f.forEachPeer(actives, func(peer string) {
		if f.requests[peer] != nil {
			return // continue in the for-each
		}
		if len(f.announces[peer]) == 0 {
			return // continue in the for-each
		}
		hashes := make([]common.Hash, 0, maxTxRetrievals)
		f.forEachHash(f.announces[peer], func(hash common.Hash) bool {
			if _, ok := f.fetching[hash]; !ok {
				// Mark the hash as fetching and stash away possible alternates
				f.fetching[hash] = peer

				if _, ok := f.alternates[hash]; ok {
					panic(fmt.Sprintf("alternate tracker already contains fetching item: %v", f.alternates[hash]))
				}
				f.alternates[hash] = f.announced[hash]
				delete(f.announced, hash)

				// Accumulate the hash and stop if the limit was reached
				hashes = append(hashes, hash)
				if len(hashes) >= maxTxRetrievals {
					return false // break in the for-each
				}
			}
			return true // continue in the for-each
		})
		// If any hashes were allocated, request them from the peer
		if len(hashes) > 0 {
			f.requests[peer] = &txRequest{hashes: hashes, time: f.clock.Now()}
			txRequestOutMeter.Mark(int64(len(hashes)))

			go func(peer string, hashes []common.Hash) {
				// Try to fetch the transactions, but in case of a request
				// failure (e.g. peer disconnected), reschedule the hashes.
				if err := f.fetchTxs(peer, hashes); err != nil {
					txRequestFailMeter.Mark(int64(len(hashes)))
					f.Drop(peer)
				}
			}(peer, hashes)
		}
	})
	// If a new request was fired, schedule a timeout timer
	if idle && len(f.requests) > 0 {
		f.rescheduleTimeout(timer, timeout)
	}
}

// forEachPeer does a range loop over a map of peers in production, but during
// testing it does a deterministic sorted random to allow reproducing issues.
func (f *TxFetcher) forEachPeer(peers map[string]struct{}, do func(peer string)) {
	// If we're running production, use whatever Go's map gives us
	if f.rand == nil {
		for peer := range peers {
			do(peer)
		}
		return
	}
	// We're running the test suite, make iteration deterministic
	list := make([]string, 0, len(peers))
	for peer := range peers {
		list = append(list, peer)
	}
	sort.Strings(list)
	rotateStrings(list, f.rand.Intn(len(list)))
	for _, peer := range list {
		do(peer)
	}
}

// forEachHash does a range loop over a map of hashes in production, but during
// testing it does a deterministic sorted random to allow reproducing issues.
func (f *TxFetcher) forEachHash(hashes map[common.Hash]struct{}, do func(hash common.Hash) bool) {
	// If we're running production, use whatever Go's map gives us
	if f.rand == nil {
		for hash := range hashes {
			if !do(hash) {
				return
			}
		}
		return
	}
	// We're running the test suite, make iteration deterministic
	list := make([]common.Hash, 0, len(hashes))
	for hash := range hashes {
		list = append(list, hash)
	}
	sortHashes(list)
	rotateHashes(list, f.rand.Intn(len(list)))
	for _, hash := range list {
		if !do(hash) {
			return
		}
	}
}

// rotateStrings rotates the contents of a slice by n steps. This method is only
// used in tests to simulate random map iteration but keep it deterministic.
func rotateStrings(slice []string, n int) {
	orig := make([]string, len(slice))
	copy(orig, slice)

	for i := 0; i < len(orig); i++ {
		slice[i] = orig[(i+n)%len(orig)]
	}
}

// sortHashes sorts a slice of hashes. This method is only used in tests in order
// to simulate random map iteration but keep it deterministic.
func sortHashes(slice []common.Hash) {
	for i := 0; i < len(slice); i++ {
		for j := i + 1; j < len(slice); j++ {
			if bytes.Compare(slice[i][:], slice[j][:]) > 0 {
				slice[i], slice[j] = slice[j], slice[i]
			}
		}
	}
}

// rotateHashes rotates the contents of a slice by n steps. This method is only
// used in tests to simulate random map iteration but keep it deterministic.
func rotateHashes(slice []common.Hash, n int) {
	orig := make([]common.Hash, len(slice))
	copy(orig, slice)

	for i := 0; i < len(orig); i++ {
		slice[i] = orig[(i+n)%len(orig)]
	}
}
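// A minimal sketch of how the deterministic helpers above combine with
// NewTxFetcherForTests (the callback bodies are assumptions for illustration,
// not taken from this file or its tests):
//
//	clock := new(mclock.Simulated)
//	rng := mrand.New(mrand.NewSource(1))
//	f := NewTxFetcherForTests(
//		func(common.Hash) bool { return false },                                    // hasTx: nothing pooled yet
//		func(txs []*types.Transaction) []error { return make([]error, len(txs)) },  // addTxs: accept all
//		func(string, []common.Hash) error { return nil },                           // fetchTxs: pretend requests succeed
//		clock, rng, 500*time.Millisecond,
//	)
//	f.Start()
//	defer f.Stop()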