// github.com/dominant-strategies/go-quai@v0.28.2/eth/fetcher/tx_fetcher.go

// Copyright 2020 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package fetcher

import (
	"bytes"
	"fmt"
	mrand "math/rand"
	"sort"
	"time"

	mapset "github.com/deckarep/golang-set"
	"github.com/dominant-strategies/go-quai/common"
	"github.com/dominant-strategies/go-quai/common/mclock"
	"github.com/dominant-strategies/go-quai/core"
	"github.com/dominant-strategies/go-quai/core/types"
	"github.com/dominant-strategies/go-quai/log"
	"github.com/dominant-strategies/go-quai/metrics"
)

const (
	// maxTxAnnounces is the maximum number of unique transactions a peer
	// can announce in a short time.
	maxTxAnnounces = 4096

	// maxTxRetrievals is the maximum number of transactions that can be fetched
	// in one request. The rationale to pick 256 is:
	//   - In eth protocol, the softResponseLimit is 2MB. Nowadays according to
	//     Etherscan the average transaction size is around 200B, so in theory
	//     we can include lots of transactions in a single protocol packet.
	//   - However the maximum size of a single transaction is raised to 128KB,
	//     so pick a middle value here to ensure we can maximize the efficiency
	//     of the retrieval and response size overflow won't happen in most cases.
	maxTxRetrievals = 256

	// maxTxUnderpricedSetSize is the size of the underpriced transaction set that
	// is used to track recent transactions that have been dropped so we don't
	// re-request them.
	maxTxUnderpricedSetSize = 32768

	// txArriveTimeout is the time allowance before an announced transaction is
	// explicitly requested.
	txArriveTimeout = 500 * time.Millisecond

	// txGatherSlack is the interval used to collate almost-expired announces
	// with network fetches.
	txGatherSlack = 100 * time.Millisecond
)

var (
	// txFetchTimeout is the maximum allotted time to return an explicitly
	// requested transaction.
	// NOTE(review): declared as a var rather than a const, presumably so tests
	// can override it - confirm against the test suite.
	txFetchTimeout = 5 * time.Second
)

// Metrics registered by the transaction fetcher. Meters count events (marked
// with the number of affected hashes or transactions); gauges are overwritten
// with the current tracker sizes at the end of every fetcher loop iteration.
var (
	// Announcements received via Notify.
	txAnnounceInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/in", nil)
	txAnnounceKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/known", nil)
	txAnnounceUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/underpriced", nil)
	txAnnounceDOSMeter         = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/dos", nil)

	// Transactions received via Enqueue with direct == false.
	txBroadcastInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/in", nil)
	txBroadcastKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/known", nil)
	txBroadcastUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/underpriced", nil)
	txBroadcastOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/otherreject", nil)

	// Explicit retrieval requests issued by the fetcher.
	txRequestOutMeter     = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/out", nil)
	txRequestFailMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/fail", nil)
	txRequestDoneMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/done", nil)
	txRequestTimeoutMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/timeout", nil)

	// Transactions received via Enqueue with direct == true.
	txReplyInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/in", nil)
	txReplyKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/known", nil)
	txReplyUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/underpriced", nil)
	txReplyOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/otherreject", nil)

	// Sizes of the internal trackers of the three fetcher stages.
	txFetcherWaitingPeers   = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/peers", nil)
	txFetcherWaitingHashes  = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/hashes", nil)
	txFetcherQueueingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/peers", nil)
	txFetcherQueueingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/hashes", nil)
	txFetcherFetchingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/peers", nil)
	txFetcherFetchingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/hashes", nil)
)

// txAnnounce is the notification of the availability of a batch
// of new transactions in the network.
type txAnnounce struct {
	origin string        // Identifier of the peer originating the notification
	hashes []common.Hash // Batch of transaction hashes being announced
}

// txRequest represents an in-flight transaction retrieval request destined to
// a specific peer.
type txRequest struct {
	hashes []common.Hash            // Transactions having been requested (set to nil once the request timed out)
	stolen map[common.Hash]struct{} // Deliveries by someone else (don't re-request)
	time   mclock.AbsTime           // Timestamp of the request
}

// txDelivery is the notification that a batch of transactions have been added
// to the pool and should be untracked.
type txDelivery struct {
	origin string        // Identifier of the peer originating the notification
	hashes []common.Hash // Batch of transaction hashes having been delivered
	direct bool          // Whether this is a direct reply or a broadcast
}

// txDrop is the notification that a peer has disconnected.
type txDrop struct {
	peer string // Identifier of the peer that went away
}

// TxFetcher is responsible for retrieving new transactions based on announcements.
//
// The fetcher operates in 3 stages:
//   - Transactions that are newly discovered are moved into a wait list.
//   - After ~500ms passes, transactions from the wait list that have not been
//     broadcast to us in whole are moved into a queueing area.
//   - When a connected peer doesn't have in-flight retrieval requests, any
//     transactions queued up (and announced by the peer) are allocated to the
//     peer and moved into a fetching status until it's fulfilled or fails.
//
// The invariants of the fetcher are:
//   - Each tracked transaction (hash) must only be present in one of the
//     three stages. This ensures that the fetcher operates akin to a finite
//     state automaton and there's no data leak.
//   - Each peer that announced transactions may be scheduled retrievals, but
//     only ever one concurrently. This ensures we can immediately know what is
//     missing from a reply and reschedule it.
type TxFetcher struct {
	notify  chan *txAnnounce // Announcements pushed in by Notify
	cleanup chan *txDelivery // Deliveries pushed in by Enqueue
	drop    chan *txDrop     // Peer disconnects pushed in by Drop
	quit    chan struct{}    // Closed by Stop to terminate the loop and unblock callers

	underpriced mapset.Set // Transactions discarded as too cheap (don't re-fetch)

	// Stage 1: Waiting lists for newly discovered transactions that might be
	// broadcast without needing explicit request/reply round trips.
	waitlist  map[common.Hash]map[string]struct{} // Transactions waiting for a potential broadcast
	waittime  map[common.Hash]mclock.AbsTime      // Timestamps when transactions were added to the waitlist
	waitslots map[string]map[common.Hash]struct{} // Waiting announcements grouped by peer (DoS protection)

	// Stage 2: Queue of transactions that are waiting to be allocated to some
	// peer to be retrieved directly.
	announces map[string]map[common.Hash]struct{} // Set of announced transactions, grouped by origin peer
	announced map[common.Hash]map[string]struct{} // Set of download locations, grouped by transaction hash

	// Stage 3: Set of transactions currently being retrieved, some which may be
	// fulfilled and some rescheduled. Note, this step shares 'announces' from the
	// previous stage to avoid having to duplicate (need it for DoS checks).
	fetching   map[common.Hash]string              // Transaction set currently being retrieved
	requests   map[string]*txRequest               // In-flight transaction retrievals
	alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails

	// Callbacks
	hasTx    func(common.Hash) bool             // Reports whether a tx is already known to the local txpool
	addTxs   func([]*types.Transaction) []error // Insert a batch of transactions into local txpool
	fetchTxs func(string, []common.Hash) error  // Retrieves a set of txs from a remote peer

	step  chan struct{} // Notification channel when the fetcher loop iterates
	clock mclock.Clock  // Time wrapper to simulate in tests
	rand  *mrand.Rand   // Randomizer to use in tests instead of map range loops (soft-random)
}

// NewTxFetcher creates a transaction fetcher to retrieve transactions
// based on hash announcements.
182 func NewTxFetcher(hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error) *TxFetcher { 183 return NewTxFetcherForTests(hasTx, addTxs, fetchTxs, mclock.System{}, nil) 184 } 185 186 // NewTxFetcherForTests is a testing method to mock out the realtime clock with 187 // a simulated version and the internal randomness with a deterministic one. 188 func NewTxFetcherForTests( 189 hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, 190 clock mclock.Clock, rand *mrand.Rand) *TxFetcher { 191 return &TxFetcher{ 192 notify: make(chan *txAnnounce), 193 cleanup: make(chan *txDelivery), 194 drop: make(chan *txDrop), 195 quit: make(chan struct{}), 196 waitlist: make(map[common.Hash]map[string]struct{}), 197 waittime: make(map[common.Hash]mclock.AbsTime), 198 waitslots: make(map[string]map[common.Hash]struct{}), 199 announces: make(map[string]map[common.Hash]struct{}), 200 announced: make(map[common.Hash]map[string]struct{}), 201 fetching: make(map[common.Hash]string), 202 requests: make(map[string]*txRequest), 203 alternates: make(map[common.Hash]map[string]struct{}), 204 underpriced: mapset.NewSet(), 205 hasTx: hasTx, 206 addTxs: addTxs, 207 fetchTxs: fetchTxs, 208 clock: clock, 209 rand: rand, 210 } 211 } 212 213 // Notify announces the fetcher of the potential availability of a new batch of 214 // transactions in the network. 215 func (f *TxFetcher) Notify(peer string, hashes []common.Hash) error { 216 // Keep track of all the announced transactions 217 txAnnounceInMeter.Mark(int64(len(hashes))) 218 219 // Skip any transaction announcements that we already know of, or that we've 220 // previously marked as cheap and discarded. 
This check is of course racey, 221 // because multiple concurrent notifies will still manage to pass it, but it's 222 // still valuable to check here because it runs concurrent to the internal 223 // loop, so anything caught here is time saved internally. 224 var ( 225 unknowns = make([]common.Hash, 0, len(hashes)) 226 duplicate, underpriced int64 227 ) 228 for _, hash := range hashes { 229 switch { 230 case f.hasTx(hash): 231 duplicate++ 232 233 case f.underpriced.Contains(hash): 234 underpriced++ 235 236 default: 237 unknowns = append(unknowns, hash) 238 } 239 } 240 txAnnounceKnownMeter.Mark(duplicate) 241 txAnnounceUnderpricedMeter.Mark(underpriced) 242 243 // If anything's left to announce, push it into the internal loop 244 if len(unknowns) == 0 { 245 return nil 246 } 247 announce := &txAnnounce{ 248 origin: peer, 249 hashes: unknowns, 250 } 251 select { 252 case f.notify <- announce: 253 return nil 254 case <-f.quit: 255 return errTerminated 256 } 257 } 258 259 // Enqueue imports a batch of received transaction into the transaction pool 260 // and the fetcher. This method may be called by both transaction broadcasts and 261 // direct request replies. The differentiation is important so the fetcher can 262 // re-shedule missing transactions as soon as possible. 263 func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) error { 264 // Keep track of all the propagated transactions 265 if direct { 266 txReplyInMeter.Mark(int64(len(txs))) 267 } else { 268 txBroadcastInMeter.Mark(int64(len(txs))) 269 } 270 // Push all the transactions into the pool, tracking underpriced ones to avoid 271 // re-requesting them and dropping the peer in case of malicious transfers. 
272 var ( 273 added = make([]common.Hash, 0, len(txs)) 274 duplicate int64 275 underpriced int64 276 otherreject int64 277 ) 278 errs := f.addTxs(txs) 279 for i, err := range errs { 280 if err != nil { 281 if err.Error() != core.ErrAlreadyKnown.Error() { 282 log.Debug("Failed to add transaction", "hash", txs[i].Hash().String(), "err", err) 283 } 284 // Track the transaction hash if the price is too low for us. 285 // Avoid re-request this transaction when we receive another 286 // announcement. 287 if err == core.ErrUnderpriced || err == core.ErrReplaceUnderpriced { 288 for f.underpriced.Cardinality() >= maxTxUnderpricedSetSize { 289 f.underpriced.Pop() 290 } 291 f.underpriced.Add(txs[i].Hash()) 292 } 293 // Track a few interesting failure types 294 switch err { 295 case nil: // Noop, but need to handle to not count these 296 297 case core.ErrAlreadyKnown: 298 duplicate++ 299 300 case core.ErrUnderpriced, core.ErrReplaceUnderpriced: 301 underpriced++ 302 303 default: 304 otherreject++ 305 } 306 } 307 added = append(added, txs[i].Hash()) 308 } 309 if direct { 310 txReplyKnownMeter.Mark(duplicate) 311 txReplyUnderpricedMeter.Mark(underpriced) 312 txReplyOtherRejectMeter.Mark(otherreject) 313 } else { 314 txBroadcastKnownMeter.Mark(duplicate) 315 txBroadcastUnderpricedMeter.Mark(underpriced) 316 txBroadcastOtherRejectMeter.Mark(otherreject) 317 } 318 select { 319 case f.cleanup <- &txDelivery{origin: peer, hashes: added, direct: direct}: 320 return nil 321 case <-f.quit: 322 return errTerminated 323 } 324 } 325 326 // Drop should be called when a peer disconnects. It cleans up all the internal 327 // data structures of the given node. 328 func (f *TxFetcher) Drop(peer string) error { 329 select { 330 case f.drop <- &txDrop{peer: peer}: 331 return nil 332 case <-f.quit: 333 return errTerminated 334 } 335 } 336 337 // Start boots up the announcement based synchroniser, accepting and processing 338 // hash notifications and block fetches until termination requested. 
339 func (f *TxFetcher) Start() { 340 go f.loop() 341 } 342 343 // Stop terminates the announcement based synchroniser, canceling all pending 344 // operations. 345 func (f *TxFetcher) Stop() { 346 close(f.quit) 347 } 348 349 func (f *TxFetcher) loop() { 350 var ( 351 waitTimer = new(mclock.Timer) 352 timeoutTimer = new(mclock.Timer) 353 354 waitTrigger = make(chan struct{}, 1) 355 timeoutTrigger = make(chan struct{}, 1) 356 ) 357 for { 358 select { 359 case ann := <-f.notify: 360 // Drop part of the new announcements if there are too many accumulated. 361 // Note, we could but do not filter already known transactions here as 362 // the probability of something arriving between this call and the pre- 363 // filter outside is essentially zero. 364 used := len(f.waitslots[ann.origin]) + len(f.announces[ann.origin]) 365 if used >= maxTxAnnounces { 366 // This can happen if a set of transactions are requested but not 367 // all fulfilled, so the remainder are rescheduled without the cap 368 // check. Should be fine as the limit is in the thousands and the 369 // request size in the hundreds. 370 txAnnounceDOSMeter.Mark(int64(len(ann.hashes))) 371 break 372 } 373 want := used + len(ann.hashes) 374 if want > maxTxAnnounces { 375 txAnnounceDOSMeter.Mark(int64(want - maxTxAnnounces)) 376 ann.hashes = ann.hashes[:want-maxTxAnnounces] 377 } 378 // All is well, schedule the remainder of the transactions 379 idleWait := len(f.waittime) == 0 380 _, oldPeer := f.announces[ann.origin] 381 382 for _, hash := range ann.hashes { 383 // If the transaction is already downloading, add it to the list 384 // of possible alternates (in case the current retrieval fails) and 385 // also account it for the peer. 
386 if f.alternates[hash] != nil { 387 f.alternates[hash][ann.origin] = struct{}{} 388 389 // Stage 2 and 3 share the set of origins per tx 390 if announces := f.announces[ann.origin]; announces != nil { 391 announces[hash] = struct{}{} 392 } else { 393 f.announces[ann.origin] = map[common.Hash]struct{}{hash: {}} 394 } 395 continue 396 } 397 // If the transaction is not downloading, but is already queued 398 // from a different peer, track it for the new peer too. 399 if f.announced[hash] != nil { 400 f.announced[hash][ann.origin] = struct{}{} 401 402 // Stage 2 and 3 share the set of origins per tx 403 if announces := f.announces[ann.origin]; announces != nil { 404 announces[hash] = struct{}{} 405 } else { 406 f.announces[ann.origin] = map[common.Hash]struct{}{hash: {}} 407 } 408 continue 409 } 410 // If the transaction is already known to the fetcher, but not 411 // yet downloading, add the peer as an alternate origin in the 412 // waiting list. 413 if f.waitlist[hash] != nil { 414 f.waitlist[hash][ann.origin] = struct{}{} 415 416 if waitslots := f.waitslots[ann.origin]; waitslots != nil { 417 waitslots[hash] = struct{}{} 418 } else { 419 f.waitslots[ann.origin] = map[common.Hash]struct{}{hash: {}} 420 } 421 continue 422 } 423 // Transaction unknown to the fetcher, insert it into the waiting list 424 f.waitlist[hash] = map[string]struct{}{ann.origin: {}} 425 f.waittime[hash] = f.clock.Now() 426 427 if waitslots := f.waitslots[ann.origin]; waitslots != nil { 428 waitslots[hash] = struct{}{} 429 } else { 430 f.waitslots[ann.origin] = map[common.Hash]struct{}{hash: {}} 431 } 432 } 433 // If a new item was added to the waitlist, schedule it into the fetcher 434 if idleWait && len(f.waittime) > 0 { 435 f.rescheduleWait(waitTimer, waitTrigger) 436 } 437 // If this peer is new and announced something already queued, maybe 438 // request transactions from them 439 if !oldPeer && len(f.announces[ann.origin]) > 0 { 440 f.scheduleFetches(timeoutTimer, timeoutTrigger, 
map[string]struct{}{ann.origin: {}}) 441 } 442 443 case <-waitTrigger: 444 // At least one transaction's waiting time ran out, push all expired 445 // ones into the retrieval queues 446 actives := make(map[string]struct{}) 447 for hash, instance := range f.waittime { 448 if time.Duration(f.clock.Now()-instance)+txGatherSlack > txArriveTimeout { 449 // Transaction expired without propagation, schedule for retrieval 450 if f.announced[hash] != nil { 451 panic("announce tracker already contains waitlist item") 452 } 453 f.announced[hash] = f.waitlist[hash] 454 for peer := range f.waitlist[hash] { 455 if announces := f.announces[peer]; announces != nil { 456 announces[hash] = struct{}{} 457 } else { 458 f.announces[peer] = map[common.Hash]struct{}{hash: {}} 459 } 460 delete(f.waitslots[peer], hash) 461 if len(f.waitslots[peer]) == 0 { 462 delete(f.waitslots, peer) 463 } 464 actives[peer] = struct{}{} 465 } 466 delete(f.waittime, hash) 467 delete(f.waitlist, hash) 468 } 469 } 470 // If transactions are still waiting for propagation, reschedule the wait timer 471 if len(f.waittime) > 0 { 472 f.rescheduleWait(waitTimer, waitTrigger) 473 } 474 // If any peers became active and are idle, request transactions from them 475 if len(actives) > 0 { 476 f.scheduleFetches(timeoutTimer, timeoutTrigger, actives) 477 } 478 479 case <-timeoutTrigger: 480 // Clean up any expired retrievals and avoid re-requesting them from the 481 // same peer (either overloaded or malicious, useless in both cases). We 482 // could also penalize (Drop), but there's nothing to gain, and if could 483 // possibly further increase the load on it. 
484 for peer, req := range f.requests { 485 if time.Duration(f.clock.Now()-req.time)+txGatherSlack > txFetchTimeout { 486 txRequestTimeoutMeter.Mark(int64(len(req.hashes))) 487 488 // Reschedule all the not-yet-delivered fetches to alternate peers 489 for _, hash := range req.hashes { 490 // Skip rescheduling hashes already delivered by someone else 491 if req.stolen != nil { 492 if _, ok := req.stolen[hash]; ok { 493 continue 494 } 495 } 496 // Move the delivery back from fetching to queued 497 if _, ok := f.announced[hash]; ok { 498 panic("announced tracker already contains alternate item") 499 } 500 if f.alternates[hash] != nil { // nil if tx was broadcast during fetch 501 f.announced[hash] = f.alternates[hash] 502 } 503 delete(f.announced[hash], peer) 504 if len(f.announced[hash]) == 0 { 505 delete(f.announced, hash) 506 } 507 delete(f.announces[peer], hash) 508 delete(f.alternates, hash) 509 delete(f.fetching, hash) 510 } 511 if len(f.announces[peer]) == 0 { 512 delete(f.announces, peer) 513 } 514 // Keep track of the request as dangling, but never expire 515 f.requests[peer].hashes = nil 516 } 517 } 518 // Schedule a new transaction retrieval 519 f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) 520 521 // No idea if we scheduled something or not, trigger the timer if needed 522 // TODO: can't we dump it into scheduleFetches somehow? 
523 f.rescheduleTimeout(timeoutTimer, timeoutTrigger) 524 525 case delivery := <-f.cleanup: 526 // Independent if the delivery was direct or broadcast, remove all 527 // traces of the hash from internal trackers 528 for _, hash := range delivery.hashes { 529 if _, ok := f.waitlist[hash]; ok { 530 for peer, txset := range f.waitslots { 531 delete(txset, hash) 532 if len(txset) == 0 { 533 delete(f.waitslots, peer) 534 } 535 } 536 delete(f.waitlist, hash) 537 delete(f.waittime, hash) 538 } else { 539 for peer, txset := range f.announces { 540 delete(txset, hash) 541 if len(txset) == 0 { 542 delete(f.announces, peer) 543 } 544 } 545 delete(f.announced, hash) 546 delete(f.alternates, hash) 547 548 // If a transaction currently being fetched from a different 549 // origin was delivered (delivery stolen), mark it so the 550 // actual delivery won't double schedule it. 551 if origin, ok := f.fetching[hash]; ok && (origin != delivery.origin || !delivery.direct) { 552 stolen := f.requests[origin].stolen 553 if stolen == nil { 554 f.requests[origin].stolen = make(map[common.Hash]struct{}) 555 stolen = f.requests[origin].stolen 556 } 557 stolen[hash] = struct{}{} 558 } 559 delete(f.fetching, hash) 560 } 561 } 562 // In case of a direct delivery, also reschedule anything missing 563 // from the original query 564 if delivery.direct { 565 // Mark the reqesting successful (independent of individual status) 566 txRequestDoneMeter.Mark(int64(len(delivery.hashes))) 567 568 // Make sure something was pending, nuke it 569 req := f.requests[delivery.origin] 570 if req == nil { 571 log.Warn("Unexpected transaction delivery", "peer", delivery.origin) 572 break 573 } 574 delete(f.requests, delivery.origin) 575 576 // Anything not delivered should be re-scheduled (with or without 577 // this peer, depending on the response cutoff) 578 delivered := make(map[common.Hash]struct{}) 579 for _, hash := range delivery.hashes { 580 delivered[hash] = struct{}{} 581 } 582 cutoff := len(req.hashes) 
// If nothing is delivered, assume everything is missing, don't retry!!! 583 for i, hash := range req.hashes { 584 if _, ok := delivered[hash]; ok { 585 cutoff = i 586 } 587 } 588 // Reschedule missing hashes from alternates, not-fulfilled from alt+self 589 for i, hash := range req.hashes { 590 // Skip rescheduling hashes already delivered by someone else 591 if req.stolen != nil { 592 if _, ok := req.stolen[hash]; ok { 593 continue 594 } 595 } 596 if _, ok := delivered[hash]; !ok { 597 if i < cutoff { 598 delete(f.alternates[hash], delivery.origin) 599 delete(f.announces[delivery.origin], hash) 600 if len(f.announces[delivery.origin]) == 0 { 601 delete(f.announces, delivery.origin) 602 } 603 } 604 if len(f.alternates[hash]) > 0 { 605 if _, ok := f.announced[hash]; ok { 606 panic(fmt.Sprintf("announced tracker already contains alternate item: %v", f.announced[hash])) 607 } 608 f.announced[hash] = f.alternates[hash] 609 } 610 } 611 delete(f.alternates, hash) 612 delete(f.fetching, hash) 613 } 614 // Something was delivered, try to rechedule requests 615 f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) // Partial delivery may enable others to deliver too 616 } 617 618 case drop := <-f.drop: 619 // A peer was dropped, remove all traces of it 620 if _, ok := f.waitslots[drop.peer]; ok { 621 for hash := range f.waitslots[drop.peer] { 622 delete(f.waitlist[hash], drop.peer) 623 if len(f.waitlist[hash]) == 0 { 624 delete(f.waitlist, hash) 625 delete(f.waittime, hash) 626 } 627 } 628 delete(f.waitslots, drop.peer) 629 if len(f.waitlist) > 0 { 630 f.rescheduleWait(waitTimer, waitTrigger) 631 } 632 } 633 // Clean up any active requests 634 var request *txRequest 635 if request = f.requests[drop.peer]; request != nil { 636 for _, hash := range request.hashes { 637 // Skip rescheduling hashes already delivered by someone else 638 if request.stolen != nil { 639 if _, ok := request.stolen[hash]; ok { 640 continue 641 } 642 } 643 // Undelivered hash, reschedule if there's an 
alternative origin available 644 delete(f.alternates[hash], drop.peer) 645 if len(f.alternates[hash]) == 0 { 646 delete(f.alternates, hash) 647 } else { 648 f.announced[hash] = f.alternates[hash] 649 delete(f.alternates, hash) 650 } 651 delete(f.fetching, hash) 652 } 653 delete(f.requests, drop.peer) 654 } 655 // Clean up general announcement tracking 656 if _, ok := f.announces[drop.peer]; ok { 657 for hash := range f.announces[drop.peer] { 658 delete(f.announced[hash], drop.peer) 659 if len(f.announced[hash]) == 0 { 660 delete(f.announced, hash) 661 } 662 } 663 delete(f.announces, drop.peer) 664 } 665 // If a request was cancelled, check if anything needs to be rescheduled 666 if request != nil { 667 f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) 668 f.rescheduleTimeout(timeoutTimer, timeoutTrigger) 669 } 670 671 case <-f.quit: 672 return 673 } 674 // No idea what happened, but bump some sanity metrics 675 txFetcherWaitingPeers.Update(int64(len(f.waitslots))) 676 txFetcherWaitingHashes.Update(int64(len(f.waitlist))) 677 txFetcherQueueingPeers.Update(int64(len(f.announces) - len(f.requests))) 678 txFetcherQueueingHashes.Update(int64(len(f.announced))) 679 txFetcherFetchingPeers.Update(int64(len(f.requests))) 680 txFetcherFetchingHashes.Update(int64(len(f.fetching))) 681 682 // Loop did something, ping the step notifier if needed (tests) 683 if f.step != nil { 684 f.step <- struct{}{} 685 } 686 } 687 } 688 689 // rescheduleWait iterates over all the transactions currently in the waitlist 690 // and schedules the movement into the fetcher for the earliest. 691 // 692 // The method has a granularity of 'gatherSlack', since there's not much point in 693 // spinning over all the transactions just to maybe find one that should trigger 694 // a few ms earlier. 
695 func (f *TxFetcher) rescheduleWait(timer *mclock.Timer, trigger chan struct{}) { 696 if *timer != nil { 697 (*timer).Stop() 698 } 699 now := f.clock.Now() 700 701 earliest := now 702 for _, instance := range f.waittime { 703 if earliest > instance { 704 earliest = instance 705 if txArriveTimeout-time.Duration(now-earliest) < gatherSlack { 706 break 707 } 708 } 709 } 710 *timer = f.clock.AfterFunc(txArriveTimeout-time.Duration(now-earliest), func() { 711 trigger <- struct{}{} 712 }) 713 } 714 715 // rescheduleTimeout iterates over all the transactions currently in flight and 716 // schedules a cleanup run when the first would trigger. 717 // 718 // The method has a granularity of 'gatherSlack', since there's not much point in 719 // spinning over all the transactions just to maybe find one that should trigger 720 // a few ms earlier. 721 // 722 // This method is a bit "flaky" "by design". In theory the timeout timer only ever 723 // should be rescheduled if some request is pending. In practice, a timeout will 724 // cause the timer to be rescheduled every 5 secs (until the peer comes through or 725 // disconnects). This is a limitation of the fetcher code because we don't trac 726 // pending requests and timed out requests separatey. Without double tracking, if 727 // we simply didn't reschedule the timer on all-timeout then the timer would never 728 // be set again since len(request) > 0 => something's running. 
729 func (f *TxFetcher) rescheduleTimeout(timer *mclock.Timer, trigger chan struct{}) { 730 if *timer != nil { 731 (*timer).Stop() 732 } 733 now := f.clock.Now() 734 735 earliest := now 736 for _, req := range f.requests { 737 // If this request already timed out, skip it altogether 738 if req.hashes == nil { 739 continue 740 } 741 if earliest > req.time { 742 earliest = req.time 743 if txFetchTimeout-time.Duration(now-earliest) < gatherSlack { 744 break 745 } 746 } 747 } 748 *timer = f.clock.AfterFunc(txFetchTimeout-time.Duration(now-earliest), func() { 749 trigger <- struct{}{} 750 }) 751 } 752 753 // scheduleFetches starts a batch of retrievals for all available idle peers. 754 func (f *TxFetcher) scheduleFetches(timer *mclock.Timer, timeout chan struct{}, whitelist map[string]struct{}) { 755 // Gather the set of peers we want to retrieve from (default to all) 756 actives := whitelist 757 if actives == nil { 758 actives = make(map[string]struct{}) 759 for peer := range f.announces { 760 actives[peer] = struct{}{} 761 } 762 } 763 if len(actives) == 0 { 764 return 765 } 766 // For each active peer, try to schedule some transaction fetches 767 idle := len(f.requests) == 0 768 769 f.forEachPeer(actives, func(peer string) { 770 if f.requests[peer] != nil { 771 return // continue in the for-each 772 } 773 if len(f.announces[peer]) == 0 { 774 return // continue in the for-each 775 } 776 hashes := make([]common.Hash, 0, maxTxRetrievals) 777 f.forEachHash(f.announces[peer], func(hash common.Hash) bool { 778 if _, ok := f.fetching[hash]; !ok { 779 // Mark the hash as fetching and stash away possible alternates 780 f.fetching[hash] = peer 781 782 if _, ok := f.alternates[hash]; ok { 783 panic(fmt.Sprintf("alternate tracker already contains fetching item: %v", f.alternates[hash])) 784 } 785 f.alternates[hash] = f.announced[hash] 786 delete(f.announced, hash) 787 788 // Accumulate the hash and stop if the limit was reached 789 hashes = append(hashes, hash) 790 if len(hashes) 
>= maxTxRetrievals { 791 return false // break in the for-each 792 } 793 } 794 return true // continue in the for-each 795 }) 796 // If any hashes were allocated, request them from the peer 797 if len(hashes) > 0 { 798 f.requests[peer] = &txRequest{hashes: hashes, time: f.clock.Now()} 799 txRequestOutMeter.Mark(int64(len(hashes))) 800 801 go func(peer string, hashes []common.Hash) { 802 // Try to fetch the transactions, but in case of a request 803 // failure (e.g. peer disconnected), reschedule the hashes. 804 if err := f.fetchTxs(peer, hashes); err != nil { 805 txRequestFailMeter.Mark(int64(len(hashes))) 806 f.Drop(peer) 807 } 808 }(peer, hashes) 809 } 810 }) 811 // If a new request was fired, schedule a timeout timer 812 if idle && len(f.requests) > 0 { 813 f.rescheduleTimeout(timer, timeout) 814 } 815 } 816 817 // forEachPeer does a range loop over a map of peers in production, but during 818 // testing it does a deterministic sorted random to allow reproducing issues. 819 func (f *TxFetcher) forEachPeer(peers map[string]struct{}, do func(peer string)) { 820 // If we're running production, use whatever Go's map gives us 821 if f.rand == nil { 822 for peer := range peers { 823 do(peer) 824 } 825 return 826 } 827 // We're running the test suite, make iteration deterministic 828 list := make([]string, 0, len(peers)) 829 for peer := range peers { 830 list = append(list, peer) 831 } 832 sort.Strings(list) 833 rotateStrings(list, f.rand.Intn(len(list))) 834 for _, peer := range list { 835 do(peer) 836 } 837 } 838 839 // forEachHash does a range loop over a map of hashes in production, but during 840 // testing it does a deterministic sorted random to allow reproducing issues. 
841 func (f *TxFetcher) forEachHash(hashes map[common.Hash]struct{}, do func(hash common.Hash) bool) { 842 // If we're running production, use whatever Go's map gives us 843 if f.rand == nil { 844 for hash := range hashes { 845 if !do(hash) { 846 return 847 } 848 } 849 return 850 } 851 // We're running the test suite, make iteration deterministic 852 list := make([]common.Hash, 0, len(hashes)) 853 for hash := range hashes { 854 list = append(list, hash) 855 } 856 sortHashes(list) 857 rotateHashes(list, f.rand.Intn(len(list))) 858 for _, hash := range list { 859 if !do(hash) { 860 return 861 } 862 } 863 } 864 865 // rotateStrings rotates the contents of a slice by n steps. This method is only 866 // used in tests to simulate random map iteration but keep it deterministic. 867 func rotateStrings(slice []string, n int) { 868 orig := make([]string, len(slice)) 869 copy(orig, slice) 870 871 for i := 0; i < len(orig); i++ { 872 slice[i] = orig[(i+n)%len(orig)] 873 } 874 } 875 876 // sortHashes sorts a slice of hashes. This method is only used in tests in order 877 // to simulate random map iteration but keep it deterministic. 878 func sortHashes(slice []common.Hash) { 879 for i := 0; i < len(slice); i++ { 880 for j := i + 1; j < len(slice); j++ { 881 if bytes.Compare(slice[i][:], slice[j][:]) > 0 { 882 slice[i], slice[j] = slice[j], slice[i] 883 } 884 } 885 } 886 } 887 888 // rotateHashes rotates the contents of a slice by n steps. This method is only 889 // used in tests to simulate random map iteration but keep it deterministic. 890 func rotateHashes(slice []common.Hash, n int) { 891 orig := make([]common.Hash, len(slice)) 892 copy(orig, slice) 893 894 for i := 0; i < len(orig); i++ { 895 slice[i] = orig[(i+n)%len(orig)] 896 } 897 }