github.com/ethereum/go-ethereum@v1.16.1/eth/fetcher/tx_fetcher.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package fetcher

import (
    "errors"
    "fmt"
    "math"
    mrand "math/rand"
    "sort"
    "time"

    "github.com/ethereum/go-ethereum/common"
    "github.com/ethereum/go-ethereum/common/lru"
    "github.com/ethereum/go-ethereum/common/mclock"
    "github.com/ethereum/go-ethereum/core/txpool"
    "github.com/ethereum/go-ethereum/core/types"
    "github.com/ethereum/go-ethereum/log"
    "github.com/ethereum/go-ethereum/metrics"
)

const (
    // maxTxAnnounces is the maximum number of unique transactions a peer
    // can announce in a short time.
    maxTxAnnounces = 4096

    // maxTxRetrievals is the maximum number of transactions that can be fetched
    // in one request. The rationale for picking 256 is to have a reasonable lower
    // bound for the transferred data (don't waste RTTs, transfer more meaningful
    // batch sizes), but also have an upper bound on the sequentiality to allow
    // using our entire peerset for deliveries.
    //
    // This number also acts as a failsafe against malicious announces which might
    // cause us to request more data than we'd expect.
    maxTxRetrievals = 256

    // maxTxRetrievalSize is the max number of bytes that delivered transactions
    // should weigh according to the announcements. The 128KB was chosen to limit
    // retrieving a maximum of one blob transaction at a time to minimize hogging
    // a connection between two peers.
    maxTxRetrievalSize = 128 * 1024

    // maxTxUnderpricedSetSize is the size of the underpriced transaction set that
    // is used to track recent transactions that have been dropped so we don't
    // re-request them.
    maxTxUnderpricedSetSize = 32768

    // maxTxUnderpricedTimeout is the max time a transaction should be stuck in the underpriced set.
    maxTxUnderpricedTimeout = 5 * time.Minute

    // txArriveTimeout is the time allowance before an announced transaction is
    // explicitly requested.
    txArriveTimeout = 500 * time.Millisecond

    // txGatherSlack is the interval used to collate almost-expired announces
    // with network fetches.
    txGatherSlack = 100 * time.Millisecond

    // addTxsBatchSize is the max number of transactions to add in a single batch from a peer.
    addTxsBatchSize = 128
)

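// The retrieval caps above bound a single request both by count and by announced
// size. As an illustrative sketch (not part of the upstream code; 'queued' is a
// hypothetical, arrival-ordered list), the selection loop in scheduleFetches
// behaves roughly like:
//
//	hashes := make([]common.Hash, 0, maxTxRetrievals)
//	var bytes uint64
//	for _, ann := range queued {
//		hashes = append(hashes, ann.hash)
//		if len(hashes) >= maxTxRetrievals {
//			break // count cap: never more than 256 hashes per request
//		}
//		bytes += uint64(ann.size)
//		if bytes >= maxTxRetrievalSize {
//			break // size cap: stop once the announced sizes reach ~128KB
//		}
//	}
//
// Note that the size check runs after a hash has been accepted, so a single
// announcement larger than 128KB (e.g. a blob transaction) is still requested,
// just on its own.
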
var (
    // txFetchTimeout is the maximum allotted time to return an explicitly
    // requested transaction.
    txFetchTimeout = 5 * time.Second
)

var (
    txAnnounceInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/in", nil)
    txAnnounceKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/known", nil)
    txAnnounceUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/underpriced", nil)
    txAnnounceDOSMeter         = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/dos", nil)

    txBroadcastInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/in", nil)
    txBroadcastKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/known", nil)
    txBroadcastUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/underpriced", nil)
    txBroadcastOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/otherreject", nil)

    txRequestOutMeter     = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/out", nil)
    txRequestFailMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/fail", nil)
    txRequestDoneMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/done", nil)
    txRequestTimeoutMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/timeout", nil)

    txReplyInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/in", nil)
    txReplyKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/known", nil)
    txReplyUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/underpriced", nil)
    txReplyOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/otherreject", nil)

    txFetcherWaitingPeers   = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/peers", nil)
    txFetcherWaitingHashes  = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/hashes", nil)
    txFetcherQueueingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/peers", nil)
    txFetcherQueueingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/hashes", nil)
    txFetcherFetchingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/peers", nil)
    txFetcherFetchingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/hashes", nil)
)

var errTerminated = errors.New("terminated")

// txAnnounce is the notification of the availability of a batch
// of new transactions in the network.
type txAnnounce struct {
    origin string        // Identifier of the peer originating the notification
    hashes []common.Hash // Batch of transaction hashes being announced
    metas  []txMetadata  // Batch of metadata associated with the hashes
}

// txMetadata provides the extra data transmitted along with the announcement
// for better fetch scheduling.
type txMetadata struct {
    kind byte   // Transaction consensus type
    size uint32 // Transaction size in bytes
}

// txMetadataWithSeq is a wrapper of transaction metadata with an extra field
// tracking the transaction sequence number.
type txMetadataWithSeq struct {
    txMetadata
    seq uint64
}

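// As a rough illustration (hypothetical values, not part of the upstream file),
// an eth/68 NewPooledTransactionHashes announcement carries three parallel lists
// which Notify converts into per-hash metadata:
//
//	kinds  := []byte{types.DynamicFeeTxType, types.BlobTxType}
//	sizes  := []uint32{212, 131_500}
//	hashes := []common.Hash{hashA, hashB} // hashA, hashB: placeholder hashes
//	_ = fetcher.Notify("peer-1", kinds, sizes, hashes)
//
// Each surviving entry becomes a txMetadata{kind, size} and is tagged with a
// monotonically increasing sequence number so later scheduling can preserve
// arrival order.
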
// txRequest represents an in-flight transaction retrieval request destined to
// a specific peer.
type txRequest struct {
    hashes []common.Hash            // Transactions having been requested
    stolen map[common.Hash]struct{} // Deliveries by someone else (don't re-request)
    time   mclock.AbsTime           // Timestamp of the request
}

// txDelivery is the notification that a batch of transactions have been added
// to the pool and should be untracked.
type txDelivery struct {
    origin string        // Identifier of the peer originating the notification
    hashes []common.Hash // Batch of transaction hashes having been delivered
    metas  []txMetadata  // Batch of metadata associated with the delivered hashes
    direct bool          // Whether this is a direct reply or a broadcast
}

// txDrop is the notification that a peer has disconnected.
type txDrop struct {
    peer string
}

// TxFetcher is responsible for retrieving new transactions based on announcements.
//
// The fetcher operates in 3 stages:
//   - Transactions that are newly discovered are moved into a wait list.
//   - After ~500ms passes, transactions from the wait list that have not been
//     broadcast to us in whole are moved into a queueing area.
//   - When a connected peer doesn't have in-flight retrieval requests, any
//     transactions queued up (and announced by the peer) are allocated to the
//     peer and moved into a fetching status until it's fulfilled or fails.
//
// The invariants of the fetcher are:
//   - Each tracked transaction (hash) must only be present in one of the
//     three stages. This ensures that the fetcher operates akin to a finite
//     state automaton and there's no data leak.
//   - Each peer that announced transactions may have retrievals scheduled, but
//     only ever one request in flight at a time. This ensures we can immediately
//     know what is missing from a reply and reschedule it.
type TxFetcher struct {
    notify  chan *txAnnounce
    cleanup chan *txDelivery
    drop    chan *txDrop
    quit    chan struct{}

    txSeq       uint64                             // Unique transaction sequence number
    underpriced *lru.Cache[common.Hash, time.Time] // Transactions discarded as too cheap (don't re-fetch)

    // Stage 1: Waiting lists for newly discovered transactions that might be
    // broadcast without needing explicit request/reply round trips.
    waitlist  map[common.Hash]map[string]struct{}           // Transactions waiting for a potential broadcast
    waittime  map[common.Hash]mclock.AbsTime                // Timestamps when transactions were added to the waitlist
    waitslots map[string]map[common.Hash]*txMetadataWithSeq // Waiting announcements grouped by peer (DoS protection)

    // Stage 2: Queue of transactions that are waiting to be allocated to some peer
    // to be retrieved directly.
    announces map[string]map[common.Hash]*txMetadataWithSeq // Set of announced transactions, grouped by origin peer
    announced map[common.Hash]map[string]struct{}           // Set of download locations, grouped by transaction hash

    // Stage 3: Set of transactions currently being retrieved, some of which may be
    // fulfilled and some rescheduled. Note, this step shares 'announces' from the
    // previous stage to avoid having to duplicate (need it for DoS checks).
    fetching   map[common.Hash]string              // Transaction set currently being retrieved
    requests   map[string]*txRequest               // In-flight transaction retrievals
    alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails

    // Callbacks
    hasTx    func(common.Hash) bool             // Checks whether a tx is already present in the local txpool
    addTxs   func([]*types.Transaction) []error // Insert a batch of transactions into local txpool
    fetchTxs func(string, []common.Hash) error  // Retrieves a set of txs from a remote peer
    dropPeer func(string)                       // Drops a peer in case of announcement violation

    step     chan struct{}    // Notification channel when the fetcher loop iterates
    clock    mclock.Clock     // Monotonic clock or simulated clock for tests
    realTime func() time.Time // Real system time or simulated time for tests
    rand     *mrand.Rand      // Randomizer to use in tests instead of map range loops (soft-random)
}

// NewTxFetcher creates a transaction fetcher to retrieve transactions
// based on hash announcements.
func NewTxFetcher(hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string)) *TxFetcher {
    return NewTxFetcherForTests(hasTx, addTxs, fetchTxs, dropPeer, mclock.System{}, time.Now, nil)
}

// NewTxFetcherForTests is a testing method to mock out the realtime clock with
// a simulated version and the internal randomness with a deterministic one.
func NewTxFetcherForTests(
    hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string),
    clock mclock.Clock, realTime func() time.Time, rand *mrand.Rand) *TxFetcher {
    return &TxFetcher{
        notify:      make(chan *txAnnounce),
        cleanup:     make(chan *txDelivery),
        drop:        make(chan *txDrop),
        quit:        make(chan struct{}),
        waitlist:    make(map[common.Hash]map[string]struct{}),
        waittime:    make(map[common.Hash]mclock.AbsTime),
        waitslots:   make(map[string]map[common.Hash]*txMetadataWithSeq),
        announces:   make(map[string]map[common.Hash]*txMetadataWithSeq),
        announced:   make(map[common.Hash]map[string]struct{}),
        fetching:    make(map[common.Hash]string),
        requests:    make(map[string]*txRequest),
        alternates:  make(map[common.Hash]map[string]struct{}),
        underpriced: lru.NewCache[common.Hash, time.Time](maxTxUnderpricedSetSize),
        hasTx:       hasTx,
        addTxs:      addTxs,
        fetchTxs:    fetchTxs,
        dropPeer:    dropPeer,
        clock:       clock,
        realTime:    realTime,
        rand:        rand,
    }
}

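// A minimal wiring sketch (illustrative only; the real integration lives in the
// eth protocol handler, and the peer plumbing below is hypothetical). The pool
// adapter signatures vary between geth versions, so treat them as placeholders:
//
//	hasTx := pool.Has // pool: the node's transaction pool
//	addTxs := func(txs []*types.Transaction) []error { return pool.Add(txs, false) }
//	fetchTxs := func(peer string, hashes []common.Hash) error {
//		return requestPooledTxs(peer, hashes) // placeholder for a GetPooledTransactions request
//	}
//	dropPeer := func(peer string) { disconnect(peer) } // placeholder
//
//	f := NewTxFetcher(hasTx, addTxs, fetchTxs, dropPeer)
//	f.Start()
//	defer f.Stop()
//
//	_ = f.Notify("peer-1", kinds, sizes, hashes) // on an eth/68 announcement
//	_ = f.Enqueue("peer-1", txs, true)           // on a PooledTransactions reply (direct)
//	_ = f.Enqueue("peer-1", txs, false)          // on a Transactions broadcast
//	_ = f.Drop("peer-1")                         // on peer disconnect
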
// Notify announces to the fetcher the potential availability of a new batch of
// transactions in the network.
func (f *TxFetcher) Notify(peer string, types []byte, sizes []uint32, hashes []common.Hash) error {
    // Keep track of all the announced transactions
    txAnnounceInMeter.Mark(int64(len(hashes)))

    // Skip any transaction announcements that we already know of, or that we've
    // previously marked as cheap and discarded. This check is of course racy,
    // because multiple concurrent notifies will still manage to pass it, but it's
    // still valuable to check here because it runs concurrently with the internal
    // loop, so anything caught here is time saved internally.
    var (
        unknownHashes = make([]common.Hash, 0, len(hashes))
        unknownMetas  = make([]txMetadata, 0, len(hashes))

        duplicate   int64
        underpriced int64
    )
    for i, hash := range hashes {
        switch {
        case f.hasTx(hash):
            duplicate++
        case f.isKnownUnderpriced(hash):
            underpriced++
        default:
            unknownHashes = append(unknownHashes, hash)

            // Transaction metadata has been available since eth68, and all
            // legacy eth protocols (prior to eth68) have been deprecated.
            // Therefore, metadata is always expected in the announcement.
            unknownMetas = append(unknownMetas, txMetadata{kind: types[i], size: sizes[i]})
        }
    }
    txAnnounceKnownMeter.Mark(duplicate)
    txAnnounceUnderpricedMeter.Mark(underpriced)

    // If anything's left to announce, push it into the internal loop
    if len(unknownHashes) == 0 {
        return nil
    }
    announce := &txAnnounce{origin: peer, hashes: unknownHashes, metas: unknownMetas}
    select {
    case f.notify <- announce:
        return nil
    case <-f.quit:
        return errTerminated
    }
}

// isKnownUnderpriced reports whether a transaction hash was recently found to be underpriced.
func (f *TxFetcher) isKnownUnderpriced(hash common.Hash) bool {
    prevTime, ok := f.underpriced.Peek(hash)
    if ok && prevTime.Before(f.realTime().Add(-maxTxUnderpricedTimeout)) {
        f.underpriced.Remove(hash)
        return false
    }
    return ok
}

// Enqueue imports a batch of received transactions into the transaction pool
// and the fetcher. This method may be called by both transaction broadcasts and
// direct request replies. The differentiation is important so the fetcher can
// re-schedule missing transactions as soon as possible.
func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) error {
    var (
        inMeter          = txReplyInMeter
        knownMeter       = txReplyKnownMeter
        underpricedMeter = txReplyUnderpricedMeter
        otherRejectMeter = txReplyOtherRejectMeter
    )
    if !direct {
        inMeter = txBroadcastInMeter
        knownMeter = txBroadcastKnownMeter
        underpricedMeter = txBroadcastUnderpricedMeter
        otherRejectMeter = txBroadcastOtherRejectMeter
    }
    // Keep track of all the propagated transactions
    inMeter.Mark(int64(len(txs)))

    // Push all the transactions into the pool, tracking underpriced ones to avoid
    // re-requesting them and dropping the peer in case of malicious transfers.
    var (
        added = make([]common.Hash, 0, len(txs))
        metas = make([]txMetadata, 0, len(txs))
    )
    // Proceed in batches
    for i := 0; i < len(txs); i += addTxsBatchSize {
        end := i + addTxsBatchSize
        if end > len(txs) {
            end = len(txs)
        }
        var (
            duplicate   int64
            underpriced int64
            otherreject int64
        )
        batch := txs[i:end]

        for j, err := range f.addTxs(batch) {
            // Track the transaction hash if the price is too low for us.
            // Avoid re-requesting this transaction when we receive another
            // announcement.
            if errors.Is(err, txpool.ErrUnderpriced) || errors.Is(err, txpool.ErrReplaceUnderpriced) || errors.Is(err, txpool.ErrTxGasPriceTooLow) {
                f.underpriced.Add(batch[j].Hash(), batch[j].Time())
            }
            // Track a few interesting failure types
            switch {
            case err == nil: // Noop, but need to handle to not count these

            case errors.Is(err, txpool.ErrAlreadyKnown):
                duplicate++

            case errors.Is(err, txpool.ErrUnderpriced) || errors.Is(err, txpool.ErrReplaceUnderpriced) || errors.Is(err, txpool.ErrTxGasPriceTooLow):
                underpriced++

            default:
                otherreject++
            }
            added = append(added, batch[j].Hash())
            metas = append(metas, txMetadata{
                kind: batch[j].Type(),
                size: uint32(batch[j].Size()),
            })
        }
        knownMeter.Mark(duplicate)
        underpricedMeter.Mark(underpriced)
        otherRejectMeter.Mark(otherreject)

        // If 'other reject' is >25% of the deliveries in any batch, sleep a bit.
        if otherreject > addTxsBatchSize/4 {
            time.Sleep(200 * time.Millisecond)
            log.Debug("Peer delivering stale transactions", "peer", peer, "rejected", otherreject)
        }
    }
    select {
    case f.cleanup <- &txDelivery{origin: peer, hashes: added, metas: metas, direct: direct}:
        return nil
    case <-f.quit:
        return errTerminated
    }
}

// Drop should be called when a peer disconnects. It cleans up all the internal
// data structures of the given node.
func (f *TxFetcher) Drop(peer string) error {
    select {
    case f.drop <- &txDrop{peer: peer}:
        return nil
    case <-f.quit:
        return errTerminated
    }
}

// Start boots up the announcement based synchroniser, accepting and processing
// hash notifications and block fetches until termination requested.
func (f *TxFetcher) Start() {
    go f.loop()
}

// Stop terminates the announcement based synchroniser, canceling all pending
// operations.
func (f *TxFetcher) Stop() {
    close(f.quit)
}

func (f *TxFetcher) loop() {
    var (
        waitTimer    = new(mclock.Timer)
        timeoutTimer = new(mclock.Timer)

        waitTrigger    = make(chan struct{}, 1)
        timeoutTrigger = make(chan struct{}, 1)
    )
    for {
        select {
        case ann := <-f.notify:
            // Drop part of the new announcements if there are too many accumulated.
            // Note, we could but do not filter already known transactions here as
            // the probability of something arriving between this call and the pre-
            // filter outside is essentially zero.
            used := len(f.waitslots[ann.origin]) + len(f.announces[ann.origin])
            if used >= maxTxAnnounces {
                // This can happen if a set of transactions are requested but not
                // all fulfilled, so the remainder are rescheduled without the cap
                // check. Should be fine as the limit is in the thousands and the
                // request size in the hundreds.
                txAnnounceDOSMeter.Mark(int64(len(ann.hashes)))
                break
            }
            want := used + len(ann.hashes)
            if want > maxTxAnnounces {
                txAnnounceDOSMeter.Mark(int64(want - maxTxAnnounces))

                ann.hashes = ann.hashes[:want-maxTxAnnounces]
                ann.metas = ann.metas[:want-maxTxAnnounces]
            }
            // All is well, schedule the remainder of the transactions
            var (
                idleWait   = len(f.waittime) == 0
                _, oldPeer = f.announces[ann.origin]
                hasBlob    bool

                // nextSeq returns the next available sequence number for tagging
                // transaction announcements, and also bumps it internally.
                nextSeq = func() uint64 {
                    seq := f.txSeq
                    f.txSeq++
                    return seq
                }
            )
            for i, hash := range ann.hashes {
                // If the transaction is already downloading, add it to the list
                // of possible alternates (in case the current retrieval fails) and
                // also account it for the peer.
                if f.alternates[hash] != nil {
                    f.alternates[hash][ann.origin] = struct{}{}

                    // Stage 2 and 3 share the set of origins per tx
                    if announces := f.announces[ann.origin]; announces != nil {
                        announces[hash] = &txMetadataWithSeq{
                            txMetadata: ann.metas[i],
                            seq:        nextSeq(),
                        }
                    } else {
                        f.announces[ann.origin] = map[common.Hash]*txMetadataWithSeq{
                            hash: {
                                txMetadata: ann.metas[i],
                                seq:        nextSeq(),
                            },
                        }
                    }
                    continue
                }
                // If the transaction is not downloading, but is already queued
                // from a different peer, track it for the new peer too.
                if f.announced[hash] != nil {
                    f.announced[hash][ann.origin] = struct{}{}

                    // Stage 2 and 3 share the set of origins per tx
                    if announces := f.announces[ann.origin]; announces != nil {
                        announces[hash] = &txMetadataWithSeq{
                            txMetadata: ann.metas[i],
                            seq:        nextSeq(),
                        }
                    } else {
                        f.announces[ann.origin] = map[common.Hash]*txMetadataWithSeq{
                            hash: {
                                txMetadata: ann.metas[i],
                                seq:        nextSeq(),
                            },
                        }
                    }
                    continue
                }
                // If the transaction is already known to the fetcher, but not
                // yet downloading, add the peer as an alternate origin in the
                // waiting list.
                if f.waitlist[hash] != nil {
                    // Ignore double announcements from the same peer. This is
                    // especially important if metadata is also passed along to
                    // prevent malicious peers flip-flopping good/bad values.
                    if _, ok := f.waitlist[hash][ann.origin]; ok {
                        continue
                    }
                    f.waitlist[hash][ann.origin] = struct{}{}

                    if waitslots := f.waitslots[ann.origin]; waitslots != nil {
                        waitslots[hash] = &txMetadataWithSeq{
                            txMetadata: ann.metas[i],
                            seq:        nextSeq(),
                        }
                    } else {
                        f.waitslots[ann.origin] = map[common.Hash]*txMetadataWithSeq{
                            hash: {
                                txMetadata: ann.metas[i],
                                seq:        nextSeq(),
                            },
                        }
                    }
                    continue
                }
                // Transaction unknown to the fetcher, insert it into the waiting list
                f.waitlist[hash] = map[string]struct{}{ann.origin: {}}

                // Assign the current timestamp as the wait time, but for blob transactions,
                // skip the wait time since they are only announced.
                if ann.metas[i].kind != types.BlobTxType {
                    f.waittime[hash] = f.clock.Now()
                } else {
                    hasBlob = true
                    f.waittime[hash] = f.clock.Now() - mclock.AbsTime(txArriveTimeout)
                }
                if waitslots := f.waitslots[ann.origin]; waitslots != nil {
                    waitslots[hash] = &txMetadataWithSeq{
                        txMetadata: ann.metas[i],
                        seq:        nextSeq(),
                    }
                } else {
                    f.waitslots[ann.origin] = map[common.Hash]*txMetadataWithSeq{
                        hash: {
                            txMetadata: ann.metas[i],
                            seq:        nextSeq(),
                        },
                    }
                }
            }
            // If a new item was added to the waitlist, schedule it into the fetcher
            if hasBlob || (idleWait && len(f.waittime) > 0) {
                f.rescheduleWait(waitTimer, waitTrigger)
            }
            // If this peer is new and announced something already queued, maybe
            // request transactions from them
            if !oldPeer && len(f.announces[ann.origin]) > 0 {
                f.scheduleFetches(timeoutTimer, timeoutTrigger, map[string]struct{}{ann.origin: {}})
            }

        case <-waitTrigger:
            // At least one transaction's waiting time ran out, push all expired
            // ones into the retrieval queues
            actives := make(map[string]struct{})
            for hash, instance := range f.waittime {
                if time.Duration(f.clock.Now()-instance)+txGatherSlack > txArriveTimeout {
                    // Transaction expired without propagation, schedule for retrieval
                    if f.announced[hash] != nil {
                        panic("announce tracker already contains waitlist item")
                    }
                    f.announced[hash] = f.waitlist[hash]
                    for peer := range f.waitlist[hash] {
                        if announces := f.announces[peer]; announces != nil {
                            announces[hash] = f.waitslots[peer][hash]
                        } else {
                            f.announces[peer] = map[common.Hash]*txMetadataWithSeq{hash: f.waitslots[peer][hash]}
                        }
                        delete(f.waitslots[peer], hash)
                        if len(f.waitslots[peer]) == 0 {
                            delete(f.waitslots, peer)
                        }
                        actives[peer] = struct{}{}
                    }
                    delete(f.waittime, hash)
                    delete(f.waitlist, hash)
                }
            }
            // If transactions are still waiting for propagation, reschedule the wait timer
            if len(f.waittime) > 0 {
                f.rescheduleWait(waitTimer, waitTrigger)
            }
            // If any peers became active and are idle, request transactions from them
            if len(actives) > 0 {
                f.scheduleFetches(timeoutTimer, timeoutTrigger, actives)
            }

        case <-timeoutTrigger:
            // Clean up any expired retrievals and avoid re-requesting them from the
            // same peer (either overloaded or malicious, useless in both cases). We
            // could also penalize (Drop), but there's nothing to gain, and it could
            // possibly further increase the load on it.
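            //
            // As a concrete illustration (hypothetical timings): with txFetchTimeout
            // at 5s and txGatherSlack at 100ms, a request issued at t=0 is treated as
            // expired on any timeout tick once more than 4.9s have elapsed, so
            // requests that would expire within the slack window are swept up in the
            // same cleanup pass instead of scheduling another wake-up a few
            // milliseconds later.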
            for peer, req := range f.requests {
                if time.Duration(f.clock.Now()-req.time)+txGatherSlack > txFetchTimeout {
                    txRequestTimeoutMeter.Mark(int64(len(req.hashes)))

                    // Reschedule all the not-yet-delivered fetches to alternate peers
                    for _, hash := range req.hashes {
                        // Skip rescheduling hashes already delivered by someone else
                        if req.stolen != nil {
                            if _, ok := req.stolen[hash]; ok {
                                continue
                            }
                        }
                        // Move the delivery back from fetching to queued
                        if _, ok := f.announced[hash]; ok {
                            panic("announced tracker already contains alternate item")
                        }
                        if f.alternates[hash] != nil { // nil if tx was broadcast during fetch
                            f.announced[hash] = f.alternates[hash]
                        }
                        delete(f.announced[hash], peer)
                        if len(f.announced[hash]) == 0 {
                            delete(f.announced, hash)
                        }
                        delete(f.announces[peer], hash)
                        delete(f.alternates, hash)
                        delete(f.fetching, hash)
                    }
                    if len(f.announces[peer]) == 0 {
                        delete(f.announces, peer)
                    }
                    // Keep track of the request as dangling, but never expire
                    f.requests[peer].hashes = nil
                }
            }
            // Schedule a new transaction retrieval
            f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)

            // No idea if we scheduled something or not, trigger the timer if needed
            // TODO(karalabe): this is kind of lame, can't we dump it into scheduleFetches somehow?
            f.rescheduleTimeout(timeoutTimer, timeoutTrigger)

        case delivery := <-f.cleanup:
            // Independent of whether the delivery was direct or broadcast, remove all
            // traces of the hash from internal trackers. That said, compare any
            // advertised metadata with the real ones and drop bad peers.
            for i, hash := range delivery.hashes {
                if _, ok := f.waitlist[hash]; ok {
                    for peer, txset := range f.waitslots {
                        if meta := txset[hash]; meta != nil {
                            if delivery.metas[i].kind != meta.kind {
                                log.Warn("Announced transaction type mismatch", "peer", peer, "tx", hash, "type", delivery.metas[i].kind, "ann", meta.kind)
                                f.dropPeer(peer)
                            } else if delivery.metas[i].size != meta.size {
                                if math.Abs(float64(delivery.metas[i].size)-float64(meta.size)) > 8 {
                                    log.Warn("Announced transaction size mismatch", "peer", peer, "tx", hash, "size", delivery.metas[i].size, "ann", meta.size)

                                    // Normally we should drop a peer considering this is a protocol violation.
                                    // However, due to the RLP vs consensus format messiness, allow a few bytes
                                    // wiggle-room where we only warn, but don't drop.
                                    //
                                    // TODO(karalabe): Get rid of this relaxation when clients are proven stable.
                                    f.dropPeer(peer)
                                }
                            }
                        }
                        delete(txset, hash)
                        if len(txset) == 0 {
                            delete(f.waitslots, peer)
                        }
                    }
                    delete(f.waitlist, hash)
                    delete(f.waittime, hash)
                } else {
                    for peer, txset := range f.announces {
                        if meta := txset[hash]; meta != nil {
                            if delivery.metas[i].kind != meta.kind {
                                log.Warn("Announced transaction type mismatch", "peer", peer, "tx", hash, "type", delivery.metas[i].kind, "ann", meta.kind)
                                f.dropPeer(peer)
                            } else if delivery.metas[i].size != meta.size {
                                if math.Abs(float64(delivery.metas[i].size)-float64(meta.size)) > 8 {
                                    log.Warn("Announced transaction size mismatch", "peer", peer, "tx", hash, "size", delivery.metas[i].size, "ann", meta.size)

                                    // Normally we should drop a peer considering this is a protocol violation.
                                    // However, due to the RLP vs consensus format messiness, allow a few bytes
                                    // wiggle-room where we only warn, but don't drop.
                                    //
                                    // TODO(karalabe): Get rid of this relaxation when clients are proven stable.
                                    f.dropPeer(peer)
                                }
                            }
                        }
                        delete(txset, hash)
                        if len(txset) == 0 {
                            delete(f.announces, peer)
                        }
                    }
                    delete(f.announced, hash)
                    delete(f.alternates, hash)

                    // If a transaction currently being fetched from a different
                    // origin was delivered (delivery stolen), mark it so the
                    // actual delivery won't double schedule it.
                    if origin, ok := f.fetching[hash]; ok && (origin != delivery.origin || !delivery.direct) {
                        stolen := f.requests[origin].stolen
                        if stolen == nil {
                            f.requests[origin].stolen = make(map[common.Hash]struct{})
                            stolen = f.requests[origin].stolen
                        }
                        stolen[hash] = struct{}{}
                    }
                    delete(f.fetching, hash)
                }
            }
            // In case of a direct delivery, also reschedule anything missing
            // from the original query
            if delivery.direct {
                // Mark the request successful (independent of individual status)
                txRequestDoneMeter.Mark(int64(len(delivery.hashes)))

                // Make sure something was pending, nuke it
                req := f.requests[delivery.origin]
                if req == nil {
                    log.Warn("Unexpected transaction delivery", "peer", delivery.origin)
                    break
                }
                delete(f.requests, delivery.origin)

                // Anything not delivered should be re-scheduled (with or without
                // this peer, depending on the response cutoff)
                delivered := make(map[common.Hash]struct{})
                for _, hash := range delivery.hashes {
                    delivered[hash] = struct{}{}
                }
                cutoff := len(req.hashes) // If nothing is delivered, assume everything is missing, don't retry!!!
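                // The scan below leaves 'cutoff' at the index of the last requested
                // hash that was actually delivered. Missing hashes before the cutoff
                // were implicitly refused by the peer (it skipped them while serving
                // later ones), so the peer is dropped as a source for them; hashes
                // after the cutoff may simply not have fitted into the reply and stay
                // retrievable from any origin, including this peer. If nothing was
                // delivered at all, the cutoff stays at len(req.hashes), so none of
                // the hashes are retried from this peer.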
                for i, hash := range req.hashes {
                    if _, ok := delivered[hash]; ok {
                        cutoff = i
                    }
                }
                // Reschedule missing hashes from alternates, not-fulfilled from alt+self
                for i, hash := range req.hashes {
                    // Skip rescheduling hashes already delivered by someone else
                    if req.stolen != nil {
                        if _, ok := req.stolen[hash]; ok {
                            continue
                        }
                    }
                    if _, ok := delivered[hash]; !ok {
                        if i < cutoff {
                            delete(f.alternates[hash], delivery.origin)
                            delete(f.announces[delivery.origin], hash)
                            if len(f.announces[delivery.origin]) == 0 {
                                delete(f.announces, delivery.origin)
                            }
                        }
                        if len(f.alternates[hash]) > 0 {
                            if _, ok := f.announced[hash]; ok {
                                panic(fmt.Sprintf("announced tracker already contains alternate item: %v", f.announced[hash]))
                            }
                            f.announced[hash] = f.alternates[hash]
                        }
                    }
                    delete(f.alternates, hash)
                    delete(f.fetching, hash)
                }
                // Something was delivered, try to reschedule requests
                f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) // Partial delivery may enable others to deliver too
            }

        case drop := <-f.drop:
            // A peer was dropped, remove all traces of it
            if _, ok := f.waitslots[drop.peer]; ok {
                for hash := range f.waitslots[drop.peer] {
                    delete(f.waitlist[hash], drop.peer)
                    if len(f.waitlist[hash]) == 0 {
                        delete(f.waitlist, hash)
                        delete(f.waittime, hash)
                    }
                }
                delete(f.waitslots, drop.peer)
                if len(f.waitlist) > 0 {
                    f.rescheduleWait(waitTimer, waitTrigger)
                }
            }
            // Clean up any active requests
            var request *txRequest
            if request = f.requests[drop.peer]; request != nil {
                for _, hash := range request.hashes {
                    // Skip rescheduling hashes already delivered by someone else
                    if request.stolen != nil {
                        if _, ok := request.stolen[hash]; ok {
                            continue
                        }
                    }
                    // Undelivered hash, reschedule if there's an alternative origin available
                    delete(f.alternates[hash], drop.peer)
                    if len(f.alternates[hash]) == 0 {
                        delete(f.alternates, hash)
                    } else {
                        f.announced[hash] = f.alternates[hash]
                        delete(f.alternates, hash)
                    }
                    delete(f.fetching, hash)
                }
                delete(f.requests, drop.peer)
            }
            // Clean up general announcement tracking
            if _, ok := f.announces[drop.peer]; ok {
                for hash := range f.announces[drop.peer] {
                    delete(f.announced[hash], drop.peer)
                    if len(f.announced[hash]) == 0 {
                        delete(f.announced, hash)
                    }
                }
                delete(f.announces, drop.peer)
            }
            // If a request was cancelled, check if anything needs to be rescheduled
            if request != nil {
                f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)
                f.rescheduleTimeout(timeoutTimer, timeoutTrigger)
            }

        case <-f.quit:
            return
        }
        // No idea what happened, but bump some sanity metrics
        txFetcherWaitingPeers.Update(int64(len(f.waitslots)))
        txFetcherWaitingHashes.Update(int64(len(f.waitlist)))
        txFetcherQueueingPeers.Update(int64(len(f.announces) - len(f.requests)))
        txFetcherQueueingHashes.Update(int64(len(f.announced)))
        txFetcherFetchingPeers.Update(int64(len(f.requests)))
        txFetcherFetchingHashes.Update(int64(len(f.fetching)))

        // Loop did something, ping the step notifier if needed (tests)
        if f.step != nil {
            f.step <- struct{}{}
        }
    }
}

// rescheduleWait iterates over all the transactions currently in the waitlist
// and schedules the movement into the fetcher for the earliest.
//
// The method has a granularity of 'txGatherSlack', since there's not much point in
// spinning over all the transactions just to maybe find one that should trigger
// a few ms earlier.
func (f *TxFetcher) rescheduleWait(timer *mclock.Timer, trigger chan struct{}) {
    if *timer != nil {
        (*timer).Stop()
    }
    now := f.clock.Now()

    earliest := now
    for _, instance := range f.waittime {
        if earliest > instance {
            earliest = instance
            if txArriveTimeout-time.Duration(now-earliest) < txGatherSlack {
                break
            }
        }
    }
    *timer = f.clock.AfterFunc(txArriveTimeout-time.Duration(now-earliest), func() {
        trigger <- struct{}{}
    })
}

// rescheduleTimeout iterates over all the transactions currently in flight and
// schedules a cleanup run when the first would trigger.
//
// The method has a granularity of 'txGatherSlack', since there's not much point in
// spinning over all the transactions just to maybe find one that should trigger
// a few ms earlier.
//
// This method is a bit "flaky" "by design". In theory the timeout timer only ever
// should be rescheduled if some request is pending. In practice, a timeout will
// cause the timer to be rescheduled every 5 secs (until the peer comes through or
// disconnects). This is a limitation of the fetcher code because we don't track
// pending requests and timed out requests separately. Without double tracking, if
// we simply didn't reschedule the timer on all-timeout then the timer would never
// be set again since len(f.requests) > 0 => something's running.
func (f *TxFetcher) rescheduleTimeout(timer *mclock.Timer, trigger chan struct{}) {
    if *timer != nil {
        (*timer).Stop()
    }
    now := f.clock.Now()

    earliest := now
    for _, req := range f.requests {
        // If this request already timed out, skip it altogether
        if req.hashes == nil {
            continue
        }
        if earliest > req.time {
            earliest = req.time
            if txFetchTimeout-time.Duration(now-earliest) < txGatherSlack {
                break
            }
        }
    }
    *timer = f.clock.AfterFunc(txFetchTimeout-time.Duration(now-earliest), func() {
        trigger <- struct{}{}
    })
}

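// A worked example of the scheduling granularity (hypothetical timings): if the
// oldest waitlist entry was added 450ms ago, the wait timer is armed for another
// 50ms (txArriveTimeout minus the elapsed time). The scan stops early as soon as
// it finds an entry within txGatherSlack (100ms) of firing, since any entries
// that would fire a few milliseconds later are swept up by that same trigger
// anyway.
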
// scheduleFetches starts a batch of retrievals for all available idle peers.
func (f *TxFetcher) scheduleFetches(timer *mclock.Timer, timeout chan struct{}, whitelist map[string]struct{}) {
    // Gather the set of peers we want to retrieve from (default to all)
    actives := whitelist
    if actives == nil {
        actives = make(map[string]struct{})
        for peer := range f.announces {
            actives[peer] = struct{}{}
        }
    }
    if len(actives) == 0 {
        return
    }
    // For each active peer, try to schedule some transaction fetches
    idle := len(f.requests) == 0

    f.forEachPeer(actives, func(peer string) {
        if f.requests[peer] != nil {
            return // continue in the for-each
        }
        if len(f.announces[peer]) == 0 {
            return // continue in the for-each
        }
        var (
            hashes = make([]common.Hash, 0, maxTxRetrievals)
            bytes  uint64
        )
        f.forEachAnnounce(f.announces[peer], func(hash common.Hash, meta txMetadata) bool {
            // If the transaction is already fetching, skip to the next one
            if _, ok := f.fetching[hash]; ok {
                return true
            }
            // Mark the hash as fetching and stash away possible alternates
            f.fetching[hash] = peer

            if _, ok := f.alternates[hash]; ok {
                panic(fmt.Sprintf("alternate tracker already contains fetching item: %v", f.alternates[hash]))
            }
            f.alternates[hash] = f.announced[hash]
            delete(f.announced, hash)

            // Accumulate the hash and stop if the limit was reached
            hashes = append(hashes, hash)
            if len(hashes) >= maxTxRetrievals {
                return false // break in the for-each
            }
            bytes += uint64(meta.size)
            return bytes < maxTxRetrievalSize
        })
        // If any hashes were allocated, request them from the peer
        if len(hashes) > 0 {
            f.requests[peer] = &txRequest{hashes: hashes, time: f.clock.Now()}
            txRequestOutMeter.Mark(int64(len(hashes)))

            go func(peer string, hashes []common.Hash) {
                // Try to fetch the transactions, but in case of a request
                // failure (e.g. peer disconnected), reschedule the hashes.
                if err := f.fetchTxs(peer, hashes); err != nil {
                    txRequestFailMeter.Mark(int64(len(hashes)))
                    f.Drop(peer)
                }
            }(peer, hashes)
        }
    })
    // If a new request was fired, schedule a timeout timer
    if idle && len(f.requests) > 0 {
        f.rescheduleTimeout(timer, timeout)
    }
}

// forEachPeer does a range loop over a map of peers in production, but during
// testing it iterates in a deterministic, sorted (then rotated) order to allow
// reproducing issues.
func (f *TxFetcher) forEachPeer(peers map[string]struct{}, do func(peer string)) {
    // If we're running production, use whatever Go's map gives us
    if f.rand == nil {
        for peer := range peers {
            do(peer)
        }
        return
    }
    // We're running the test suite, make iteration deterministic
    list := make([]string, 0, len(peers))
    for peer := range peers {
        list = append(list, peer)
    }
    sort.Strings(list)
    rotateStrings(list, f.rand.Intn(len(list)))
    for _, peer := range list {
        do(peer)
    }
}

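// Illustrative sketch of the deterministic test iteration above (hypothetical
// peers): with the set {"c", "a", "b"} and f.rand yielding 1, the sorted list
// ["a", "b", "c"] is rotated by one step to ["b", "c", "a"], so every test run
// with the same seed visits peers in the same order while still exercising
// different starting points.
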
// forEachAnnounce loops over the given announcements in arrival order, invoking
// the do function for each until it returns false. We enforce an arrival
// ordering to minimize the chances of transaction nonce-gaps, which result in
// transactions being rejected by the txpool.
func (f *TxFetcher) forEachAnnounce(announces map[common.Hash]*txMetadataWithSeq, do func(hash common.Hash, meta txMetadata) bool) {
    type announcement struct {
        hash common.Hash
        meta txMetadata
        seq  uint64
    }
    // Process announcements by their arrival order
    list := make([]announcement, 0, len(announces))
    for hash, entry := range announces {
        list = append(list, announcement{hash: hash, meta: entry.txMetadata, seq: entry.seq})
    }
    sort.Slice(list, func(i, j int) bool {
        return list[i].seq < list[j].seq
    })
    for i := range list {
        if !do(list[i].hash, list[i].meta) {
            return
        }
    }
}

// rotateStrings rotates the contents of a slice by n steps. This method is only
// used in tests to simulate random map iteration but keep it deterministic.
func rotateStrings(slice []string, n int) {
    orig := make([]string, len(slice))
    copy(orig, slice)

    for i := 0; i < len(orig); i++ {
        slice[i] = orig[(i+n)%len(orig)]
    }
}
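
// A deterministic test-harness sketch (illustrative only; the real tests live in
// tx_fetcher_test.go and are considerably more elaborate). It wires the fetcher
// with no-op callbacks, a simulated clock and a seeded randomizer:
//
//	clock := new(mclock.Simulated)
//	f := NewTxFetcherForTests(
//		func(common.Hash) bool { return false }, // nothing is in the pool yet
//		func(txs []*types.Transaction) []error { return make([]error, len(txs)) },
//		func(peer string, hashes []common.Hash) error { return nil }, // record requests in a real test
//		func(peer string) {},
//		clock,
//		func() time.Time { return time.Time{} },  // frozen wall clock
//		mrand.New(mrand.NewSource(1)),            // seeded for reproducible iteration order
//	)
//	f.Start()
//	defer f.Stop()
//
//	_ = f.Notify("peer-1", []byte{types.LegacyTxType}, []uint32{100}, []common.Hash{{0x01}})
//	clock.Run(txArriveTimeout) // advance the simulated clock past the wait stage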