github.1485827954.workers.dev/ethereum/go-ethereum@v1.14.3/eth/fetcher/tx_fetcher.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package fetcher

import (
	"bytes"
	"errors"
	"fmt"
	"math"
	mrand "math/rand"
	"sort"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/common/lru"
	"github.com/ethereum/go-ethereum/common/mclock"
	"github.com/ethereum/go-ethereum/core/txpool"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/metrics"
)

const (
	// maxTxAnnounces is the maximum number of unique transactions a peer
	// can announce in a short time.
	maxTxAnnounces = 4096

	// maxTxRetrievals is the maximum number of transactions that can be fetched
	// in one request. The rationale for picking 256 is to have a reasonable lower
	// bound for the transferred data (don't waste RTTs, transfer more meaningful
	// batch sizes), but also have an upper bound on the sequentiality to allow
	// using our entire peerset for deliveries.
	//
	// This number also acts as a failsafe against malicious announces which might
	// cause us to request more data than we'd expect.
	maxTxRetrievals = 256

	// maxTxRetrievalSize is the max number of bytes that delivered transactions
	// should weigh according to the announcements. The 128KB was chosen to limit
	// retrieving a maximum of one blob transaction at a time to minimize hogging
	// a connection between two peers.
	maxTxRetrievalSize = 128 * 1024

	// maxTxUnderpricedSetSize is the size of the underpriced transaction set that
	// is used to track recent transactions that have been dropped so we don't
	// re-request them.
	maxTxUnderpricedSetSize = 32768

	// maxTxUnderpricedTimeout is the max time a transaction should be stuck in the underpriced set.
	maxTxUnderpricedTimeout = 5 * time.Minute

	// txArriveTimeout is the time allowance before an announced transaction is
	// explicitly requested.
	txArriveTimeout = 500 * time.Millisecond

	// txGatherSlack is the interval used to collate almost-expired announces
	// with network fetches.
	txGatherSlack = 100 * time.Millisecond
)
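
// As an illustrative sketch (the helper below is hypothetical, not part of
// this package), the waitlist expiry check in the event loop combines
// txArriveTimeout with txGatherSlack, so announcements within the slack of
// their deadline are promoted in the same batch instead of re-arming the
// timer for just a few extra milliseconds:
//
//	// dueForFetch reports whether an announcement first seen at 'seen' should
//	// be promoted from the waitlist to the retrieval queue at time 'now'.
//	func dueForFetch(now, seen mclock.AbsTime) bool {
//		return time.Duration(now-seen)+txGatherSlack > txArriveTimeout
//	}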

var (
	// txFetchTimeout is the maximum allotted time to return an explicitly
	// requested transaction.
	txFetchTimeout = 5 * time.Second
)

var (
	txAnnounceInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/in", nil)
	txAnnounceKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/known", nil)
	txAnnounceUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/underpriced", nil)
	txAnnounceDOSMeter         = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/dos", nil)

	txBroadcastInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/in", nil)
	txBroadcastKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/known", nil)
	txBroadcastUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/underpriced", nil)
	txBroadcastOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/otherreject", nil)

	txRequestOutMeter     = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/out", nil)
	txRequestFailMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/fail", nil)
	txRequestDoneMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/done", nil)
	txRequestTimeoutMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/timeout", nil)

	txReplyInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/in", nil)
	txReplyKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/known", nil)
	txReplyUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/underpriced", nil)
	txReplyOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/otherreject", nil)

	txFetcherWaitingPeers   = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/peers", nil)
	txFetcherWaitingHashes  = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/hashes", nil)
	txFetcherQueueingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/peers", nil)
	txFetcherQueueingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/hashes", nil)
	txFetcherFetchingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/peers", nil)
	txFetcherFetchingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/hashes", nil)
)

var errTerminated = errors.New("terminated")

// txAnnounce is the notification of the availability of a batch
// of new transactions in the network.
type txAnnounce struct {
	origin string        // Identifier of the peer originating the notification
	hashes []common.Hash // Batch of transaction hashes being announced
	metas  []*txMetadata // Batch of metadata associated with the hashes (nil before eth/68)
}

// txMetadata is a set of extra data transmitted along the announcement for better
// fetch scheduling.
type txMetadata struct {
	kind byte   // Transaction consensus type
	size uint32 // Transaction size in bytes
}
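
// A minimal sketch of how an eth/68 announcement maps onto these types (the
// concrete values are illustrative): the NewPooledTransactionHashes message
// carries index-aligned lists of transaction types, sizes and hashes, which
// Notify zips into one *txMetadata per unknown hash before handing the batch
// to the internal loop as a txAnnounce.
//
//	hashes := []common.Hash{{0x01}, {0x02}}
//	kinds := []byte{types.DynamicFeeTxType, types.BlobTxType}
//	sizes := []uint32{196, 131242}
//
//	metas := make([]*txMetadata, len(hashes))
//	for i := range hashes {
//		metas[i] = &txMetadata{kind: kinds[i], size: sizes[i]}
//	}
//	ann := &txAnnounce{origin: "peer-id", hashes: hashes, metas: metas}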

// txRequest represents an in-flight transaction retrieval request destined to
// a specific peer.
type txRequest struct {
	hashes []common.Hash            // Transactions having been requested
	stolen map[common.Hash]struct{} // Deliveries by someone else (don't re-request)
	time   mclock.AbsTime           // Timestamp of the request
}

// txDelivery is the notification that a batch of transactions have been added
// to the pool and should be untracked.
type txDelivery struct {
	origin string        // Identifier of the peer originating the notification
	hashes []common.Hash // Batch of transaction hashes having been delivered
	metas  []txMetadata  // Batch of metadata associated with the delivered hashes
	direct bool          // Whether this is a direct reply or a broadcast
}

// txDrop is the notification that a peer has disconnected.
type txDrop struct {
	peer string
}

// TxFetcher is responsible for retrieving new transactions based on announcements.
//
// The fetcher operates in 3 stages:
//   - Transactions that are newly discovered are moved into a wait list.
//   - After ~500ms passes, transactions from the wait list that have not been
//     broadcast to us in whole are moved into a queueing area.
//   - When a connected peer doesn't have in-flight retrieval requests, any
//     transaction queued up (and announced by the peer) are allocated to the
//     peer and moved into a fetching status until it's fulfilled or fails.
//
// The invariants of the fetcher are:
//   - Each tracked transaction (hash) must only be present in one of the
//     three stages. This ensures that the fetcher operates akin to a finite
//     state automaton and there's no data leak.
//   - Each peer that announced transactions may be scheduled retrievals, but
//     only ever one concurrently. This ensures we can immediately know what is
//     missing from a reply and reschedule it.
type TxFetcher struct {
	notify  chan *txAnnounce
	cleanup chan *txDelivery
	drop    chan *txDrop
	quit    chan struct{}

	underpriced *lru.Cache[common.Hash, time.Time] // Transactions discarded as too cheap (don't re-fetch)

	// Stage 1: Waiting lists for newly discovered transactions that might be
	// broadcast without needing explicit request/reply round trips.
	waitlist  map[common.Hash]map[string]struct{}    // Transactions waiting for a potential broadcast
	waittime  map[common.Hash]mclock.AbsTime         // Timestamps when transactions were added to the waitlist
	waitslots map[string]map[common.Hash]*txMetadata // Waiting announcements grouped by peer (DoS protection)

	// Stage 2: Queue of transactions that are waiting to be allocated to some
	// peer to be retrieved directly.
	announces map[string]map[common.Hash]*txMetadata // Set of announced transactions, grouped by origin peer
	announced map[common.Hash]map[string]struct{}    // Set of download locations, grouped by transaction hash

	// Stage 3: Set of transactions currently being retrieved, some of which may
	// be fulfilled and some rescheduled. Note, this step shares 'announces' from
	// the previous stage to avoid having to duplicate (need it for DoS checks).
	fetching   map[common.Hash]string              // Transaction set currently being retrieved
	requests   map[string]*txRequest               // In-flight transaction retrievals
	alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails

	// Callbacks
	hasTx    func(common.Hash) bool             // Retrieves a tx from the local txpool
	addTxs   func([]*types.Transaction) []error // Insert a batch of transactions into local txpool
	fetchTxs func(string, []common.Hash) error  // Retrieves a set of txs from a remote peer
	dropPeer func(string)                       // Drops a peer in case of announcement violation

	step  chan struct{} // Notification channel when the fetcher loop iterates
	clock mclock.Clock  // Time wrapper to simulate in tests
	rand  *mrand.Rand   // Randomizer to use in tests instead of map range loops (soft-random)
}

// NewTxFetcher creates a transaction fetcher to retrieve transactions
// based on hash announcements.
func NewTxFetcher(hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string)) *TxFetcher {
	return NewTxFetcherForTests(hasTx, addTxs, fetchTxs, dropPeer, mclock.System{}, nil)
}

// NewTxFetcherForTests is a testing method to mock out the realtime clock with
// a simulated version and the internal randomness with a deterministic one.
func NewTxFetcherForTests(
	hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error, dropPeer func(string),
	clock mclock.Clock, rand *mrand.Rand) *TxFetcher {
	return &TxFetcher{
		notify:      make(chan *txAnnounce),
		cleanup:     make(chan *txDelivery),
		drop:        make(chan *txDrop),
		quit:        make(chan struct{}),
		waitlist:    make(map[common.Hash]map[string]struct{}),
		waittime:    make(map[common.Hash]mclock.AbsTime),
		waitslots:   make(map[string]map[common.Hash]*txMetadata),
		announces:   make(map[string]map[common.Hash]*txMetadata),
		announced:   make(map[common.Hash]map[string]struct{}),
		fetching:    make(map[common.Hash]string),
		requests:    make(map[string]*txRequest),
		alternates:  make(map[common.Hash]map[string]struct{}),
		underpriced: lru.NewCache[common.Hash, time.Time](maxTxUnderpricedSetSize),
		hasTx:       hasTx,
		addTxs:      addTxs,
		fetchTxs:    fetchTxs,
		dropPeer:    dropPeer,
		clock:       clock,
		rand:        rand,
	}
}
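
// A minimal wiring sketch (pool and peers are stand-ins for the transaction
// pool and peer set, not part of this file): the protocol handler creates one
// fetcher per node, points the callbacks at the local pool and the peer set,
// starts it, and then feeds it announcements and deliveries from the network
// handlers via Notify and Enqueue.
//
//	fetcher := NewTxFetcher(
//		func(hash common.Hash) bool { return pool.Has(hash) },           // already pooled?
//		func(txs []*types.Transaction) []error { return pool.Add(txs) }, // import a batch
//		func(peer string, hashes []common.Hash) error {
//			return peers.RequestTxs(peer, hashes) // send GetPooledTransactions
//		},
//		func(peer string) { peers.Drop(peer) }, // disconnect a misbehaving peer
//	)
//	fetcher.Start()
//	defer fetcher.Stop()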

// Notify informs the fetcher of the potential availability of a new batch of
// transactions in the network.
func (f *TxFetcher) Notify(peer string, types []byte, sizes []uint32, hashes []common.Hash) error {
	// Keep track of all the announced transactions
	txAnnounceInMeter.Mark(int64(len(hashes)))

	// Skip any transaction announcements that we already know of, or that we've
	// previously marked as cheap and discarded. This check is of course racy,
	// because multiple concurrent notifies will still manage to pass it, but it's
	// still valuable to check here because it runs concurrent to the internal
	// loop, so anything caught here is time saved internally.
	var (
		unknownHashes = make([]common.Hash, 0, len(hashes))
		unknownMetas  = make([]*txMetadata, 0, len(hashes))

		duplicate   int64
		underpriced int64
	)
	for i, hash := range hashes {
		switch {
		case f.hasTx(hash):
			duplicate++
		case f.isKnownUnderpriced(hash):
			underpriced++
		default:
			unknownHashes = append(unknownHashes, hash)
			if types == nil {
				unknownMetas = append(unknownMetas, nil)
			} else {
				unknownMetas = append(unknownMetas, &txMetadata{kind: types[i], size: sizes[i]})
			}
		}
	}
	txAnnounceKnownMeter.Mark(duplicate)
	txAnnounceUnderpricedMeter.Mark(underpriced)

	// If anything's left to announce, push it into the internal loop
	if len(unknownHashes) == 0 {
		return nil
	}
	announce := &txAnnounce{origin: peer, hashes: unknownHashes, metas: unknownMetas}
	select {
	case f.notify <- announce:
		return nil
	case <-f.quit:
		return errTerminated
	}
}

// isKnownUnderpriced reports whether a transaction hash was recently found to be underpriced.
func (f *TxFetcher) isKnownUnderpriced(hash common.Hash) bool {
	prevTime, ok := f.underpriced.Peek(hash)
	if ok && prevTime.Before(time.Now().Add(-maxTxUnderpricedTimeout)) {
		f.underpriced.Remove(hash)
		return false
	}
	return ok
}

// Enqueue imports a batch of received transactions into the transaction pool
// and the fetcher. This method may be called by both transaction broadcasts and
// direct request replies. The differentiation is important so the fetcher can
// re-schedule missing transactions as soon as possible.
func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) error {
	var (
		inMeter          = txReplyInMeter
		knownMeter       = txReplyKnownMeter
		underpricedMeter = txReplyUnderpricedMeter
		otherRejectMeter = txReplyOtherRejectMeter
	)
	if !direct {
		inMeter = txBroadcastInMeter
		knownMeter = txBroadcastKnownMeter
		underpricedMeter = txBroadcastUnderpricedMeter
		otherRejectMeter = txBroadcastOtherRejectMeter
	}
	// Keep track of all the propagated transactions
	inMeter.Mark(int64(len(txs)))

	// Push all the transactions into the pool, tracking underpriced ones to avoid
	// re-requesting them and dropping the peer in case of malicious transfers.
	var (
		added = make([]common.Hash, 0, len(txs))
		metas = make([]txMetadata, 0, len(txs))
	)
	// proceed in batches
	for i := 0; i < len(txs); i += 128 {
		end := i + 128
		if end > len(txs) {
			end = len(txs)
		}
		var (
			duplicate   int64
			underpriced int64
			otherreject int64
		)
		batch := txs[i:end]

		for j, err := range f.addTxs(batch) {
			// Track the transaction hash if the price is too low for us.
			// Avoid re-requesting this transaction when we receive another
			// announcement.
			if errors.Is(err, txpool.ErrUnderpriced) || errors.Is(err, txpool.ErrReplaceUnderpriced) {
				f.underpriced.Add(batch[j].Hash(), batch[j].Time())
			}
			// Track a few interesting failure types
			switch {
			case err == nil: // Noop, but need to handle to not count these

			case errors.Is(err, txpool.ErrAlreadyKnown):
				duplicate++

			case errors.Is(err, txpool.ErrUnderpriced) || errors.Is(err, txpool.ErrReplaceUnderpriced):
				underpriced++

			default:
				otherreject++
			}
			added = append(added, batch[j].Hash())
			metas = append(metas, txMetadata{
				kind: batch[j].Type(),
				size: uint32(batch[j].Size()),
			})
		}
		knownMeter.Mark(duplicate)
		underpricedMeter.Mark(underpriced)
		otherRejectMeter.Mark(otherreject)

		// If 'other reject' is >25% of the deliveries in any batch, sleep a bit.
		if otherreject > 128/4 {
			time.Sleep(200 * time.Millisecond)
			log.Debug("Peer delivering stale transactions", "peer", peer, "rejected", otherreject)
		}
	}
	select {
	case f.cleanup <- &txDelivery{origin: peer, hashes: added, metas: metas, direct: direct}:
		return nil
	case <-f.quit:
		return errTerminated
	}
}

// Drop should be called when a peer disconnects. It cleans up all the internal
// data structures of the given node.
func (f *TxFetcher) Drop(peer string) error {
	select {
	case f.drop <- &txDrop{peer: peer}:
		return nil
	case <-f.quit:
		return errTerminated
	}
}

// Start boots up the announcement based synchroniser, accepting and processing
// hash notifications and transaction fetches until termination is requested.
func (f *TxFetcher) Start() {
	go f.loop()
}

// Stop terminates the announcement based synchroniser, canceling all pending
// operations.
func (f *TxFetcher) Stop() {
	close(f.quit)
}
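
// A short sketch of how the direct flag is meant to be used (the handler
// shape below is hypothetical, only the message names come from the eth
// protocol): unsolicited Transactions broadcasts are enqueued with
// direct=false, while a PooledTransactions reply to a request issued by this
// fetcher is enqueued with direct=true, so the reply can be matched against
// the outstanding txRequest and anything still missing can be rescheduled.
//
//	// Inside a hypothetical eth message handler:
//	switch msg.Code {
//	case TransactionsMsg:
//		return fetcher.Enqueue(peer.ID(), txs, false) // unsolicited broadcast
//	case PooledTransactionsMsg:
//		return fetcher.Enqueue(peer.ID(), txs, true) // reply to our own request
//	}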

func (f *TxFetcher) loop() {
	var (
		waitTimer    = new(mclock.Timer)
		timeoutTimer = new(mclock.Timer)

		waitTrigger    = make(chan struct{}, 1)
		timeoutTrigger = make(chan struct{}, 1)
	)
	for {
		select {
		case ann := <-f.notify:
			// Drop part of the new announcements if there are too many accumulated.
			// Note, we could but do not filter already known transactions here as
			// the probability of something arriving between this call and the pre-
			// filter outside is essentially zero.
			used := len(f.waitslots[ann.origin]) + len(f.announces[ann.origin])
			if used >= maxTxAnnounces {
				// This can happen if a set of transactions are requested but not
				// all fulfilled, so the remainder are rescheduled without the cap
				// check. Should be fine as the limit is in the thousands and the
				// request size in the hundreds.
				txAnnounceDOSMeter.Mark(int64(len(ann.hashes)))
				break
			}
			want := used + len(ann.hashes)
			if want > maxTxAnnounces {
				txAnnounceDOSMeter.Mark(int64(want - maxTxAnnounces))

				ann.hashes = ann.hashes[:want-maxTxAnnounces]
				ann.metas = ann.metas[:want-maxTxAnnounces]
			}
			// All is well, schedule the remainder of the transactions
			idleWait := len(f.waittime) == 0
			_, oldPeer := f.announces[ann.origin]

			for i, hash := range ann.hashes {
				// If the transaction is already downloading, add it to the list
				// of possible alternates (in case the current retrieval fails) and
				// also account it for the peer.
				if f.alternates[hash] != nil {
					f.alternates[hash][ann.origin] = struct{}{}

					// Stage 2 and 3 share the set of origins per tx
					if announces := f.announces[ann.origin]; announces != nil {
						announces[hash] = ann.metas[i]
					} else {
						f.announces[ann.origin] = map[common.Hash]*txMetadata{hash: ann.metas[i]}
					}
					continue
				}
				// If the transaction is not downloading, but is already queued
				// from a different peer, track it for the new peer too.
				if f.announced[hash] != nil {
					f.announced[hash][ann.origin] = struct{}{}

					// Stage 2 and 3 share the set of origins per tx
					if announces := f.announces[ann.origin]; announces != nil {
						announces[hash] = ann.metas[i]
					} else {
						f.announces[ann.origin] = map[common.Hash]*txMetadata{hash: ann.metas[i]}
					}
					continue
				}
				// If the transaction is already known to the fetcher, but not
				// yet downloading, add the peer as an alternate origin in the
				// waiting list.
				if f.waitlist[hash] != nil {
					// Ignore double announcements from the same peer. This is
					// especially important if metadata is also passed along to
					// prevent malicious peers flip-flopping good/bad values.
					if _, ok := f.waitlist[hash][ann.origin]; ok {
						continue
					}
					f.waitlist[hash][ann.origin] = struct{}{}

					if waitslots := f.waitslots[ann.origin]; waitslots != nil {
						waitslots[hash] = ann.metas[i]
					} else {
						f.waitslots[ann.origin] = map[common.Hash]*txMetadata{hash: ann.metas[i]}
					}
					continue
				}
				// Transaction unknown to the fetcher, insert it into the waiting list
				f.waitlist[hash] = map[string]struct{}{ann.origin: {}}
				f.waittime[hash] = f.clock.Now()

				if waitslots := f.waitslots[ann.origin]; waitslots != nil {
					waitslots[hash] = ann.metas[i]
				} else {
					f.waitslots[ann.origin] = map[common.Hash]*txMetadata{hash: ann.metas[i]}
				}
			}
			// If a new item was added to the waitlist, schedule it into the fetcher
			if idleWait && len(f.waittime) > 0 {
				f.rescheduleWait(waitTimer, waitTrigger)
			}
			// If this peer is new and announced something already queued, maybe
			// request transactions from them
			if !oldPeer && len(f.announces[ann.origin]) > 0 {
				f.scheduleFetches(timeoutTimer, timeoutTrigger, map[string]struct{}{ann.origin: {}})
			}

		case <-waitTrigger:
			// At least one transaction's waiting time ran out, push all expired
			// ones into the retrieval queues
			actives := make(map[string]struct{})
			for hash, instance := range f.waittime {
				if time.Duration(f.clock.Now()-instance)+txGatherSlack > txArriveTimeout {
					// Transaction expired without propagation, schedule for retrieval
					if f.announced[hash] != nil {
						panic("announce tracker already contains waitlist item")
					}
					f.announced[hash] = f.waitlist[hash]
					for peer := range f.waitlist[hash] {
						if announces := f.announces[peer]; announces != nil {
							announces[hash] = f.waitslots[peer][hash]
						} else {
							f.announces[peer] = map[common.Hash]*txMetadata{hash: f.waitslots[peer][hash]}
						}
						delete(f.waitslots[peer], hash)
						if len(f.waitslots[peer]) == 0 {
							delete(f.waitslots, peer)
						}
						actives[peer] = struct{}{}
					}
					delete(f.waittime, hash)
					delete(f.waitlist, hash)
				}
			}
			// If transactions are still waiting for propagation, reschedule the wait timer
			if len(f.waittime) > 0 {
				f.rescheduleWait(waitTimer, waitTrigger)
			}
			// If any peers became active and are idle, request transactions from them
			if len(actives) > 0 {
				f.scheduleFetches(timeoutTimer, timeoutTrigger, actives)
			}

		case <-timeoutTrigger:
			// Clean up any expired retrievals and avoid re-requesting them from the
			// same peer (either overloaded or malicious, useless in both cases). We
			// could also penalize (Drop), but there's nothing to gain, and it could
			// possibly further increase the load on it.
			for peer, req := range f.requests {
				if time.Duration(f.clock.Now()-req.time)+txGatherSlack > txFetchTimeout {
					txRequestTimeoutMeter.Mark(int64(len(req.hashes)))

					// Reschedule all the not-yet-delivered fetches to alternate peers
					for _, hash := range req.hashes {
						// Skip rescheduling hashes already delivered by someone else
						if req.stolen != nil {
							if _, ok := req.stolen[hash]; ok {
								continue
							}
						}
						// Move the delivery back from fetching to queued
						if _, ok := f.announced[hash]; ok {
							panic("announced tracker already contains alternate item")
						}
						if f.alternates[hash] != nil { // nil if tx was broadcast during fetch
							f.announced[hash] = f.alternates[hash]
						}
						delete(f.announced[hash], peer)
						if len(f.announced[hash]) == 0 {
							delete(f.announced, hash)
						}
						delete(f.announces[peer], hash)
						delete(f.alternates, hash)
						delete(f.fetching, hash)
					}
					if len(f.announces[peer]) == 0 {
						delete(f.announces, peer)
					}
					// Keep track of the request as dangling, but never expire
					f.requests[peer].hashes = nil
				}
			}
			// Schedule a new transaction retrieval
			f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)

			// No idea if we scheduled something or not, trigger the timer if needed
			// TODO(karalabe): this is kind of lame, can't we dump it into scheduleFetches somehow?
			f.rescheduleTimeout(timeoutTimer, timeoutTrigger)

		case delivery := <-f.cleanup:
			// Independent of whether the delivery was direct or a broadcast, remove
			// all traces of the hash from internal trackers. That said, compare any
			// advertised metadata with the real ones and drop bad peers.
			for i, hash := range delivery.hashes {
				if _, ok := f.waitlist[hash]; ok {
					for peer, txset := range f.waitslots {
						if meta := txset[hash]; meta != nil {
							if delivery.metas[i].kind != meta.kind {
								log.Warn("Announced transaction type mismatch", "peer", peer, "tx", hash, "type", delivery.metas[i].kind, "ann", meta.kind)
								f.dropPeer(peer)
							} else if delivery.metas[i].size != meta.size {
								if math.Abs(float64(delivery.metas[i].size)-float64(meta.size)) > 8 {
									log.Warn("Announced transaction size mismatch", "peer", peer, "tx", hash, "size", delivery.metas[i].size, "ann", meta.size)

									// Normally we should drop a peer considering this is a protocol violation.
									// However, due to the RLP vs consensus format messiness, allow a few bytes
									// wiggle-room where we only warn, but don't drop.
									//
									// TODO(karalabe): Get rid of this relaxation when clients are proven stable.
									f.dropPeer(peer)
								}
							}
						}
						delete(txset, hash)
						if len(txset) == 0 {
							delete(f.waitslots, peer)
						}
					}
					delete(f.waitlist, hash)
					delete(f.waittime, hash)
				} else {
					for peer, txset := range f.announces {
						if meta := txset[hash]; meta != nil {
							if delivery.metas[i].kind != meta.kind {
								log.Warn("Announced transaction type mismatch", "peer", peer, "tx", hash, "type", delivery.metas[i].kind, "ann", meta.kind)
								f.dropPeer(peer)
							} else if delivery.metas[i].size != meta.size {
								if math.Abs(float64(delivery.metas[i].size)-float64(meta.size)) > 8 {
									log.Warn("Announced transaction size mismatch", "peer", peer, "tx", hash, "size", delivery.metas[i].size, "ann", meta.size)

									// Normally we should drop a peer considering this is a protocol violation.
									// However, due to the RLP vs consensus format messiness, allow a few bytes
									// wiggle-room where we only warn, but don't drop.
									//
									// TODO(karalabe): Get rid of this relaxation when clients are proven stable.
									f.dropPeer(peer)
								}
							}
						}
						delete(txset, hash)
						if len(txset) == 0 {
							delete(f.announces, peer)
						}
					}
					delete(f.announced, hash)
					delete(f.alternates, hash)

					// If a transaction currently being fetched from a different
					// origin was delivered (delivery stolen), mark it so the
					// actual delivery won't double schedule it.
					if origin, ok := f.fetching[hash]; ok && (origin != delivery.origin || !delivery.direct) {
						stolen := f.requests[origin].stolen
						if stolen == nil {
							f.requests[origin].stolen = make(map[common.Hash]struct{})
							stolen = f.requests[origin].stolen
						}
						stolen[hash] = struct{}{}
					}
					delete(f.fetching, hash)
				}
			}
			// In case of a direct delivery, also reschedule anything missing
			// from the original query
			if delivery.direct {
				// Mark the request successful (independent of individual status)
				txRequestDoneMeter.Mark(int64(len(delivery.hashes)))

				// Make sure something was pending, nuke it
				req := f.requests[delivery.origin]
				if req == nil {
					log.Warn("Unexpected transaction delivery", "peer", delivery.origin)
					break
				}
				delete(f.requests, delivery.origin)

				// Anything not delivered should be re-scheduled (with or without
				// this peer, depending on the response cutoff)
				delivered := make(map[common.Hash]struct{})
				for _, hash := range delivery.hashes {
					delivered[hash] = struct{}{}
				}
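
				// As a worked illustration of the cutoff heuristic below (hash
				// names are purely illustrative), assume the request was
				//
				//	req.hashes = [A, B, C, D]
				//	delivered  = {B}
				//
				// Then cutoff ends up as 1, the index of the last delivered hash.
				// A (index 0 < cutoff) was skipped even though a later hash was
				// served, so the delivering peer is removed as an origin for A.
				// C and D (index >= cutoff) may simply have been cut off by the
				// reply size limit, so the peer remains a valid alternate for them.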
				cutoff := len(req.hashes) // If nothing is delivered, assume everything is missing, don't retry!!!
				for i, hash := range req.hashes {
					if _, ok := delivered[hash]; ok {
						cutoff = i
					}
				}
				// Reschedule missing hashes from alternates, not-fulfilled from alt+self
				for i, hash := range req.hashes {
					// Skip rescheduling hashes already delivered by someone else
					if req.stolen != nil {
						if _, ok := req.stolen[hash]; ok {
							continue
						}
					}
					if _, ok := delivered[hash]; !ok {
						if i < cutoff {
							delete(f.alternates[hash], delivery.origin)
							delete(f.announces[delivery.origin], hash)
							if len(f.announces[delivery.origin]) == 0 {
								delete(f.announces, delivery.origin)
							}
						}
						if len(f.alternates[hash]) > 0 {
							if _, ok := f.announced[hash]; ok {
								panic(fmt.Sprintf("announced tracker already contains alternate item: %v", f.announced[hash]))
							}
							f.announced[hash] = f.alternates[hash]
						}
					}
					delete(f.alternates, hash)
					delete(f.fetching, hash)
				}
				// Something was delivered, try to reschedule requests
				f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) // Partial delivery may enable others to deliver too
			}

		case drop := <-f.drop:
			// A peer was dropped, remove all traces of it
			if _, ok := f.waitslots[drop.peer]; ok {
				for hash := range f.waitslots[drop.peer] {
					delete(f.waitlist[hash], drop.peer)
					if len(f.waitlist[hash]) == 0 {
						delete(f.waitlist, hash)
						delete(f.waittime, hash)
					}
				}
				delete(f.waitslots, drop.peer)
				if len(f.waitlist) > 0 {
					f.rescheduleWait(waitTimer, waitTrigger)
				}
			}
			// Clean up any active requests
			var request *txRequest
			if request = f.requests[drop.peer]; request != nil {
				for _, hash := range request.hashes {
					// Skip rescheduling hashes already delivered by someone else
					if request.stolen != nil {
						if _, ok := request.stolen[hash]; ok {
							continue
						}
					}
					// Undelivered hash, reschedule if there's an alternative origin available
					delete(f.alternates[hash], drop.peer)
					if len(f.alternates[hash]) == 0 {
						delete(f.alternates, hash)
					} else {
						f.announced[hash] = f.alternates[hash]
						delete(f.alternates, hash)
					}
					delete(f.fetching, hash)
				}
				delete(f.requests, drop.peer)
			}
			// Clean up general announcement tracking
			if _, ok := f.announces[drop.peer]; ok {
				for hash := range f.announces[drop.peer] {
					delete(f.announced[hash], drop.peer)
					if len(f.announced[hash]) == 0 {
						delete(f.announced, hash)
					}
				}
				delete(f.announces, drop.peer)
			}
			// If a request was cancelled, check if anything needs to be rescheduled
			if request != nil {
				f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)
				f.rescheduleTimeout(timeoutTimer, timeoutTrigger)
			}

		case <-f.quit:
			return
		}
		// No idea what happened, but bump some sanity metrics
		txFetcherWaitingPeers.Update(int64(len(f.waitslots)))
		txFetcherWaitingHashes.Update(int64(len(f.waitlist)))
		txFetcherQueueingPeers.Update(int64(len(f.announces) - len(f.requests)))
		txFetcherQueueingHashes.Update(int64(len(f.announced)))
		txFetcherFetchingPeers.Update(int64(len(f.requests)))
		txFetcherFetchingHashes.Update(int64(len(f.fetching)))

		// Loop did something, ping the step notifier if needed (tests)
		if f.step != nil {
			f.step <- struct{}{}
		}
	}
}

// rescheduleWait iterates over all the transactions currently in the waitlist
// and schedules the movement into the fetcher for the earliest.
//
// The method has a granularity of 'txGatherSlack', since there's not much point in
// spinning over all the transactions just to maybe find one that should trigger
// a few ms earlier.
func (f *TxFetcher) rescheduleWait(timer *mclock.Timer, trigger chan struct{}) {
	if *timer != nil {
		(*timer).Stop()
	}
	now := f.clock.Now()

	earliest := now
	for _, instance := range f.waittime {
		if earliest > instance {
			earliest = instance
			if txArriveTimeout-time.Duration(now-earliest) < txGatherSlack {
				break
			}
		}
	}
	*timer = f.clock.AfterFunc(txArriveTimeout-time.Duration(now-earliest), func() {
		trigger <- struct{}{}
	})
}

// rescheduleTimeout iterates over all the transactions currently in flight and
// schedules a cleanup run when the first would trigger.
//
// The method has a granularity of 'txGatherSlack', since there's not much point in
// spinning over all the transactions just to maybe find one that should trigger
// a few ms earlier.
//
// This method is a bit "flaky" "by design". In theory the timeout timer only ever
// should be rescheduled if some request is pending. In practice, a timeout will
// cause the timer to be rescheduled every 5 secs (until the peer comes through or
// disconnects). This is a limitation of the fetcher code because we don't track
// pending requests and timed out requests separately. Without double tracking, if
// we simply didn't reschedule the timer on all-timeout then the timer would never
// be set again since len(requests) > 0 => something's running.
func (f *TxFetcher) rescheduleTimeout(timer *mclock.Timer, trigger chan struct{}) {
	if *timer != nil {
		(*timer).Stop()
	}
	now := f.clock.Now()

	earliest := now
	for _, req := range f.requests {
		// If this request already timed out, skip it altogether
		if req.hashes == nil {
			continue
		}
		if earliest > req.time {
			earliest = req.time
			if txFetchTimeout-time.Duration(now-earliest) < txGatherSlack {
				break
			}
		}
	}
	*timer = f.clock.AfterFunc(txFetchTimeout-time.Duration(now-earliest), func() {
		trigger <- struct{}{}
	})
}

// scheduleFetches starts a batch of retrievals for all available idle peers.
func (f *TxFetcher) scheduleFetches(timer *mclock.Timer, timeout chan struct{}, whitelist map[string]struct{}) {
	// Gather the set of peers we want to retrieve from (default to all)
	actives := whitelist
	if actives == nil {
		actives = make(map[string]struct{})
		for peer := range f.announces {
			actives[peer] = struct{}{}
		}
	}
	if len(actives) == 0 {
		return
	}
	// For each active peer, try to schedule some transaction fetches
	idle := len(f.requests) == 0

	f.forEachPeer(actives, func(peer string) {
		if f.requests[peer] != nil {
			return // continue in the for-each
		}
		if len(f.announces[peer]) == 0 {
			return // continue in the for-each
		}
		var (
			hashes = make([]common.Hash, 0, maxTxRetrievals)
			bytes  uint64
		)
		f.forEachAnnounce(f.announces[peer], func(hash common.Hash, meta *txMetadata) bool {
			// If the transaction is already fetching, skip to the next one
			if _, ok := f.fetching[hash]; ok {
				return true
			}
			// Mark the hash as fetching and stash away possible alternates
			f.fetching[hash] = peer

			if _, ok := f.alternates[hash]; ok {
				panic(fmt.Sprintf("alternate tracker already contains fetching item: %v", f.alternates[hash]))
			}
			f.alternates[hash] = f.announced[hash]
			delete(f.announced, hash)

			// Accumulate the hash and stop if the limit was reached
			hashes = append(hashes, hash)
			if len(hashes) >= maxTxRetrievals {
				return false // break in the for-each
			}
			if meta != nil { // Only set for eth/68 and upwards
				bytes += uint64(meta.size)
				if bytes >= maxTxRetrievalSize {
					return false
				}
			}
			return true // scheduled, try to add more
		})
		// If any hashes were allocated, request them from the peer
		if len(hashes) > 0 {
			f.requests[peer] = &txRequest{hashes: hashes, time: f.clock.Now()}
			txRequestOutMeter.Mark(int64(len(hashes)))

			go func(peer string, hashes []common.Hash) {
				// Try to fetch the transactions, but in case of a request
				// failure (e.g. peer disconnected), reschedule the hashes.
				if err := f.fetchTxs(peer, hashes); err != nil {
					txRequestFailMeter.Mark(int64(len(hashes)))
					f.Drop(peer)
				}
			}(peer, hashes)
		}
	})
	// If a new request was fired, schedule a timeout timer
	if idle && len(f.requests) > 0 {
		f.rescheduleTimeout(timer, timeout)
	}
}

// forEachPeer does a range loop over a map of peers in production, but during
// testing it does a deterministic sorted random to allow reproducing issues.
func (f *TxFetcher) forEachPeer(peers map[string]struct{}, do func(peer string)) {
	// If we're running production, use whatever Go's map gives us
	if f.rand == nil {
		for peer := range peers {
			do(peer)
		}
		return
	}
	// We're running the test suite, make iteration deterministic
	list := make([]string, 0, len(peers))
	for peer := range peers {
		list = append(list, peer)
	}
	sort.Strings(list)
	rotateStrings(list, f.rand.Intn(len(list)))
	for _, peer := range list {
		do(peer)
	}
}
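
// A brief sketch of how the deterministic iteration is wired up in tests (the
// callback placeholders below are illustrative, not the real test harness):
// handing NewTxFetcherForTests a fixed-seed *mrand.Rand makes forEachPeer and
// forEachAnnounce sort the keys first and then rotate them by a seed-derived
// offset, so every run visits peers and hashes in the same order while still
// exercising different starting points.
//
//	rand := mrand.New(mrand.NewSource(0x1234))
//	f := NewTxFetcherForTests(hasTx, addTxs, fetchTxs, dropPeer, mclock.System{}, rand)
//	// With f.rand != nil, forEachPeer iterates a sorted, seed-rotated copy of
//	// the peer set instead of relying on Go's randomized map order.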

// forEachAnnounce does a range loop over a map of announcements in production,
// but during testing it does a deterministic sorted random to allow reproducing
// issues.
func (f *TxFetcher) forEachAnnounce(announces map[common.Hash]*txMetadata, do func(hash common.Hash, meta *txMetadata) bool) {
	// If we're running production, use whatever Go's map gives us
	if f.rand == nil {
		for hash, meta := range announces {
			if !do(hash, meta) {
				return
			}
		}
		return
	}
	// We're running the test suite, make iteration deterministic
	list := make([]common.Hash, 0, len(announces))
	for hash := range announces {
		list = append(list, hash)
	}
	sortHashes(list)
	rotateHashes(list, f.rand.Intn(len(list)))
	for _, hash := range list {
		if !do(hash, announces[hash]) {
			return
		}
	}
}

// rotateStrings rotates the contents of a slice by n steps. This method is only
// used in tests to simulate random map iteration but keep it deterministic.
func rotateStrings(slice []string, n int) {
	orig := make([]string, len(slice))
	copy(orig, slice)

	for i := 0; i < len(orig); i++ {
		slice[i] = orig[(i+n)%len(orig)]
	}
}

// sortHashes sorts a slice of hashes. This method is only used in tests in order
// to simulate random map iteration but keep it deterministic.
func sortHashes(slice []common.Hash) {
	for i := 0; i < len(slice); i++ {
		for j := i + 1; j < len(slice); j++ {
			if bytes.Compare(slice[i][:], slice[j][:]) > 0 {
				slice[i], slice[j] = slice[j], slice[i]
			}
		}
	}
}

// rotateHashes rotates the contents of a slice by n steps. This method is only
// used in tests to simulate random map iteration but keep it deterministic.
func rotateHashes(slice []common.Hash, n int) {
	orig := make([]common.Hash, len(slice))
	copy(orig, slice)

	for i := 0; i < len(orig); i++ {
		slice[i] = orig[(i+n)%len(orig)]
	}
}
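
// A tiny worked example of the helpers above (values are illustrative): after
// sorting, rotateStrings shifts every element left by n positions, wrapping
// around, so element i takes the value of the original element (i+n) mod len.
// The hash variants behave the same way, which is what keeps test iteration
// reproducible for a fixed random seed while still varying the starting offset.
//
//	list := []string{"c", "a", "b"}
//	sort.Strings(list)     // ["a", "b", "c"]
//	rotateStrings(list, 1) // ["b", "c", "a"]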