github.com/dominant-strategies/go-quai@v0.28.2/eth/fetcher/tx_fetcher.go

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package fetcher
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	mrand "math/rand"
    23  	"sort"
    24  	"time"
    25  
    26  	mapset "github.com/deckarep/golang-set"
    27  	"github.com/dominant-strategies/go-quai/common"
    28  	"github.com/dominant-strategies/go-quai/common/mclock"
    29  	"github.com/dominant-strategies/go-quai/core"
    30  	"github.com/dominant-strategies/go-quai/core/types"
    31  	"github.com/dominant-strategies/go-quai/log"
    32  	"github.com/dominant-strategies/go-quai/metrics"
    33  )
    34  
    35  const (
    36  	// maxTxAnnounces is the maximum number of unique transactions a peer
    37  	// can announce in a short time.
    38  	maxTxAnnounces = 4096
    39  
    40  	// maxTxRetrievals is the maximum number of transactions that can be fetched
    41  	// in one request. The rationale for picking 256 is:
    42  	//   - In the eth protocol, the softResponseLimit is 2MB. Nowadays, according
    43  	//     to Etherscan, the average transaction size is around 200B, so in theory
    44  	//     we could include a lot of transactions in a single protocol packet.
    45  	//   - However, the maximum size of a single transaction is capped at 128KB,
    46  	//     so we pick a middle value here to maximize retrieval efficiency while
    47  	//     ensuring that response size overflow won't happen in most cases.
    48  	maxTxRetrievals = 256
    49  
    50  	// maxTxUnderpricedSetSize is the size of the underpriced transaction set that
    51  	// is used to track recent transactions that have been dropped so we don't
    52  	// re-request them.
    53  	maxTxUnderpricedSetSize = 32768
    54  
    55  	// txArriveTimeout is the time allowance before an announced transaction is
    56  	// explicitly requested.
    57  	txArriveTimeout = 500 * time.Millisecond
    58  
    59  	// txGatherSlack is the interval used to collate almost-expired announces
    60  	// with network fetches.
    61  	txGatherSlack = 100 * time.Millisecond
    62  )
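
        // A rough back-of-the-envelope check of the sizing rationale above. This is an
        // illustrative sketch only; the 2MB, 200B and 128KB figures are the assumptions
        // quoted in the comment, not values measured here.
        //
        //	const (
        //		softResponseLimit = 2 * 1024 * 1024 // assumed protocol soft response limit
        //		avgTxSize         = 200             // assumed average transaction size
        //		maxTxSize         = 128 * 1024      // assumed maximum single transaction size
        //	)
        //	typical := maxTxRetrievals * avgTxSize // 256 * 200B ≈ 50KB, far below the soft limit
        //	worst := maxTxRetrievals * maxTxSize   // 256 * 128KB = 32MB, only if every tx is maximal
        //	_, _ = typical, worst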
    63  
    64  var (
    65  	// txFetchTimeout is the maximum allotted time to return an explicitly
    66  	// requested transaction.
    67  	txFetchTimeout = 5 * time.Second
    68  )
    69  
    70  var (
    71  	txAnnounceInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/in", nil)
    72  	txAnnounceKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/known", nil)
    73  	txAnnounceUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/underpriced", nil)
    74  	txAnnounceDOSMeter         = metrics.NewRegisteredMeter("eth/fetcher/transaction/announces/dos", nil)
    75  
    76  	txBroadcastInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/in", nil)
    77  	txBroadcastKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/known", nil)
    78  	txBroadcastUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/underpriced", nil)
    79  	txBroadcastOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/broadcasts/otherreject", nil)
    80  
    81  	txRequestOutMeter     = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/out", nil)
    82  	txRequestFailMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/fail", nil)
    83  	txRequestDoneMeter    = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/done", nil)
    84  	txRequestTimeoutMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/request/timeout", nil)
    85  
    86  	txReplyInMeter          = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/in", nil)
    87  	txReplyKnownMeter       = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/known", nil)
    88  	txReplyUnderpricedMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/underpriced", nil)
    89  	txReplyOtherRejectMeter = metrics.NewRegisteredMeter("eth/fetcher/transaction/replies/otherreject", nil)
    90  
    91  	txFetcherWaitingPeers   = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/peers", nil)
    92  	txFetcherWaitingHashes  = metrics.NewRegisteredGauge("eth/fetcher/transaction/waiting/hashes", nil)
    93  	txFetcherQueueingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/peers", nil)
    94  	txFetcherQueueingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/queueing/hashes", nil)
    95  	txFetcherFetchingPeers  = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/peers", nil)
    96  	txFetcherFetchingHashes = metrics.NewRegisteredGauge("eth/fetcher/transaction/fetching/hashes", nil)
    97  )
    98  
    99  // txAnnounce is the notification of the availability of a batch
   100  // of new transactions in the network.
   101  type txAnnounce struct {
   102  	origin string        // Identifier of the peer originating the notification
   103  	hashes []common.Hash // Batch of transaction hashes being announced
   104  }
   105  
   106  // txRequest represents an in-flight transaction retrieval request destined to
   107  // a specific peer.
   108  type txRequest struct {
   109  	hashes []common.Hash            // Transactions having been requested
   110  	stolen map[common.Hash]struct{} // Deliveries by someone else (don't re-request)
   111  	time   mclock.AbsTime           // Timestamp of the request
   112  }
   113  
   114  // txDelivery is the notification that a batch of transactions has been added
   115  // to the pool and should be untracked.
   116  type txDelivery struct {
   117  	origin string        // Identifier of the peer originating the notification
   118  	hashes []common.Hash // Batch of transaction hashes having been delivered
   119  	direct bool          // Whether this is a direct reply or a broadcast
   120  }
   121  
   122  // txDrop is the notification that a peer has disconnected.
   123  type txDrop struct {
   124  	peer string
   125  }
   126  
   127  // TxFetcher is responsible for retrieving new transactions based on announcements.
   128  //
   129  // The fetcher operates in 3 stages:
   130  //   - Transactions that are newly discovered are moved into a wait list.
   131  //   - After ~500ms passes, transactions from the wait list that have not been
   132  //     broadcast to us in whole are moved into a queueing area.
   133  //   - When a connected peer doesn't have in-flight retrieval requests, any
   134  //     transactions queued up (and announced by the peer) are allocated to the
   135  //     peer and moved into a fetching status until they're fulfilled or fail.
   136  //
   137  // The invariants of the fetcher are:
   138  //   - Each tracked transaction (hash) must only be present in one of the
   139  //     three stages. This ensures that the fetcher operates akin to a finite
   140  //     state automaton and there's no data leak.
   141  //   - Each peer that announced transactions may have retrievals scheduled, but
   142  //     only ever one concurrently. This ensures we can immediately know what is
   143  //     missing from a reply and reschedule it.
   144  type TxFetcher struct {
   145  	notify  chan *txAnnounce
   146  	cleanup chan *txDelivery
   147  	drop    chan *txDrop
   148  	quit    chan struct{}
   149  
   150  	underpriced mapset.Set // Transactions discarded as too cheap (don't re-fetch)
   151  
   152  	// Stage 1: Waiting lists for newly discovered transactions that might be
   153  	// broadcast without needing explicit request/reply round trips.
   154  	waitlist  map[common.Hash]map[string]struct{} // Transactions waiting for a potential broadcast
   155  	waittime  map[common.Hash]mclock.AbsTime      // Timestamps when transactions were added to the waitlist
   156  	waitslots map[string]map[common.Hash]struct{} // Waiting announcements grouped by peer (DoS protection)
   157  
   158  	// Stage 2: Queue of transactions that are waiting to be allocated to some peer
   159  	// to be retrieved directly.
   160  	announces map[string]map[common.Hash]struct{} // Set of announced transactions, grouped by origin peer
   161  	announced map[common.Hash]map[string]struct{} // Set of download locations, grouped by transaction hash
   162  
   163  	// Stage 3: Set of transactions currently being retrieved, some of which may be
   164  	// fulfilled and some rescheduled. Note, this step shares 'announces' from the
   165  	// previous stage to avoid having to duplicate (need it for DoS checks).
   166  	fetching   map[common.Hash]string              // Transaction set currently being retrieved
   167  	requests   map[string]*txRequest               // In-flight transaction retrievals
   168  	alternates map[common.Hash]map[string]struct{} // In-flight transaction alternate origins if retrieval fails
   169  
   170  	// Callbacks
   171  	hasTx    func(common.Hash) bool             // Retrieves a tx from the local txpool
   172  	addTxs   func([]*types.Transaction) []error // Insert a batch of transactions into local txpool
   173  	fetchTxs func(string, []common.Hash) error  // Retrieves a set of txs from a remote peer
   174  
   175  	step  chan struct{} // Notification channel when the fetcher loop iterates
   176  	clock mclock.Clock  // Time wrapper to simulate in tests
   177  	rand  *mrand.Rand   // Randomizer to use in tests instead of map range loops (soft-random)
   178  }
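
        // Informal membership sketch of the three stages for a single hash h announced by
        // peer p (a summary of the fields above, not executable code):
        //
        //	Stage 1 (waiting):  waitlist[h][p], waittime[h], waitslots[p][h]
        //	Stage 2 (queued):   announced[h][p], announces[p][h]
        //	Stage 3 (fetching): fetching[h] == p, h in requests[p].hashes, alternates[h], announces[p][h]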
   179  
   180  // NewTxFetcher creates a transaction fetcher to retrieve transactions
   181  // based on hash announcements.
   182  func NewTxFetcher(hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error) *TxFetcher {
   183  	return NewTxFetcherForTests(hasTx, addTxs, fetchTxs, mclock.System{}, nil)
   184  }
   185  
   186  // NewTxFetcherForTests is a testing method to mock out the realtime clock with
   187  // a simulated version and the internal randomness with a deterministic one.
   188  func NewTxFetcherForTests(
   189  	hasTx func(common.Hash) bool, addTxs func([]*types.Transaction) []error, fetchTxs func(string, []common.Hash) error,
   190  	clock mclock.Clock, rand *mrand.Rand) *TxFetcher {
   191  	return &TxFetcher{
   192  		notify:      make(chan *txAnnounce),
   193  		cleanup:     make(chan *txDelivery),
   194  		drop:        make(chan *txDrop),
   195  		quit:        make(chan struct{}),
   196  		waitlist:    make(map[common.Hash]map[string]struct{}),
   197  		waittime:    make(map[common.Hash]mclock.AbsTime),
   198  		waitslots:   make(map[string]map[common.Hash]struct{}),
   199  		announces:   make(map[string]map[common.Hash]struct{}),
   200  		announced:   make(map[common.Hash]map[string]struct{}),
   201  		fetching:    make(map[common.Hash]string),
   202  		requests:    make(map[string]*txRequest),
   203  		alternates:  make(map[common.Hash]map[string]struct{}),
   204  		underpriced: mapset.NewSet(),
   205  		hasTx:       hasTx,
   206  		addTxs:      addTxs,
   207  		fetchTxs:    fetchTxs,
   208  		clock:       clock,
   209  		rand:        rand,
   210  	}
   211  }
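
        // A minimal sketch of wiring the fetcher up deterministically for a test, assuming
        // common/mclock provides a simulated clock (mclock.Simulated); the txpool stubs are
        // hypothetical and simply accept everything:
        //
        //	clock := new(mclock.Simulated)
        //	rng := mrand.New(mrand.NewSource(0x3a29))
        //
        //	fetcher := NewTxFetcherForTests(
        //		func(common.Hash) bool { return false }, // hasTx: pretend the pool is empty
        //		func(txs []*types.Transaction) []error { // addTxs: accept every transaction
        //			return make([]error, len(txs))
        //		},
        //		func(string, []common.Hash) error { return nil }, // fetchTxs: swallow requests
        //		clock, rng,
        //	)
        //	fetcher.Start()
        //	defer fetcher.Stop()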
   212  
   213  // Notify announces the fetcher of the potential availability of a new batch of
   214  // transactions in the network.
   215  func (f *TxFetcher) Notify(peer string, hashes []common.Hash) error {
   216  	// Keep track of all the announced transactions
   217  	txAnnounceInMeter.Mark(int64(len(hashes)))
   218  
   219  	// Skip any transaction announcements that we already know of, or that we've
   220  	// previously marked as cheap and discarded. This check is of course racy,
   221  	// because multiple concurrent notifies will still manage to pass it, but it's
   222  	// still valuable to check here because it runs concurrently with the internal
   223  	// loop, so anything caught here is time saved internally.
   224  	var (
   225  		unknowns               = make([]common.Hash, 0, len(hashes))
   226  		duplicate, underpriced int64
   227  	)
   228  	for _, hash := range hashes {
   229  		switch {
   230  		case f.hasTx(hash):
   231  			duplicate++
   232  
   233  		case f.underpriced.Contains(hash):
   234  			underpriced++
   235  
   236  		default:
   237  			unknowns = append(unknowns, hash)
   238  		}
   239  	}
   240  	txAnnounceKnownMeter.Mark(duplicate)
   241  	txAnnounceUnderpricedMeter.Mark(underpriced)
   242  
   243  	// If anything's left to announce, push it into the internal loop
   244  	if len(unknowns) == 0 {
   245  		return nil
   246  	}
   247  	announce := &txAnnounce{
   248  		origin: peer,
   249  		hashes: unknowns,
   250  	}
   251  	select {
   252  	case f.notify <- announce:
   253  		return nil
   254  	case <-f.quit:
   255  		return errTerminated
   256  	}
   257  }
   258  
   259  // Enqueue imports a batch of received transactions into the transaction pool
   260  // and the fetcher. This method may be called by both transaction broadcasts and
   261  // direct request replies. The differentiation is important so the fetcher can
   262  // re-schedule missing transactions as soon as possible.
   263  func (f *TxFetcher) Enqueue(peer string, txs []*types.Transaction, direct bool) error {
   264  	// Keep track of all the propagated transactions
   265  	if direct {
   266  		txReplyInMeter.Mark(int64(len(txs)))
   267  	} else {
   268  		txBroadcastInMeter.Mark(int64(len(txs)))
   269  	}
   270  	// Push all the transactions into the pool, tracking underpriced ones to avoid
   271  	// re-requesting them and dropping the peer in case of malicious transfers.
   272  	var (
   273  		added       = make([]common.Hash, 0, len(txs))
   274  		duplicate   int64
   275  		underpriced int64
   276  		otherreject int64
   277  	)
   278  	errs := f.addTxs(txs)
   279  	for i, err := range errs {
   280  		if err != nil {
   281  			if err.Error() != core.ErrAlreadyKnown.Error() {
   282  				log.Debug("Failed to add transaction", "hash", txs[i].Hash().String(), "err", err)
   283  			}
   284  			// Track the transaction hash if the price is too low for us.
   285  			// Avoid re-requesting this transaction when we receive another
   286  			// announcement.
   287  			if err == core.ErrUnderpriced || err == core.ErrReplaceUnderpriced {
   288  				for f.underpriced.Cardinality() >= maxTxUnderpricedSetSize {
   289  					f.underpriced.Pop()
   290  				}
   291  				f.underpriced.Add(txs[i].Hash())
   292  			}
   293  			// Track a few interesting failure types
   294  			switch err {
   295  			case nil: // Noop, but need to handle to not count these
   296  
   297  			case core.ErrAlreadyKnown:
   298  				duplicate++
   299  
   300  			case core.ErrUnderpriced, core.ErrReplaceUnderpriced:
   301  				underpriced++
   302  
   303  			default:
   304  				otherreject++
   305  			}
   306  		}
   307  		added = append(added, txs[i].Hash())
   308  	}
   309  	if direct {
   310  		txReplyKnownMeter.Mark(duplicate)
   311  		txReplyUnderpricedMeter.Mark(underpriced)
   312  		txReplyOtherRejectMeter.Mark(otherreject)
   313  	} else {
   314  		txBroadcastKnownMeter.Mark(duplicate)
   315  		txBroadcastUnderpricedMeter.Mark(underpriced)
   316  		txBroadcastOtherRejectMeter.Mark(otherreject)
   317  	}
   318  	select {
   319  	case f.cleanup <- &txDelivery{origin: peer, hashes: added, direct: direct}:
   320  		return nil
   321  	case <-f.quit:
   322  		return errTerminated
   323  	}
   324  }
   325  
   326  // Drop should be called when a peer disconnects. It cleans up all the internal
   327  // data structures of the given node.
   328  func (f *TxFetcher) Drop(peer string) error {
   329  	select {
   330  	case f.drop <- &txDrop{peer: peer}:
   331  		return nil
   332  	case <-f.quit:
   333  		return errTerminated
   334  	}
   335  }
   336  
   337  // Start boots up the announcement based synchroniser, accepting and processing
   338  // hash notifications and transaction fetches until termination is requested.
   339  func (f *TxFetcher) Start() {
   340  	go f.loop()
   341  }
   342  
   343  // Stop terminates the announcement based synchroniser, canceling all pending
   344  // operations.
   345  func (f *TxFetcher) Stop() {
   346  	close(f.quit)
   347  }
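
        // A minimal sketch of how a peer handler might drive the public API above. The
        // pool and peers helpers are hypothetical placeholders with the callback shapes
        // NewTxFetcher expects; only the TxFetcher calls themselves are from this file.
        //
        //	f := NewTxFetcher(pool.Has, pool.AddRemotes, peers.RequestTxs)
        //	f.Start()
        //	defer f.Stop()
        //
        //	// On a hash announcement from "peer":
        //	_ = f.Notify(peer, hashes)
        //
        //	// On a broadcast (direct == false) or a direct request reply (direct == true):
        //	_ = f.Enqueue(peer, txs, direct)
        //
        //	// When "peer" disconnects:
        //	_ = f.Drop(peer)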
   348  
   349  func (f *TxFetcher) loop() {
   350  	var (
   351  		waitTimer    = new(mclock.Timer)
   352  		timeoutTimer = new(mclock.Timer)
   353  
   354  		waitTrigger    = make(chan struct{}, 1)
   355  		timeoutTrigger = make(chan struct{}, 1)
   356  	)
   357  	for {
   358  		select {
   359  		case ann := <-f.notify:
   360  			// Drop part of the new announcements if there are too many accumulated.
   361  			// Note, we could but do not filter already known transactions here as
   362  			// the probability of something arriving between this call and the pre-
   363  			// filter outside is essentially zero.
   364  			used := len(f.waitslots[ann.origin]) + len(f.announces[ann.origin])
   365  			if used >= maxTxAnnounces {
   366  				// This can happen if a set of transactions are requested but not
   367  				// all fulfilled, so the remainder are rescheduled without the cap
   368  				// check. Should be fine as the limit is in the thousands and the
   369  				// request size in the hundreds.
   370  				txAnnounceDOSMeter.Mark(int64(len(ann.hashes)))
   371  				break
   372  			}
   373  			want := used + len(ann.hashes)
   374  			if want > maxTxAnnounces {
   375  				txAnnounceDOSMeter.Mark(int64(want - maxTxAnnounces))
   376  				ann.hashes = ann.hashes[:want-maxTxAnnounces]
   377  			}
   378  			// All is well, schedule the remainder of the transactions
   379  			idleWait := len(f.waittime) == 0
   380  			_, oldPeer := f.announces[ann.origin]
   381  
   382  			for _, hash := range ann.hashes {
   383  				// If the transaction is already downloading, add it to the list
   384  				// of possible alternates (in case the current retrieval fails) and
   385  				// also account it for the peer.
   386  				if f.alternates[hash] != nil {
   387  					f.alternates[hash][ann.origin] = struct{}{}
   388  
   389  					// Stage 2 and 3 share the set of origins per tx
   390  					if announces := f.announces[ann.origin]; announces != nil {
   391  						announces[hash] = struct{}{}
   392  					} else {
   393  						f.announces[ann.origin] = map[common.Hash]struct{}{hash: {}}
   394  					}
   395  					continue
   396  				}
   397  				// If the transaction is not downloading, but is already queued
   398  				// from a different peer, track it for the new peer too.
   399  				if f.announced[hash] != nil {
   400  					f.announced[hash][ann.origin] = struct{}{}
   401  
   402  					// Stage 2 and 3 share the set of origins per tx
   403  					if announces := f.announces[ann.origin]; announces != nil {
   404  						announces[hash] = struct{}{}
   405  					} else {
   406  						f.announces[ann.origin] = map[common.Hash]struct{}{hash: {}}
   407  					}
   408  					continue
   409  				}
   410  				// If the transaction is already known to the fetcher, but not
   411  				// yet downloading, add the peer as an alternate origin in the
   412  				// waiting list.
   413  				if f.waitlist[hash] != nil {
   414  					f.waitlist[hash][ann.origin] = struct{}{}
   415  
   416  					if waitslots := f.waitslots[ann.origin]; waitslots != nil {
   417  						waitslots[hash] = struct{}{}
   418  					} else {
   419  						f.waitslots[ann.origin] = map[common.Hash]struct{}{hash: {}}
   420  					}
   421  					continue
   422  				}
   423  				// Transaction unknown to the fetcher, insert it into the waiting list
   424  				f.waitlist[hash] = map[string]struct{}{ann.origin: {}}
   425  				f.waittime[hash] = f.clock.Now()
   426  
   427  				if waitslots := f.waitslots[ann.origin]; waitslots != nil {
   428  					waitslots[hash] = struct{}{}
   429  				} else {
   430  					f.waitslots[ann.origin] = map[common.Hash]struct{}{hash: {}}
   431  				}
   432  			}
   433  			// If a new item was added to the waitlist, schedule it into the fetcher
   434  			if idleWait && len(f.waittime) > 0 {
   435  				f.rescheduleWait(waitTimer, waitTrigger)
   436  			}
   437  			// If this peer is new and announced something already queued, maybe
   438  			// request transactions from them
   439  			if !oldPeer && len(f.announces[ann.origin]) > 0 {
   440  				f.scheduleFetches(timeoutTimer, timeoutTrigger, map[string]struct{}{ann.origin: {}})
   441  			}
   442  
   443  		case <-waitTrigger:
   444  			// At least one transaction's waiting time ran out, push all expired
   445  			// ones into the retrieval queues
   446  			actives := make(map[string]struct{})
   447  			for hash, instance := range f.waittime {
   448  				if time.Duration(f.clock.Now()-instance)+txGatherSlack > txArriveTimeout {
   449  					// Transaction expired without propagation, schedule for retrieval
   450  					if f.announced[hash] != nil {
   451  						panic("announce tracker already contains waitlist item")
   452  					}
   453  					f.announced[hash] = f.waitlist[hash]
   454  					for peer := range f.waitlist[hash] {
   455  						if announces := f.announces[peer]; announces != nil {
   456  							announces[hash] = struct{}{}
   457  						} else {
   458  							f.announces[peer] = map[common.Hash]struct{}{hash: {}}
   459  						}
   460  						delete(f.waitslots[peer], hash)
   461  						if len(f.waitslots[peer]) == 0 {
   462  							delete(f.waitslots, peer)
   463  						}
   464  						actives[peer] = struct{}{}
   465  					}
   466  					delete(f.waittime, hash)
   467  					delete(f.waitlist, hash)
   468  				}
   469  			}
   470  			// If transactions are still waiting for propagation, reschedule the wait timer
   471  			if len(f.waittime) > 0 {
   472  				f.rescheduleWait(waitTimer, waitTrigger)
   473  			}
   474  			// If any peers became active and are idle, request transactions from them
   475  			if len(actives) > 0 {
   476  				f.scheduleFetches(timeoutTimer, timeoutTrigger, actives)
   477  			}
   478  
   479  		case <-timeoutTrigger:
   480  			// Clean up any expired retrievals and avoid re-requesting them from the
   481  			// same peer (either overloaded or malicious, useless in both cases). We
   482  			// could also penalize (Drop), but there's nothing to gain, and it could
   483  			// possibly further increase the load on it.
   484  			for peer, req := range f.requests {
   485  				if time.Duration(f.clock.Now()-req.time)+txGatherSlack > txFetchTimeout {
   486  					txRequestTimeoutMeter.Mark(int64(len(req.hashes)))
   487  
   488  					// Reschedule all the not-yet-delivered fetches to alternate peers
   489  					for _, hash := range req.hashes {
   490  						// Skip rescheduling hashes already delivered by someone else
   491  						if req.stolen != nil {
   492  							if _, ok := req.stolen[hash]; ok {
   493  								continue
   494  							}
   495  						}
   496  						// Move the delivery back from fetching to queued
   497  						if _, ok := f.announced[hash]; ok {
   498  							panic("announced tracker already contains alternate item")
   499  						}
   500  						if f.alternates[hash] != nil { // nil if tx was broadcast during fetch
   501  							f.announced[hash] = f.alternates[hash]
   502  						}
   503  						delete(f.announced[hash], peer)
   504  						if len(f.announced[hash]) == 0 {
   505  							delete(f.announced, hash)
   506  						}
   507  						delete(f.announces[peer], hash)
   508  						delete(f.alternates, hash)
   509  						delete(f.fetching, hash)
   510  					}
   511  					if len(f.announces[peer]) == 0 {
   512  						delete(f.announces, peer)
   513  					}
   514  					// Keep track of the request as dangling, but never expire
   515  					f.requests[peer].hashes = nil
   516  				}
   517  			}
   518  			// Schedule a new transaction retrieval
   519  			f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)
   520  
   521  			// No idea if we scheduled something or not, trigger the timer if needed
   522  			// TODO: can't we dump it into scheduleFetches somehow?
   523  			f.rescheduleTimeout(timeoutTimer, timeoutTrigger)
   524  
   525  		case delivery := <-f.cleanup:
   526  			// Regardless of whether the delivery was direct or broadcast, remove all
   527  			// traces of the hash from internal trackers
   528  			for _, hash := range delivery.hashes {
   529  				if _, ok := f.waitlist[hash]; ok {
   530  					for peer, txset := range f.waitslots {
   531  						delete(txset, hash)
   532  						if len(txset) == 0 {
   533  							delete(f.waitslots, peer)
   534  						}
   535  					}
   536  					delete(f.waitlist, hash)
   537  					delete(f.waittime, hash)
   538  				} else {
   539  					for peer, txset := range f.announces {
   540  						delete(txset, hash)
   541  						if len(txset) == 0 {
   542  							delete(f.announces, peer)
   543  						}
   544  					}
   545  					delete(f.announced, hash)
   546  					delete(f.alternates, hash)
   547  
   548  					// If a transaction currently being fetched from a different
   549  					// origin was delivered (delivery stolen), mark it so the
   550  					// actual delivery won't double schedule it.
   551  					if origin, ok := f.fetching[hash]; ok && (origin != delivery.origin || !delivery.direct) {
   552  						stolen := f.requests[origin].stolen
   553  						if stolen == nil {
   554  							f.requests[origin].stolen = make(map[common.Hash]struct{})
   555  							stolen = f.requests[origin].stolen
   556  						}
   557  						stolen[hash] = struct{}{}
   558  					}
   559  					delete(f.fetching, hash)
   560  				}
   561  			}
   562  			// In case of a direct delivery, also reschedule anything missing
   563  			// from the original query
   564  			if delivery.direct {
   565  				// Mark the request as successful (independent of individual status)
   566  				txRequestDoneMeter.Mark(int64(len(delivery.hashes)))
   567  
   568  				// Make sure something was pending, nuke it
   569  				req := f.requests[delivery.origin]
   570  				if req == nil {
   571  					log.Warn("Unexpected transaction delivery", "peer", delivery.origin)
   572  					break
   573  				}
   574  				delete(f.requests, delivery.origin)
   575  
   576  				// Anything not delivered should be re-scheduled (with or without
   577  				// this peer, depending on the response cutoff)
   578  				delivered := make(map[common.Hash]struct{})
   579  				for _, hash := range delivery.hashes {
   580  					delivered[hash] = struct{}{}
   581  				}
   582  				cutoff := len(req.hashes) // If nothing is delivered, assume everything is missing, don't retry!!!
   583  				for i, hash := range req.hashes {
   584  					if _, ok := delivered[hash]; ok {
   585  						cutoff = i
   586  					}
   587  				}
   588  				// Reschedule missing hashes from alternates, not-fulfilled from alt+self
   589  				for i, hash := range req.hashes {
   590  					// Skip rescheduling hashes already delivered by someone else
   591  					if req.stolen != nil {
   592  						if _, ok := req.stolen[hash]; ok {
   593  							continue
   594  						}
   595  					}
   596  					if _, ok := delivered[hash]; !ok {
   597  						if i < cutoff {
   598  							delete(f.alternates[hash], delivery.origin)
   599  							delete(f.announces[delivery.origin], hash)
   600  							if len(f.announces[delivery.origin]) == 0 {
   601  								delete(f.announces, delivery.origin)
   602  							}
   603  						}
   604  						if len(f.alternates[hash]) > 0 {
   605  							if _, ok := f.announced[hash]; ok {
   606  								panic(fmt.Sprintf("announced tracker already contains alternate item: %v", f.announced[hash]))
   607  							}
   608  							f.announced[hash] = f.alternates[hash]
   609  						}
   610  					}
   611  					delete(f.alternates, hash)
   612  					delete(f.fetching, hash)
   613  				}
   614  				// Something was delivered, try to reschedule requests
   615  				f.scheduleFetches(timeoutTimer, timeoutTrigger, nil) // Partial delivery may enable others to deliver too
   616  			}
   617  
   618  		case drop := <-f.drop:
   619  			// A peer was dropped, remove all traces of it
   620  			if _, ok := f.waitslots[drop.peer]; ok {
   621  				for hash := range f.waitslots[drop.peer] {
   622  					delete(f.waitlist[hash], drop.peer)
   623  					if len(f.waitlist[hash]) == 0 {
   624  						delete(f.waitlist, hash)
   625  						delete(f.waittime, hash)
   626  					}
   627  				}
   628  				delete(f.waitslots, drop.peer)
   629  				if len(f.waitlist) > 0 {
   630  					f.rescheduleWait(waitTimer, waitTrigger)
   631  				}
   632  			}
   633  			// Clean up any active requests
   634  			var request *txRequest
   635  			if request = f.requests[drop.peer]; request != nil {
   636  				for _, hash := range request.hashes {
   637  					// Skip rescheduling hashes already delivered by someone else
   638  					if request.stolen != nil {
   639  						if _, ok := request.stolen[hash]; ok {
   640  							continue
   641  						}
   642  					}
   643  					// Undelivered hash, reschedule if there's an alternative origin available
   644  					delete(f.alternates[hash], drop.peer)
   645  					if len(f.alternates[hash]) == 0 {
   646  						delete(f.alternates, hash)
   647  					} else {
   648  						f.announced[hash] = f.alternates[hash]
   649  						delete(f.alternates, hash)
   650  					}
   651  					delete(f.fetching, hash)
   652  				}
   653  				delete(f.requests, drop.peer)
   654  			}
   655  			// Clean up general announcement tracking
   656  			if _, ok := f.announces[drop.peer]; ok {
   657  				for hash := range f.announces[drop.peer] {
   658  					delete(f.announced[hash], drop.peer)
   659  					if len(f.announced[hash]) == 0 {
   660  						delete(f.announced, hash)
   661  					}
   662  				}
   663  				delete(f.announces, drop.peer)
   664  			}
   665  			// If a request was cancelled, check if anything needs to be rescheduled
   666  			if request != nil {
   667  				f.scheduleFetches(timeoutTimer, timeoutTrigger, nil)
   668  				f.rescheduleTimeout(timeoutTimer, timeoutTrigger)
   669  			}
   670  
   671  		case <-f.quit:
   672  			return
   673  		}
   674  		// No idea what happened, but bump some sanity metrics
   675  		txFetcherWaitingPeers.Update(int64(len(f.waitslots)))
   676  		txFetcherWaitingHashes.Update(int64(len(f.waitlist)))
   677  		txFetcherQueueingPeers.Update(int64(len(f.announces) - len(f.requests)))
   678  		txFetcherQueueingHashes.Update(int64(len(f.announced)))
   679  		txFetcherFetchingPeers.Update(int64(len(f.requests)))
   680  		txFetcherFetchingHashes.Update(int64(len(f.fetching)))
   681  
   682  		// Loop did something, ping the step notifier if needed (tests)
   683  		if f.step != nil {
   684  			f.step <- struct{}{}
   685  		}
   686  	}
   687  }
   688  
   689  // rescheduleWait iterates over all the transactions currently in the waitlist
   690  // and schedules the movement into the fetcher for the earliest.
   691  //
   692  // The method has a granularity of 'gatherSlack', since there's not much point in
   693  // spinning over all the transactions just to maybe find one that should trigger
   694  // a few ms earlier.
   695  func (f *TxFetcher) rescheduleWait(timer *mclock.Timer, trigger chan struct{}) {
   696  	if *timer != nil {
   697  		(*timer).Stop()
   698  	}
   699  	now := f.clock.Now()
   700  
   701  	earliest := now
   702  	for _, instance := range f.waittime {
   703  		if earliest > instance {
   704  			earliest = instance
   705  			if txArriveTimeout-time.Duration(now-earliest) < gatherSlack {
   706  				break
   707  			}
   708  		}
   709  	}
   710  	*timer = f.clock.AfterFunc(txArriveTimeout-time.Duration(now-earliest), func() {
   711  		trigger <- struct{}{}
   712  	})
   713  }
   714  
   715  // rescheduleTimeout iterates over all the transactions currently in flight and
   716  // schedules a cleanup run when the first would trigger.
   717  //
   718  // The method has a granularity of 'gatherSlack', since there's not much point in
   719  // spinning over all the transactions just to maybe find one that should trigger
   720  // a few ms earlier.
   721  //
   722  // This method is a bit "flaky" "by design". In theory the timeout timer only ever
   723  // should be rescheduled if some request is pending. In practice, a timeout will
   724  // cause the timer to be rescheduled every 5 secs (until the peer comes through or
   725  // disconnects). This is a limitation of the fetcher code because we don't track
   726  // pending requests and timed-out requests separately. Without double tracking, if
   727  // we simply didn't reschedule the timer on all-timeout then the timer would never
   728  // be set again since len(request) > 0 => something's running.
   729  func (f *TxFetcher) rescheduleTimeout(timer *mclock.Timer, trigger chan struct{}) {
   730  	if *timer != nil {
   731  		(*timer).Stop()
   732  	}
   733  	now := f.clock.Now()
   734  
   735  	earliest := now
   736  	for _, req := range f.requests {
   737  		// If this request already timed out, skip it altogether
   738  		if req.hashes == nil {
   739  			continue
   740  		}
   741  		if earliest > req.time {
   742  			earliest = req.time
   743  			if txFetchTimeout-time.Duration(now-earliest) < gatherSlack {
   744  				break
   745  			}
   746  		}
   747  	}
   748  	*timer = f.clock.AfterFunc(txFetchTimeout-time.Duration(now-earliest), func() {
   749  		trigger <- struct{}{}
   750  	})
   751  }
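
        // Both reschedule helpers above share the same arithmetic. An isolated sketch of it
        // (a hypothetical helper, not part of the fetcher) for a generic set of start times:
        //
        //	func nextTrigger(now mclock.AbsTime, starts []mclock.AbsTime, timeout, slack time.Duration) time.Duration {
        //		earliest := now
        //		for _, t := range starts {
        //			if earliest > t {
        //				earliest = t
        //				if timeout-time.Duration(now-earliest) < slack {
        //					break // within one slack of firing anyway, stop searching
        //				}
        //			}
        //		}
        //		return timeout - time.Duration(now-earliest)
        //	}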
   752  
   753  // scheduleFetches starts a batch of retrievals for all available idle peers.
   754  func (f *TxFetcher) scheduleFetches(timer *mclock.Timer, timeout chan struct{}, whitelist map[string]struct{}) {
   755  	// Gather the set of peers we want to retrieve from (default to all)
   756  	actives := whitelist
   757  	if actives == nil {
   758  		actives = make(map[string]struct{})
   759  		for peer := range f.announces {
   760  			actives[peer] = struct{}{}
   761  		}
   762  	}
   763  	if len(actives) == 0 {
   764  		return
   765  	}
   766  	// For each active peer, try to schedule some transaction fetches
   767  	idle := len(f.requests) == 0
   768  
   769  	f.forEachPeer(actives, func(peer string) {
   770  		if f.requests[peer] != nil {
   771  			return // continue in the for-each
   772  		}
   773  		if len(f.announces[peer]) == 0 {
   774  			return // continue in the for-each
   775  		}
   776  		hashes := make([]common.Hash, 0, maxTxRetrievals)
   777  		f.forEachHash(f.announces[peer], func(hash common.Hash) bool {
   778  			if _, ok := f.fetching[hash]; !ok {
   779  				// Mark the hash as fetching and stash away possible alternates
   780  				f.fetching[hash] = peer
   781  
   782  				if _, ok := f.alternates[hash]; ok {
   783  					panic(fmt.Sprintf("alternate tracker already contains fetching item: %v", f.alternates[hash]))
   784  				}
   785  				f.alternates[hash] = f.announced[hash]
   786  				delete(f.announced, hash)
   787  
   788  				// Accumulate the hash and stop if the limit was reached
   789  				hashes = append(hashes, hash)
   790  				if len(hashes) >= maxTxRetrievals {
   791  					return false // break in the for-each
   792  				}
   793  			}
   794  			return true // continue in the for-each
   795  		})
   796  		// If any hashes were allocated, request them from the peer
   797  		if len(hashes) > 0 {
   798  			f.requests[peer] = &txRequest{hashes: hashes, time: f.clock.Now()}
   799  			txRequestOutMeter.Mark(int64(len(hashes)))
   800  
   801  			go func(peer string, hashes []common.Hash) {
   802  				// Try to fetch the transactions, but in case of a request
   803  				// failure (e.g. peer disconnected), reschedule the hashes.
   804  				if err := f.fetchTxs(peer, hashes); err != nil {
   805  					txRequestFailMeter.Mark(int64(len(hashes)))
   806  					f.Drop(peer)
   807  				}
   808  			}(peer, hashes)
   809  		}
   810  	})
   811  	// If a new request was fired, schedule a timeout timer
   812  	if idle && len(f.requests) > 0 {
   813  		f.rescheduleTimeout(timer, timeout)
   814  	}
   815  }
   816  
   817  // forEachPeer does a range loop over a map of peers in production, but during
   818  // testing it uses a deterministic, sorted-and-rotated order to allow reproducing issues.
   819  func (f *TxFetcher) forEachPeer(peers map[string]struct{}, do func(peer string)) {
   820  	// If we're running production, use whatever Go's map gives us
   821  	if f.rand == nil {
   822  		for peer := range peers {
   823  			do(peer)
   824  		}
   825  		return
   826  	}
   827  	// We're running the test suite, make iteration deterministic
   828  	list := make([]string, 0, len(peers))
   829  	for peer := range peers {
   830  		list = append(list, peer)
   831  	}
   832  	sort.Strings(list)
   833  	rotateStrings(list, f.rand.Intn(len(list)))
   834  	for _, peer := range list {
   835  		do(peer)
   836  	}
   837  }
   838  
   839  // forEachHash does a range loop over a map of hashes in production, but during
   840  // testing it uses a deterministic, sorted-and-rotated order to allow reproducing issues.
   841  func (f *TxFetcher) forEachHash(hashes map[common.Hash]struct{}, do func(hash common.Hash) bool) {
   842  	// If we're running production, use whatever Go's map gives us
   843  	if f.rand == nil {
   844  		for hash := range hashes {
   845  			if !do(hash) {
   846  				return
   847  			}
   848  		}
   849  		return
   850  	}
   851  	// We're running the test suite, make iteration deterministic
   852  	list := make([]common.Hash, 0, len(hashes))
   853  	for hash := range hashes {
   854  		list = append(list, hash)
   855  	}
   856  	sortHashes(list)
   857  	rotateHashes(list, f.rand.Intn(len(list)))
   858  	for _, hash := range list {
   859  		if !do(hash) {
   860  			return
   861  		}
   862  	}
   863  }
   864  
   865  // rotateStrings rotates the contents of a slice by n steps. This method is only
   866  // used in tests to simulate random map iteration but keep it deterministic.
   867  func rotateStrings(slice []string, n int) {
   868  	orig := make([]string, len(slice))
   869  	copy(orig, slice)
   870  
   871  	for i := 0; i < len(orig); i++ {
   872  		slice[i] = orig[(i+n)%len(orig)]
   873  	}
   874  }
   875  
   876  // sortHashes sorts a slice of hashes. This method is only used in tests in order
   877  // to simulate random map iteration but keep it deterministic.
   878  func sortHashes(slice []common.Hash) {
   879  	for i := 0; i < len(slice); i++ {
   880  		for j := i + 1; j < len(slice); j++ {
   881  			if bytes.Compare(slice[i][:], slice[j][:]) > 0 {
   882  				slice[i], slice[j] = slice[j], slice[i]
   883  			}
   884  		}
   885  	}
   886  }
   887  
   888  // rotateHashes rotates the contents of a slice by n steps. This method is only
   889  // used in tests to simulate random map iteration but keep it deterministic.
   890  func rotateHashes(slice []common.Hash, n int) {
   891  	orig := make([]common.Hash, len(slice))
   892  	copy(orig, slice)
   893  
   894  	for i := 0; i < len(orig); i++ {
   895  		slice[i] = orig[(i+n)%len(orig)]
   896  	}
   897  }
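
        // Example of the deterministic iteration order the helpers above produce when the
        // test randomizer picks a rotation of 1 (values are illustrative):
        //
        //	peers := []string{"peer-b", "peer-c", "peer-a"}
        //	sort.Strings(peers)     // ["peer-a", "peer-b", "peer-c"]
        //	rotateStrings(peers, 1) // ["peer-b", "peer-c", "peer-a"]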