github.com/klaytn/klaytn@v1.10.2/networks/p2p/discover/table.go

     1  // Modifications Copyright 2018 The klaytn Authors
     2  // Copyright 2015 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from p2p/discover/table.go (2018/06/04).
    19  // Modified and improved for the klaytn development.
    20  
    21  package discover
    22  
    23  import (
    24  	crand "crypto/rand"
    25  	"encoding/binary"
    26  	"errors"
    27  	"fmt"
    28  	mrand "math/rand"
    29  	"net"
    30  	"sort"
    31  	"sync"
    32  	"time"
    33  
    34  	"github.com/klaytn/klaytn/common"
    35  	"github.com/klaytn/klaytn/crypto"
    36  	"github.com/klaytn/klaytn/log"
    37  	"github.com/klaytn/klaytn/networks/p2p/netutil"
    38  )
    39  
    40  const (
    41  	alpha           = 3  // Kademlia concurrency factor
    42  	bucketSize      = 16 // Kademlia bucket size
    43  	maxReplacements = 10 // Size of per-bucket replacement list
    44  
    45  	maxBondingPingPongs = 16 // Limit on the number of concurrent ping/pong interactions
    46  	maxFindnodeFailures = 5  // Nodes exceeding this limit are dropped
    47  
    48  	refreshInterval    = 30 * time.Minute
    49  	revalidateInterval = 10 * time.Second
    50  	copyNodesInterval  = 30 * time.Second
    51  
    52  	seedCount  = 30
    53  	seedMaxAge = 5 * 24 * time.Hour
    54  )
    55  
    56  type DiscoveryType uint8
    57  
    58  type Discovery interface {
    59  	Self() *Node
    60  	Close()
    61  	Resolve(target NodeID, targetType NodeType) *Node
    62  	Lookup(target NodeID, targetType NodeType) []*Node
    63  	GetNodes(targetType NodeType, max int) []*Node
    64  	ReadRandomNodes([]*Node, NodeType) int
    65  	RetrieveNodes(target common.Hash, nType NodeType, nresults int) []*Node // replaces Table's closest()
    66  
    67  	HasBond(id NodeID) bool
    68  	Bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16, nType NodeType) (*Node, error)
    69  	IsAuthorized(fromID NodeID, nType NodeType) bool
    70  
    71  	// interfaces for API
    72  	Name() string
    73  	CreateUpdateNodeOnDB(n *Node) error
    74  	CreateUpdateNodeOnTable(n *Node) error
    75  	GetNodeFromDB(id NodeID) (*Node, error)
    76  	DeleteNodeFromDB(n *Node) error
    77  	DeleteNodeFromTable(n *Node) error
    78  	GetBucketEntries() []*Node
    79  	GetReplacements() []*Node
    80  
    81  	GetAuthorizedNodes() []*Node
    82  	PutAuthorizedNodes(nodes []*Node)
    83  	DeleteAuthorizedNodes(nodes []*Node)
    84  }
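
// examplePeerCandidates is a minimal sketch, assuming a Discovery obtained from
// newTable, of how a consumer might combine the interface methods above: an
// active Lookup toward a target ID, topped up with nodes already known to the
// table. The helper name and the choice of NodeTypeEN are illustrative
// assumptions, not part of the original API; duplicates are not filtered here.
func examplePeerCandidates(disc Discovery, target NodeID) []*Node {
	// Network lookup: queries progressively closer nodes for the target.
	candidates := disc.Lookup(target, NodeTypeEN)
	// Top up from the local table if the lookup came back short.
	if len(candidates) < bucketSize {
		candidates = append(candidates, disc.GetNodes(NodeTypeEN, bucketSize-len(candidates))...)
	}
	return candidates
}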
    85  
    86  type Table struct {
    87  	nursery []*Node     // bootstrap nodes
    88  	rand    *mrand.Rand // source of randomness, periodically reseeded
    89  	randMu  sync.Mutex
    90  	ips     netutil.DistinctNetSet
    91  
    92  	db         *nodeDB // database of known nodes
    93  	refreshReq chan chan struct{}
    94  	initDone   chan struct{}
    95  	closeReq   chan struct{}
    96  	closed     chan struct{}
    97  
    98  	bondmu    sync.Mutex
    99  	bonding   map[NodeID]*bondproc
   100  	bondslots chan struct{} // limits total number of active bonding processes
   101  
   102  	nodeAddedHook func(*Node) // for testing
   103  
   104  	net  transport
   105  	self *Node // metadata of the local node
   106  
   107  	storages   map[NodeType]discoverStorage
   108  	storagesMu sync.RWMutex
   109  
   110  	localLogger log.Logger
   111  }
   112  
   113  type bondproc struct {
   114  	err  error
   115  	n    *Node
   116  	done chan struct{}
   117  }
   118  
   119  // transport is implemented by the UDP transport.
   120  // It is an interface so we can test without opening lots of UDP
   121  // sockets and without generating a private key.
   122  type transport interface {
   123  	ping(toid NodeID, toaddr *net.UDPAddr) error
   124  	waitping(NodeID) error
   125  	findnode(toid NodeID, toaddr *net.UDPAddr, target NodeID, targetNT NodeType, max int) ([]*Node, error)
   126  	close()
   127  }
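
// nullTransport is a sketch of the kind of stub the transport interface above
// enables in tests: it satisfies transport without opening UDP sockets or
// generating keys. The type and its no-op behavior are illustrative
// assumptions, not part of the original file.
type nullTransport struct{}

func (nullTransport) ping(toid NodeID, toaddr *net.UDPAddr) error { return nil }
func (nullTransport) waitping(NodeID) error                       { return nil }
func (nullTransport) findnode(toid NodeID, toaddr *net.UDPAddr, target NodeID, targetNT NodeType, max int) ([]*Node, error) {
	return nil, nil // pretend the remote peer reports no neighbors
}
func (nullTransport) close() {}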
   128  
   129  func newTable(cfg *Config) (Discovery, error) {
   130  	// If no node database was given, use an in-memory one
   131  	db, err := newNodeDB(cfg.NodeDBPath, Version, cfg.Id)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	tab := &Table{
   137  		net:         cfg.udp,
   138  		db:          db,
   139  		self:        NewNode(cfg.Id, cfg.Addr.IP, uint16(cfg.Addr.Port), uint16(cfg.Addr.Port), nil, cfg.NodeType),
   140  		bonding:     make(map[NodeID]*bondproc),
   141  		bondslots:   make(chan struct{}, maxBondingPingPongs),
   142  		refreshReq:  make(chan chan struct{}),
   143  		initDone:    make(chan struct{}),
   144  		closeReq:    make(chan struct{}),
   145  		closed:      make(chan struct{}),
   146  		rand:        mrand.New(mrand.NewSource(0)),
   147  		storages:    make(map[NodeType]discoverStorage),
   148  		localLogger: logger.NewWith("Discover", "Table"),
   149  	}
   150  
   151  	switch cfg.NodeType {
   152  	case NodeTypeCN:
   153  		tab.addStorage(NodeTypeCN, &simpleStorage{targetType: NodeTypeCN, noDiscover: true, max: 100})
   154  		tab.addStorage(NodeTypeBN, &simpleStorage{targetType: NodeTypeBN, noDiscover: true, max: 3})
   155  	case NodeTypePN:
   156  		tab.addStorage(NodeTypePN, &simpleStorage{targetType: NodeTypePN, noDiscover: true, max: 1})
   157  		tab.addStorage(NodeTypeEN, &KademliaStorage{targetType: NodeTypeEN, noDiscover: true})
   158  		tab.addStorage(NodeTypeBN, &simpleStorage{targetType: NodeTypeBN, noDiscover: true, max: 3})
   159  	case NodeTypeEN:
   160  		tab.addStorage(NodeTypePN, &simpleStorage{targetType: NodeTypePN, noDiscover: true, max: 2})
   161  		tab.addStorage(NodeTypeEN, &KademliaStorage{targetType: NodeTypeEN})
   162  		tab.addStorage(NodeTypeBN, &simpleStorage{targetType: NodeTypeBN, noDiscover: true, max: 3})
   163  	case NodeTypeBN:
   164  		tab.addStorage(NodeTypeCN, NewSimpleStorage(NodeTypeCN, true, 100, cfg.AuthorizedNodes))
   165  		tab.addStorage(NodeTypePN, NewSimpleStorage(NodeTypePN, true, 100, cfg.AuthorizedNodes))
   166  		tab.addStorage(NodeTypeEN, &KademliaStorage{targetType: NodeTypeEN, noDiscover: true})
   167  		tab.addStorage(NodeTypeBN, &simpleStorage{targetType: NodeTypeBN, max: 3})
   168  	}
   169  
   170  	if err := tab.setFallbackNodes(cfg.Bootnodes); err != nil {
   171  		return nil, err
   172  	}
   173  	for i := 0; i < cap(tab.bondslots); i++ {
   174  		tab.bondslots <- struct{}{}
   175  	}
   176  
   177  	tab.seedRand()
   178  	tab.loadSeedNodes(false)
   179  	// Start the background expiration goroutine after loading seeds so that the search for
   180  	// seed nodes also considers older nodes that would otherwise be removed by the
   181  	// expiration.
   182  	tab.db.ensureExpirer()
   183  	tab.localLogger.Debug("new "+tab.Name()+" created", "err", nil)
   184  	return tab, nil
   185  }
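
// exampleNewTable is a sketch of in-package Table construction, assuming only
// the Config fields that newTable above actually reads (Id, Addr, NodeType,
// NodeDBPath, Bootnodes) and assuming Addr is a *net.UDPAddr. A real caller
// would also wire a transport into the unexported udp field (for instance the
// UDP transport, or a stub such as nullTransport above); both the field types
// and this helper are illustrative assumptions.
func exampleNewTable(id NodeID, addr *net.UDPAddr, bootnodes []*Node) (Discovery, error) {
	cfg := &Config{
		Id:         id,
		Addr:       addr,
		NodeType:   NodeTypeEN,
		NodeDBPath: "", // empty path selects an in-memory node database
		Bootnodes:  bootnodes,
	}
	return newTable(cfg)
}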
   186  
   187  func (tab *Table) IsAuthorized(fromID NodeID, nType NodeType) bool {
   188  	tab.storagesMu.RLock()
   189  	defer tab.storagesMu.RUnlock()
   190  	if tab.storages[nType] != nil {
   191  		return tab.storages[nType].isAuthorized(fromID)
   192  	}
   193  	return true
   194  }
   195  
   196  // setFallbackNodes sets the initial points of contact. These nodes
   197  // are used to connect to the network if the table is empty and there
   198  // are no known nodes in the database.
   199  func (tab *Table) setFallbackNodes(nodes []*Node) error {
   200  	for _, n := range nodes {
   201  		if err := n.validateComplete(); err != nil {
   202  			return fmt.Errorf("bad bootstrap/fallback node %q (%v)", n, err)
   203  		}
   204  	}
   205  	tab.nursery = make([]*Node, 0, len(nodes))
   206  	for _, n := range nodes {
   207  		cpy := *n
   208  		// Recompute cpy.sha because the node might not have been
   209  		// created by NewNode or ParseNode.
   210  		cpy.sha = crypto.Keccak256Hash(n.ID[:])
   211  		tab.nursery = append(tab.nursery, &cpy)
   212  	}
   213  	return nil
   214  }
   215  
   216  func (tab *Table) findNewNode(seeds *nodesByDistance, targetID NodeID, targetNT NodeType, recursiveFind bool, max int) []*Node {
   217  	var (
   218  		asked          = make(map[NodeID]bool)
   219  		seen           = make(map[NodeID]bool)
   220  		reply          = make(chan []*Node, alpha)
   221  		pendingQueries = 0
   222  	)
   223  
   224  	// don't query further if we hit ourselves.
   225  	// unlikely to happen often in practice.
   226  	asked[tab.self.ID] = true
   227  	for _, e := range seeds.entries {
   228  		seen[e.ID] = true
   229  	}
   230  
   231  	for {
   232  		// ask the alpha closest nodes that we haven't asked yet
   233  		for i := 0; i < len(seeds.entries) && pendingQueries < alpha; i++ {
   234  			n := seeds.entries[i]
   235  			if !asked[n.ID] {
   236  				asked[n.ID] = true
   237  				pendingQueries++
   238  				go func() {
   239  					// Find potential neighbors to bond with
   240  					r, err := tab.net.findnode(n.ID, n.addr(), targetID, targetNT, max)
   241  					if err != nil {
   242  						// Bump the failure counter to detect and evacuate non-bonded entries
   243  						fails := tab.db.findFails(n.ID) + 1
   244  						tab.db.updateFindFails(n.ID, fails)
   245  						tab.localLogger.Trace("Bumping findnode failure counter", "id", n.ID, "failcount", fails)
   246  
   247  						if fails >= maxFindnodeFailures {
   248  							tab.localLogger.Trace("Too many findnode failures, dropping", "id", n.ID, "failcount", fails)
   249  							tab.delete(n)
   250  						}
   251  					}
   252  					if targetNT != NodeTypeBN {
   253  						r = removeBn(r)
   254  					}
   255  					reply <- tab.bondall(r)
   256  				}()
   257  			}
   258  		}
   259  		if pendingQueries == 0 {
   260  			// we have asked all closest nodes, stop the search
   261  			break
   262  		}
   263  
   264  		if recursiveFind {
   265  			// wait for the next reply
   266  			for _, n := range <-reply {
   267  				if n != nil && !seen[n.ID] {
   268  					seen[n.ID] = true
   269  					seeds.push(n, max)
   270  				}
   271  			}
   272  			pendingQueries--
   273  		} else {
   274  			for i := 0; i < pendingQueries; i++ {
   275  				for _, n := range <-reply {
   276  					if n != nil && !seen[n.ID] {
   277  						seen[n.ID] = true
   278  						seeds.push(n, max)
   279  					}
   280  				}
   281  			}
   282  			break
   283  		}
   284  	}
   285  	if targetNT != NodeTypeBN {
   286  		seeds.entries = removeBn(seeds.entries)
   287  	}
   288  	tab.localLogger.Debug("findNewNode: found nodes", "length", len(seeds.entries), "nodeType", targetNT)
   289  	return seeds.entries
   290  }
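
// exampleFindNewNode is a sketch of how the lookup helper above can be driven:
// seed the candidate set with the closest nodes we already know, then let
// findNewNode expand it with up to alpha concurrent findnode queries. Whether
// the storages invoke it in exactly this way is an assumption made for
// illustration.
func (tab *Table) exampleFindNewNode(targetID NodeID) []*Node {
	hash := crypto.Keccak256Hash(targetID[:])
	seeds := tab.closest(hash, NodeTypeEN, bucketSize)
	// recursiveFind=true keeps querying newly discovered nodes until no closer
	// ones turn up; max bounds both the candidate set and the result.
	return tab.findNewNode(seeds, targetID, NodeTypeEN, true, bucketSize)
}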
   291  
   292  func (tab *Table) addStorage(nType NodeType, s discoverStorage) {
   293  	tab.storagesMu.Lock()
   294  	defer tab.storagesMu.Unlock()
   295  	s.setTable(tab)
   296  	tab.storages[nType] = s
   297  	s.init()
   298  }
   299  
   300  func (tab *Table) seedRand() {
   301  	var b [8]byte
   302  	crand.Read(b[:])
   303  
   304  	// tab.mutex.Lock()
   305  	tab.randMu.Lock()
   306  	tab.rand.Seed(int64(binary.BigEndian.Uint64(b[:])))
   307  	tab.randMu.Unlock()
   308  	// tab.mutex.Unlock()
   309  }
   310  
   311  // Self returns the local node.
   312  // The returned node should not be modified by the caller.
   313  func (tab *Table) Self() *Node {
   314  	return tab.self
   315  }
   316  
   317  // ReadRandomNodes fills the given slice with random nodes from the
   318  // table. It will not write the same node more than once. The nodes in
   319  // the slice are copies and can be modified by the caller.
   320  func (tab *Table) ReadRandomNodes(buf []*Node, nType NodeType) (n int) {
   321  	if !tab.isInitDone() {
   322  		return 0
   323  	}
   324  
   325  	tab.storagesMu.RLock()
   326  	defer tab.storagesMu.RUnlock()
   327  	if tab.storages[nType] == nil {
   328  		tab.localLogger.Warn("ReadRandomNodes: Not Supported NodeType", "NodeType", nType)
   329  		return 0
   330  	}
   331  
   332  	return tab.storages[nType].readRandomNodes(buf)
   333  }
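
// exampleSampleNodes is a sketch of the calling convention described above:
// the caller supplies the buffer, and only buf[:n] holds valid entries. The
// helper itself is an illustrative assumption.
func (tab *Table) exampleSampleNodes(nType NodeType) []*Node {
	buf := make([]*Node, bucketSize)
	n := tab.ReadRandomNodes(buf, nType)
	return buf[:n] // the entries are copies, so callers may modify them freely
}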
   334  
   335  // Close terminates the network listener and flushes the node database.
   336  func (tab *Table) Close() {
   337  	select {
   338  	case <-tab.closed:
   339  		// already closed.
   340  	case tab.closeReq <- struct{}{}:
   341  		<-tab.closed // wait for refreshLoop to end.
   342  	}
   343  }
   344  
   345  // isInitDone returns whether the table's initial seeding procedure has completed.
   346  func (tab *Table) isInitDone() bool {
   347  	select {
   348  	case <-tab.initDone:
   349  		return true
   350  	default:
   351  		return false
   352  	}
   353  }
   354  
   355  // Resolve searches for a specific node with the given ID.
   356  // It returns nil if the node could not be found.
   357  func (tab *Table) Resolve(targetID NodeID, targetType NodeType) *Node {
   358  	// If the node is present in the local table, no
   359  	// network interaction is required.
   360  	hash := crypto.Keccak256Hash(targetID[:])
   361  	cl := tab.closest(hash, targetType, 1)
   362  	if len(cl.entries) > 0 && cl.entries[0].ID == targetID {
   363  		return cl.entries[0]
   364  	}
   365  	// Otherwise, do a network lookup.
   366  	result := tab.Lookup(targetID, targetType)
   367  	for _, n := range result {
   368  		if n.ID == targetID {
   369  			return n
   370  		}
   371  	}
   372  	return nil
   373  }
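
// exampleEnsureBonded is a sketch combining Resolve, HasBond and Bond: locate
// a node, then make sure a bond exists before higher layers send it findnode
// requests (the bonding requirement is documented at Bond below). The helper
// and its error message are illustrative assumptions.
func (tab *Table) exampleEnsureBonded(id NodeID) (*Node, error) {
	n := tab.Resolve(id, NodeTypeEN)
	if n == nil {
		return nil, errors.New("node not found")
	}
	if tab.HasBond(id) {
		return n, nil
	}
	return tab.Bond(false, n.ID, n.addr(), n.TCP, n.NType)
}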
   374  
   375  // Lookup performs a network search for nodes close
   376  // to the given target. It approaches the target by querying
   377  // nodes that are closer to it on each iteration.
   378  // The given target does not need to be an actual node
   379  // identifier.
   380  func (tab *Table) Lookup(targetID NodeID, targetType NodeType) []*Node {
   381  	return tab.lookup(targetID, true, targetType)
   382  }
   383  
   384  func (tab *Table) lookup(targetID NodeID, refreshIfEmpty bool, targetNT NodeType) []*Node {
   385  	tab.storagesMu.RLock()
   386  	defer tab.storagesMu.RUnlock()
   387  
   388  	if tab.storages[targetNT] == nil {
   389  		tab.localLogger.Warn("lookup: Not Supported NodeType", "NodeType", targetNT)
   390  		return []*Node{}
   391  	}
   392  	return tab.storages[targetNT].lookup(targetID, refreshIfEmpty, targetNT)
   393  }
   394  
   395  func (tab *Table) GetNodes(targetNT NodeType, max int) []*Node {
   396  	tab.storagesMu.RLock()
   397  	defer tab.storagesMu.RUnlock()
   398  
   399  	if tab.storages[targetNT] == nil {
   400  		tab.localLogger.Warn("getNodes: Not Supported NodeType", "NodeType", targetNT)
   401  		return []*Node{}
   402  	}
   403  	return tab.storages[targetNT].getNodes(max)
   404  }
   405  
   406  func removeBn(nodes []*Node) []*Node {
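	// Filter in place: tmp shares nodes' backing array, so no new slice is
	// allocated and the relative order of the kept entries is preserved.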
   407  	tmp := nodes[:0]
   408  	for _, n := range nodes {
   409  		if n.NType != NodeTypeBN {
   410  			tmp = append(tmp, n)
   411  		}
   412  	}
   413  	return tmp
   414  }
   415  
   416  func (tab *Table) refresh() <-chan struct{} {
   417  	done := make(chan struct{})
   418  	select {
   419  	case tab.refreshReq <- done:
   420  	case <-tab.closed:
   421  		close(done)
   422  	}
   423  	return done
   424  }
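
// exampleForceRefresh is a sketch of the request/acknowledge pattern behind
// refresh(): the returned channel is closed by loop() once doRefresh finishes
// (or immediately if the table is already closed), so callers can block on it.
func (tab *Table) exampleForceRefresh() {
	done := tab.refresh() // non-blocking request
	<-done                // wait for the refresh to complete
}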
   425  
   426  // loop schedules refresh, revalidate runs and coordinates shutdown.
   427  func (tab *Table) loop() {
   428  	var (
   429  		revalidate     = time.NewTimer(tab.nextRevalidateTime())
   430  		refresh        = time.NewTicker(refreshInterval)
   431  		copyNodes      = time.NewTicker(copyNodesInterval)
   432  		revalidateDone = make(chan struct{})
   433  		refreshDone    = make(chan struct{})           // where doRefresh reports completion
   434  		waiting        = []chan struct{}{tab.initDone} // holds waiting callers while doRefresh runs
   435  	)
   436  	defer refresh.Stop()
   437  	defer revalidate.Stop()
   438  	defer copyNodes.Stop()
   439  
   440  	// Start initial refresh.
   441  	go tab.doRefresh(refreshDone)
   442  
   443  loop:
   444  	for {
   445  		select {
   446  		case <-refresh.C:
   447  			tab.seedRand()
   448  			if refreshDone == nil {
   449  				refreshDone = make(chan struct{})
   450  				go tab.doRefresh(refreshDone)
   451  			}
   452  		case req := <-tab.refreshReq:
   453  			waiting = append(waiting, req)
   454  			if refreshDone == nil {
   455  				refreshDone = make(chan struct{})
   456  				go tab.doRefresh(refreshDone)
   457  			}
   458  		case <-refreshDone:
   459  			for _, ch := range waiting {
   460  				close(ch)
   461  			}
   462  			waiting, refreshDone = nil, nil
   463  		case <-revalidate.C:
   464  			go tab.doRevalidate(revalidateDone)
   465  		case <-revalidateDone:
   466  			tt := tab.nextRevalidateTime()
   467  			revalidate.Reset(tt)
   468  		case <-copyNodes.C:
   469  			go tab.copyBondedNodes()
   470  		case <-tab.closeReq:
   471  			break loop
   472  		}
   473  	}
   474  
   475  	if tab.net != nil {
   476  		tab.net.close()
   477  	}
   478  	if refreshDone != nil {
   479  		<-refreshDone
   480  	}
   481  	for _, ch := range waiting {
   482  		close(ch)
   483  	}
   484  	tab.db.close()
   485  	close(tab.closed)
   486  }
   487  
   488  // doRefresh performs a lookup for a random target to keep buckets
   489  // full. Seed nodes are inserted if the table is empty (initial
   490  // bootstrap or discarded faulty peers).
   491  func (tab *Table) doRefresh(done chan struct{}) {
   492  	tab.localLogger.Trace("doRefresh()")
   493  	defer close(done)
   494  
   495  	// Load nodes from the database and insert
   496  	// them. This should yield a few previously seen nodes that are
   497  	// (hopefully) still alive.
   498  	tab.loadSeedNodes(true)
   499  
   500  	tab.storagesMu.RLock()
   501  	defer tab.storagesMu.RUnlock()
   502  	for _, ds := range tab.storages {
   503  		ds.doRefresh()
   504  	}
   505  }
   506  
   507  func (tab *Table) loadSeedNodes(bond bool) {
   508  	// TODO-Klaytn-Node Separate logic to storages.
   509  	seeds := tab.db.querySeeds(seedCount, seedMaxAge)
   510  	seeds = removeBn(seeds)
   511  	seeds = append(seeds, tab.nursery...)
   512  	if bond {
   513  		seeds = tab.bondall(seeds)
   514  	}
   515  	for i := range seeds {
   516  		seed := seeds[i]
   517  		age := log.Lazy{Fn: func() interface{} { return time.Since(tab.db.bondTime(seed.ID)) }}
   518  		tab.localLogger.Debug("Found seed node in database", "id", seed.ID, "addr", seed.addr(), "age", age)
   519  		tab.add(seed)
   520  	}
   521  }
   522  
   523  // doRevalidate checks that the last node in a random bucket is still live
   524  // and replaces or deletes the node if it isn't.
   525  func (tab *Table) doRevalidate(done chan<- struct{}) {
   526  	defer func() { done <- struct{}{} }()
   527  
   528  	tab.storagesMu.RLock()
   529  	defer tab.storagesMu.RUnlock()
   530  	for _, ds := range tab.storages {
   531  		ds.doRevalidate()
   532  	}
   533  }
   534  
   535  func (tab *Table) nextRevalidateTime() time.Duration {
   536  	tab.randMu.Lock()
   537  	defer tab.randMu.Unlock()
   538  
   539  	return time.Duration(tab.rand.Int63n(int64(revalidateInterval)))
   540  }
   541  
   542  // copyBondedNodes adds nodes from the table to the database if they have been in the table
   543  // longer than minTableTime.
   544  func (tab *Table) copyBondedNodes() {
   545  	tab.storagesMu.RLock()
   546  	defer tab.storagesMu.RUnlock()
   547  	for _, ds := range tab.storages {
   548  		ds.copyBondedNodes()
   549  	}
   550  }
   551  
   552  // closest returns the nresults nodes of the given node type that are
   553  // closest to the given target hash.
   554  func (tab *Table) closest(target common.Hash, nType NodeType, nresults int) *nodesByDistance {
   555  	tab.storagesMu.RLock()
   556  	defer tab.storagesMu.RUnlock()
   557  
   558  	if tab.storages[nType] == nil {
   559  		tab.localLogger.Warn("closest(): Not Supported NodeType", "NodeType", nType)
   560  		return &nodesByDistance{}
   561  	}
   562  	return tab.storages[nType].closest(target, nresults)
   563  }
   564  
   565  // RetrieveNodes returns a list of nodes excluding bootnodes. It is used to build the response to a FINDNODE request.
   566  func (tab *Table) RetrieveNodes(target common.Hash, nType NodeType, nresults int) []*Node {
   567  	tab.storagesMu.RLock()
   568  	defer tab.storagesMu.RUnlock()
   569  
   570  	if tab.storages[nType] == nil {
   571  		tab.localLogger.Warn("RetrieveNodes: Not Supported NodeType", "NodeType", nType)
   572  		return []*Node{}
   573  	}
   574  	nodes := tab.storages[nType].closest(target, nresults).entries
   575  	if nType != NodeTypeBN {
   576  		nodes = removeBn(nodes)
   577  	}
   578  	return nodes
   579  }
   580  
   581  func (tab *Table) len() (n int) {
   582  	tab.storagesMu.RLock()
   583  	defer tab.storagesMu.RUnlock()
   584  
   585  	for _, ds := range tab.storages {
   586  		n += ds.len()
   587  	}
   588  	return n
   589  }
   590  
   591  func (tab *Table) nodes() (n []*Node) {
   592  	tab.storagesMu.RLock()
   593  	defer tab.storagesMu.RUnlock()
   594  
   595  	for _, ds := range tab.storages {
   596  		n = append(n, ds.nodeAll()...)
   597  	}
   598  	return n
   599  }
   600  
   601  // bondall bonds with all given nodes concurrently and returns
   602  // those nodes for which bonding has probably succeeded.
   603  func (tab *Table) bondall(nodes []*Node) (result []*Node) {
   604  	rc := make(chan *Node, len(nodes))
   605  	for i := range nodes {
   606  		go func(n *Node) {
   607  			nn, _ := tab.Bond(false, n.ID, n.addr(), n.TCP, n.NType)
   608  			rc <- nn
   609  		}(nodes[i])
   610  	}
   611  	for range nodes {
   612  		if n := <-rc; n != nil {
   613  			result = append(result, n)
   614  		}
   615  	}
   616  	return result
   617  }
   618  
   619  // Bond ensures the local node has a bond with the given remote node.
   620  // It also attempts to insert the node into the table if bonding succeeds.
   621  // The caller must not hold tab.mutex.
   622  //
   623  // A bond must be established before sending findnode requests.
   624  // Both sides must have completed a ping/pong exchange for a bond to
   625  // exist. The total number of active bonding processes is limited in
   626  // order to restrain network use.
   627  //
   628  // bond is meant to operate idempotently in that bonding with a remote
   629  // node which still remembers a previously established bond will work.
   630  // The remote node will simply not send a ping back, causing waitping
   631  // to time out.
   632  //
   633  // If pinged is true, the remote node has just pinged us and one half
   634  // of the process can be skipped.
   635  func (tab *Table) Bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16, nType NodeType) (*Node, error) {
   636  	if id == tab.self.ID {
   637  		return nil, errors.New("is self")
   638  	}
   639  	if pinged && !tab.isInitDone() {
   640  		return nil, errors.New("still initializing")
   641  	}
   642  	// Start bonding if we haven't seen this node for a while or if it failed findnode too often.
   643  	node, fails := tab.db.node(id), tab.db.findFails(id)
   644  	age := time.Since(tab.db.bondTime(id))
   645  	var result error
   646  	// A bootnode always adds nodes (CN, PN, EN) to its table.
   647  	if fails > 0 || age > nodeDBNodeExpiration || (node == nil && tab.self.NType == NodeTypeBN) {
   648  		tab.localLogger.Trace("Bond - Starting bonding ping/pong", "id", id, "known", node != nil, "failcount", fails, "age", age)
   649  
   650  		tab.bondmu.Lock()
   651  		w := tab.bonding[id]
   652  		if w != nil {
   653  			// Wait for an existing bonding process to complete.
   654  			tab.bondmu.Unlock()
   655  			<-w.done
   656  		} else {
   657  			// Register a new bonding process.
   658  			w = &bondproc{done: make(chan struct{})}
   659  			tab.bonding[id] = w
   660  			tab.bondmu.Unlock()
   661  			// Do the ping/pong. The result goes into w.
   662  			tab.pingpong(w, pinged, id, addr, tcpPort, nType)
   663  			// Unregister the process after it's done.
   664  			tab.bondmu.Lock()
   665  			delete(tab.bonding, id)
   666  			tab.bondmu.Unlock()
   667  		}
   668  		// Retrieve the bonding results
   669  		result = w.err
   670  		tab.localLogger.Trace("Bond", "error", result)
   671  		if result == nil {
   672  			node = w.n
   673  		}
   674  	}
   675  	// Add the node to the table even if the bonding ping/pong
   676  	// fails. It will be replaced quickly if it continues to be
   677  	// unresponsive.
   678  	if node != nil {
   679  		tab.localLogger.Trace("Bond - Add", "id", node.ID, "type", node.NType, "sha", node.sha)
   680  		tab.add(node)
   681  		tab.db.updateFindFails(id, 0)
   682  		lenEntries := len(tab.GetBucketEntries())
   683  		lenReplacements := len(tab.GetReplacements())
   684  		bucketEntriesGauge.Update(int64(lenEntries))
   685  		bucketReplacementsGauge.Update(int64(lenReplacements))
   686  	}
   687  	return node, result
   688  }
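
// exampleOnInboundPing is a sketch of how the UDP layer might call Bond after
// answering a remote ping: pinged=true records that half of the ping/pong
// exchange already happened, so pingpong can skip waitping. The handler shape
// is an assumption; only the Bond call itself reflects this file.
func (tab *Table) exampleOnInboundPing(id NodeID, from *net.UDPAddr, tcpPort uint16, nType NodeType) {
	if _, err := tab.Bond(true, id, from, tcpPort, nType); err != nil {
		tab.localLogger.Trace("bond after inbound ping failed", "id", id, "err", err)
	}
}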
   689  
   690  func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16, nType NodeType) {
   691  	// Request a bonding slot to limit network usage
   692  	<-tab.bondslots
   693  	defer func() { tab.bondslots <- struct{}{} }()
   694  
   695  	// Ping the remote side and wait for a pong.
   696  	if w.err = tab.ping(id, addr); w.err != nil {
   697  		close(w.done)
   698  		return
   699  	}
   700  	if !pinged {
   701  		// Give the remote node a chance to ping us before we start
   702  		// sending findnode requests. If they still remember us,
   703  		// waitping will simply time out.
   704  		tab.localLogger.Trace("pingpong-waitping", "to", id)
   705  		tab.net.waitping(id)
   706  	}
   707  	// Bonding succeeded, update the node database.
   708  	w.n = NewNode(id, addr.IP, uint16(addr.Port), tcpPort, nil, nType)
   709  	tab.localLogger.Trace("pingpong-success, make new node", "node", w.n)
   710  	close(w.done)
   711  }
   712  
   713  // ping a remote endpoint and wait for a reply, also updating the node
   714  // database accordingly.
   715  func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
   716  	tab.localLogger.Trace("ping", "to", id)
   717  	tab.db.updateLastPing(id, time.Now())
   718  	if err := tab.net.ping(id, addr); err != nil {
   719  		return err
   720  	}
   721  	tab.db.updateBondTime(id, time.Now())
   722  	return nil
   723  }
   724  
   725  // bucket returns the bucket for the given node ID hash.
   726  // This method is intended only for unit tests.
   727  func (tab *Table) bucket(sha common.Hash, nType NodeType) *bucket {
   728  	tab.storagesMu.RLock()
   729  	defer tab.storagesMu.RUnlock()
   730  
   731  	if tab.storages[nType] == nil {
   732  		tab.localLogger.Warn("bucket(): Not Supported NodeType", "NodeType", nType)
   733  		return &bucket{}
   734  	}
   735  	if _, ok := tab.storages[nType].(*KademliaStorage); !ok {
   736  		tab.localLogger.Warn("bucket(): bucket() only allowed to use at KademliaStorage", "NodeType", nType)
   737  		return &bucket{}
   738  	}
   739  	ks := tab.storages[nType].(*KademliaStorage)
   740  
   741  	ks.bucketsMu.Lock()
   742  	defer ks.bucketsMu.Unlock()
   743  	return ks.bucket(sha)
   744  }
   745  
   746  // add attempts to add the given node to its corresponding bucket. If the
   747  // bucket has space available, adding the node succeeds immediately.
   748  // Otherwise, the node is added if the least recently active node in
   749  // the bucket does not respond to a ping packet.
   750  //
   751  // The caller must not hold tab.mutex.
   752  func (tab *Table) add(new *Node) {
   753  	tab.localLogger.Trace("add(node)", "NodeType", new.NType, "node", new, "sha", new.sha)
   754  	tab.storagesMu.RLock()
   755  	defer tab.storagesMu.RUnlock()
   756  	if new.NType == NodeTypeBN {
   757  		for _, ds := range tab.storages {
   758  			ds.add(new)
   759  		}
   760  	} else {
   761  		if tab.storages[new.NType] == nil {
   762  			tab.localLogger.Warn("add(): Not Supported NodeType", "NodeType", new.NType)
   763  			return
   764  		}
   765  		tab.storages[new.NType].add(new)
   766  	}
   767  }
   768  
   769  // stuff adds the given nodes to the end of their corresponding bucket
   770  // if the bucket is not full.
   771  func (tab *Table) stuff(nodes []*Node, nType NodeType) {
   772  	tab.storagesMu.RLock()
   773  	defer tab.storagesMu.RUnlock()
   774  	if tab.storages[nType] == nil {
   775  		tab.localLogger.Warn("stuff(): Not Supported NodeType", "NodeType", nType)
   776  		return
   777  	}
   778  	tab.storages[nType].stuff(nodes)
   779  }
   780  
   781  // delete removes an entry from the node table (used to evacuate
   782  // failed/non-bonded discovery peers).
   783  func (tab *Table) delete(node *Node) {
   784  	tab.storagesMu.RLock()
   785  	defer tab.storagesMu.RUnlock()
   786  	for _, ds := range tab.storages {
   787  		ds.delete(node)
   788  	}
   789  }
   790  
   791  func (tab *Table) HasBond(id NodeID) bool {
   792  	return tab.db.hasBond(id)
   793  }
   794  
   795  // nodesByDistance is a list of nodes, ordered by
   796  // distance to target.
   797  type nodesByDistance struct {
   798  	entries []*Node
   799  	target  common.Hash
   800  }
   801  
   802  // push adds the given node to the list, keeping the total size at or below maxElems.
   803  func (h *nodesByDistance) push(n *Node, maxElems int) {
   804  	ix := sort.Search(len(h.entries), func(i int) bool {
   805  		return distcmp(h.target, h.entries[i].sha, n.sha) > 0
   806  	})
   807  	if len(h.entries) < maxElems {
   808  		h.entries = append(h.entries, n)
   809  	}
   810  	if ix == len(h.entries) {
   811  		// farther away than all nodes we already have.
   812  		// if there was room for it, the node is now the last element.
   813  	} else {
   814  		// slide existing entries down to make room
   815  		// this will overwrite the entry we just appended.
   816  		copy(h.entries[ix+1:], h.entries[ix:])
   817  		h.entries[ix] = n
   818  	}
   819  }
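
// examplePush is a sketch of the push semantics above: entries stay sorted by
// distance to target and never grow past maxElems, so once the list is full
// the farthest candidate is the one that gets dropped. The helper is an
// illustrative assumption.
func examplePush(target common.Hash, candidates []*Node) []*Node {
	closestNodes := &nodesByDistance{target: target}
	for _, n := range candidates {
		closestNodes.push(n, bucketSize) // keep at most bucketSize closest entries
	}
	return closestNodes.entries
}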
   820  
   821  func (h *nodesByDistance) String() string {
   822  	return fmt.Sprintf("nodesByDistance target: %s, entries: %s", h.target, h.entries)
   823  }