github.com/jeffallen/go-ethereum@v1.1.4-0.20150910155051-571d3236c49c/p2p/discover/table.go

// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package discover implements the Node Discovery Protocol.
//
// The Node Discovery protocol provides a way to find RLPx nodes that
// can be connected to. It uses a Kademlia-like protocol to maintain a
// distributed database of the IDs and endpoints of all listening
// nodes.
package discover

import (
	"crypto/rand"
	"encoding/binary"
	"net"
	"sort"
	"sync"
	"time"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/logger"
	"github.com/ethereum/go-ethereum/logger/glog"
)

const (
	alpha      = 3  // Kademlia concurrency factor
	bucketSize = 16 // Kademlia bucket size
	hashBits   = len(common.Hash{}) * 8
	nBuckets   = hashBits + 1 // Number of buckets

	maxBondingPingPongs = 16
	maxFindnodeFailures = 5
)

type Table struct {
	mutex   sync.Mutex        // protects buckets, their content, and nursery
	buckets [nBuckets]*bucket // index of known nodes by distance
	nursery []*Node           // bootstrap nodes
	db      *nodeDB           // database of known nodes

	bondmu    sync.Mutex
	bonding   map[NodeID]*bondproc
	bondslots chan struct{} // limits total number of active bonding processes

	nodeAddedHook func(*Node) // for testing

	net  transport
	self *Node // metadata of the local node
}

type bondproc struct {
	err  error
	n    *Node
	done chan struct{}
}

// transport is implemented by the UDP transport.
// it is an interface so we can test without opening lots of UDP
// sockets and without generating a private key.
type transport interface {
	ping(NodeID, *net.UDPAddr) error
	waitping(NodeID) error
	findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error)
	close()
}
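// A minimal sketch of a transport implementation, included purely for
// illustration: it shows the shape an implementation of the interface above
// must have. The type name is arbitrary and nothing in this package refers to
// it; the real implementation is the UDP transport, and tests use a similar
// stand-in so that no sockets are opened and no private key is needed.
type illustrativeTransport struct{}

var _ transport = illustrativeTransport{} // compile-time interface check

func (illustrativeTransport) ping(NodeID, *net.UDPAddr) error { return nil }
func (illustrativeTransport) waitping(NodeID) error           { return nil }
func (illustrativeTransport) findnode(toid NodeID, addr *net.UDPAddr, target NodeID) ([]*Node, error) {
	return nil, nil // a real transport would return the remote node's closest neighbours
}
func (illustrativeTransport) close() {}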
// bucket contains nodes, ordered by their last activity. the entry
// that was most recently active is the first element in entries.
type bucket struct {
	lastLookup time.Time
	entries    []*Node
}

func newTable(t transport, ourID NodeID, ourAddr *net.UDPAddr, nodeDBPath string) *Table {
	// If no node database was given, use an in-memory one
	db, err := newNodeDB(nodeDBPath, Version, ourID)
	if err != nil {
		glog.V(logger.Warn).Infoln("Failed to open node database:", err)
		db, _ = newNodeDB("", Version, ourID)
	}
	tab := &Table{
		net:       t,
		db:        db,
		self:      newNode(ourID, ourAddr.IP, uint16(ourAddr.Port), uint16(ourAddr.Port)),
		bonding:   make(map[NodeID]*bondproc),
		bondslots: make(chan struct{}, maxBondingPingPongs),
	}
	for i := 0; i < cap(tab.bondslots); i++ {
		tab.bondslots <- struct{}{}
	}
	for i := range tab.buckets {
		tab.buckets[i] = new(bucket)
	}
	return tab
}

// Self returns the local node.
// The returned node should not be modified by the caller.
func (tab *Table) Self() *Node {
	return tab.self
}

// ReadRandomNodes fills the given slice with random nodes from the
// table. It will not write the same node more than once. The nodes in
// the slice are copies and can be modified by the caller.
func (tab *Table) ReadRandomNodes(buf []*Node) (n int) {
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	// TODO: tree-based buckets would help here
	// Find all non-empty buckets and get a fresh slice of their entries.
	var buckets [][]*Node
	for _, b := range tab.buckets {
		if len(b.entries) > 0 {
			buckets = append(buckets, b.entries[:])
		}
	}
	if len(buckets) == 0 {
		return 0
	}
	// Shuffle the buckets.
	for i := uint32(len(buckets)) - 1; i > 0; i-- {
		j := randUint(i)
		buckets[i], buckets[j] = buckets[j], buckets[i]
	}
	// Move head of each bucket into buf, removing buckets that become empty.
	var i, j int
	for ; i < len(buf); i, j = i+1, (j+1)%len(buckets) {
		b := buckets[j]
		cpy := *b[0]
		buf[i] = &cpy // hand out a copy, as promised by the doc comment
		buckets[j] = b[1:]
		if len(b) == 1 {
			buckets = append(buckets[:j], buckets[j+1:]...)
		}
		if len(buckets) == 0 {
			i++ // count the entry written in this final iteration
			break
		}
	}
	return i
}

func randUint(max uint32) uint32 {
	if max == 0 {
		return 0
	}
	var b [4]byte
	rand.Read(b[:])
	return binary.BigEndian.Uint32(b[:]) % max
}

// Close terminates the network listener and flushes the node database.
func (tab *Table) Close() {
	if tab.net != nil {
		tab.net.close()
	}
	tab.db.close()
}

// Bootstrap sets the bootstrap nodes. These nodes are used to connect
// to the network if the table is empty. Bootstrap will also attempt to
// fill the table by performing random lookup operations on the
// network.
func (tab *Table) Bootstrap(nodes []*Node) {
	tab.mutex.Lock()
	// TODO: maybe filter nodes with bad fields (nil, etc.) to avoid strange crashes
	tab.nursery = make([]*Node, 0, len(nodes))
	for _, n := range nodes {
		cpy := *n
		cpy.sha = crypto.Sha3Hash(n.ID[:])
		tab.nursery = append(tab.nursery, &cpy)
	}
	tab.mutex.Unlock()
	tab.refresh()
}
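// Illustrative sketch of how a caller might wire the table together: build it
// with a transport and the local identity, seed it with bootstrap nodes, then
// pull random peers back out. The function name, address and port are
// placeholders; an empty database path selects the in-memory node database.
func exampleTableUsage(t transport, selfID NodeID, bootnodes []*Node) []*Node {
	addr := &net.UDPAddr{IP: net.ParseIP("127.0.0.1"), Port: 30303}
	tab := newTable(t, selfID, addr, "") // "" -> in-memory node database
	defer tab.Close()

	tab.Bootstrap(bootnodes) // stores the nursery and triggers a refresh

	// Read up to bucketSize random, already-known nodes back out of the table.
	buf := make([]*Node, bucketSize)
	n := tab.ReadRandomNodes(buf)
	return buf[:n]
}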
// Lookup performs a network search for nodes close
// to the given target. It approaches the target by querying
// nodes that are closer to it on each iteration.
// The given target does not need to be an actual node
// identifier.
func (tab *Table) Lookup(targetID NodeID) []*Node {
	var (
		target         = crypto.Sha3Hash(targetID[:])
		asked          = make(map[NodeID]bool)
		seen           = make(map[NodeID]bool)
		reply          = make(chan []*Node, alpha)
		pendingQueries = 0
	)
	// don't query further if we hit ourselves.
	// unlikely to happen often in practice.
	asked[tab.self.ID] = true

	tab.mutex.Lock()
	// update last lookup stamp (for refresh logic)
	tab.buckets[logdist(tab.self.sha, target)].lastLookup = time.Now()
	// generate initial result set
	result := tab.closest(target, bucketSize)
	tab.mutex.Unlock()

	// If the result set is empty, all nodes were dropped, refresh
	if len(result.entries) == 0 {
		tab.refresh()
		return nil
	}

	for {
		// ask the alpha closest nodes that we haven't asked yet
		for i := 0; i < len(result.entries) && pendingQueries < alpha; i++ {
			n := result.entries[i]
			if !asked[n.ID] {
				asked[n.ID] = true
				pendingQueries++
				go func() {
					// Find potential neighbors to bond with
					r, err := tab.net.findnode(n.ID, n.addr(), targetID)
					if err != nil {
						// Bump the failure counter to detect and evacuate non-bonded entries
						fails := tab.db.findFails(n.ID) + 1
						tab.db.updateFindFails(n.ID, fails)
						glog.V(logger.Detail).Infof("Bumping failures for %x: %d", n.ID[:8], fails)

						if fails >= maxFindnodeFailures {
							glog.V(logger.Detail).Infof("Evacuating node %x: %d findnode failures", n.ID[:8], fails)
							tab.delete(n)
						}
					}
					reply <- tab.bondall(r)
				}()
			}
		}
		if pendingQueries == 0 {
			// we have asked all closest nodes, stop the search
			break
		}
		// wait for the next reply
		for _, n := range <-reply {
			if n != nil && !seen[n.ID] {
				seen[n.ID] = true
				result.push(n, bucketSize)
			}
		}
		pendingQueries--
	}
	return result.entries
}
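// Illustrative sketch of the distance metric used above: a node's bucket is
// chosen by logdist over the sha3 hashes of the two IDs, roughly the position
// of the highest differing bit of their XOR, which is why nBuckets is
// hashBits + 1. The helper name is arbitrary and only demonstrates the
// mapping that Lookup and add perform inline; inspecting a bucket's entries
// would additionally require holding tab.mutex.
func exampleBucketFor(tab *Table, id NodeID) *bucket {
	sha := crypto.Sha3Hash(id[:])
	return tab.buckets[logdist(tab.self.sha, sha)]
}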
// refresh performs a lookup for a random target to keep buckets full, or seeds
// the table if it is empty (initial bootstrap or discarded faulty peers).
func (tab *Table) refresh() {
	seed := true

	// If the discovery table is empty, seed with previously known nodes
	tab.mutex.Lock()
	for _, bucket := range tab.buckets {
		if len(bucket.entries) > 0 {
			seed = false
			break
		}
	}
	tab.mutex.Unlock()

	// If the table is not empty, try to refresh using the live entries
	if !seed {
		// The Kademlia paper specifies that the bucket refresh should
		// perform a refresh in the least recently used bucket. We cannot
		// adhere to this because the findnode target is a 512-bit value
		// (not hash-sized) and it is not easily possible to generate a
		// sha3 preimage that falls into a chosen bucket.
		//
		// We perform a lookup with a random target instead.
		var target NodeID
		rand.Read(target[:])

		result := tab.Lookup(target)
		if len(result) == 0 {
			// Lookup failed, seed after all
			seed = true
		}
	}

	if seed {
		// Pick a batch of previously known seeds to look up with
		seeds := tab.db.querySeeds(10)
		for _, seed := range seeds {
			glog.V(logger.Debug).Infoln("Seeding network with", seed)
		}
		nodes := append(tab.nursery, seeds...)

		// Bond with all the seed nodes (the ping/pong is skipped for nodes
		// that are already known and have not failed recently)
		bonded := tab.bondall(nodes)
		if len(bonded) > 0 {
			tab.Lookup(tab.self.ID)
		}
		// TODO: the Kademlia paper says that we're supposed to perform
		// random lookups in all buckets further away than our closest neighbor.
	}
}

// closest returns the n nodes in the table that are closest to the
// given id. The caller must hold tab.mutex.
func (tab *Table) closest(target common.Hash, nresults int) *nodesByDistance {
	// This is a very wasteful way to find the closest nodes but
	// obviously correct. I believe that tree-based buckets would make
	// this easier to implement efficiently.
	close := &nodesByDistance{target: target}
	for _, b := range tab.buckets {
		for _, n := range b.entries {
			close.push(n, nresults)
		}
	}
	return close
}

func (tab *Table) len() (n int) {
	for _, b := range tab.buckets {
		n += len(b.entries)
	}
	return n
}

// bondall bonds with all given nodes concurrently and returns
// those nodes for which bonding has probably succeeded.
func (tab *Table) bondall(nodes []*Node) (result []*Node) {
	rc := make(chan *Node, len(nodes))
	for i := range nodes {
		go func(n *Node) {
			nn, _ := tab.bond(false, n.ID, n.addr(), uint16(n.TCP))
			rc <- nn
		}(nodes[i])
	}
	for range nodes {
		if n := <-rc; n != nil {
			result = append(result, n)
		}
	}
	return result
}
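// Illustrative sketch of how closest is meant to be called: take the sha3
// hash of an ID, hold tab.mutex, and receive up to bucketSize entries ordered
// by XOR distance to that hash (see nodesByDistance at the end of this file).
// This mirrors what Lookup does to build its initial result set; the function
// name is arbitrary.
func exampleClosest(tab *Table, targetID NodeID) []*Node {
	target := crypto.Sha3Hash(targetID[:])
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	return tab.closest(target, bucketSize).entries
}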
// bond ensures the local node has a bond with the given remote node.
// It also attempts to insert the node into the table if bonding succeeds.
// The caller must not hold tab.mutex.
//
// A bond must be established before sending findnode requests.
// Both sides must have completed a ping/pong exchange for a bond to
// exist. The total number of active bonding processes is limited in
// order to restrain network use.
//
// bond is meant to operate idempotently in that bonding with a remote
// node which still remembers a previously established bond will work.
// The remote node will simply not send a ping back, causing waitping
// to time out.
//
// If pinged is true, the remote node has just pinged us and one half
// of the process can be skipped.
func (tab *Table) bond(pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) (*Node, error) {
	// Retrieve a previously known node and any recent findnode failures
	node, fails := tab.db.node(id), 0
	if node != nil {
		fails = tab.db.findFails(id)
	}
	// If the node is unknown (non-bonded) or failed (remotely unknown), bond from scratch
	var result error
	if node == nil || fails > 0 {
		glog.V(logger.Detail).Infof("Bonding %x: known=%v, fails=%v", id[:8], node != nil, fails)

		tab.bondmu.Lock()
		w := tab.bonding[id]
		if w != nil {
			// Wait for an existing bonding process to complete.
			tab.bondmu.Unlock()
			<-w.done
		} else {
			// Register a new bonding process.
			w = &bondproc{done: make(chan struct{})}
			tab.bonding[id] = w
			tab.bondmu.Unlock()
			// Do the ping/pong. The result goes into w.
			tab.pingpong(w, pinged, id, addr, tcpPort)
			// Unregister the process after it's done.
			tab.bondmu.Lock()
			delete(tab.bonding, id)
			tab.bondmu.Unlock()
		}
		// Retrieve the bonding results
		result = w.err
		if result == nil {
			node = w.n
		}
	}
	if node != nil {
		// Add the node to the table even if the bonding ping/pong
		// fails. It will be replaced quickly if it continues to be
		// unresponsive.
		tab.add(node)
		tab.db.updateFindFails(id, 0)
	}
	return node, result
}

func (tab *Table) pingpong(w *bondproc, pinged bool, id NodeID, addr *net.UDPAddr, tcpPort uint16) {
	// Request a bonding slot to limit network usage
	<-tab.bondslots
	defer func() { tab.bondslots <- struct{}{} }()

	// Ping the remote side and wait for a pong.
	if w.err = tab.ping(id, addr); w.err != nil {
		close(w.done)
		return
	}
	if !pinged {
		// Give the remote node a chance to ping us before we start
		// sending findnode requests. If they still remember us,
		// waitping will simply time out.
		tab.net.waitping(id)
	}
	// Bonding succeeded, update the node database.
	w.n = newNode(id, addr.IP, uint16(addr.Port), tcpPort)
	tab.db.updateNode(w.n)
	close(w.done)
}

// ping a remote endpoint and wait for a reply, also updating the node
// database accordingly.
func (tab *Table) ping(id NodeID, addr *net.UDPAddr) error {
	// Update the last ping and send the message
	tab.db.updateLastPing(id, time.Now())
	if err := tab.net.ping(id, addr); err != nil {
		return err
	}
	// Pong received, update the database and return
	tab.db.updateLastPong(id, time.Now())
	tab.db.ensureExpirer()
	return nil
}

// add attempts to add the given node to its corresponding bucket. If the
// bucket has space available, adding the node succeeds immediately.
// Otherwise, the node is added if the least recently active node in
// the bucket does not respond to a ping packet.
//
// The caller must not hold tab.mutex.
func (tab *Table) add(new *Node) {
	b := tab.buckets[logdist(tab.self.sha, new.sha)]
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	if b.bump(new) {
		return
	}
	var oldest *Node
	if len(b.entries) == bucketSize {
		oldest = b.entries[bucketSize-1]
		if oldest.contested {
			// The node is already being replaced, don't attempt
			// to replace it.
			return
		}
		oldest.contested = true
		// Let go of the mutex so other goroutines can access
		// the table while we ping the least recently active node.
		tab.mutex.Unlock()
		err := tab.ping(oldest.ID, oldest.addr())
		tab.mutex.Lock()
		oldest.contested = false
		if err == nil {
			// The node responded, don't replace it.
			return
		}
	}
	added := b.replace(new, oldest)
	if added && tab.nodeAddedHook != nil {
		tab.nodeAddedHook(new)
	}
}
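// Illustrative sketch of the bondslots pattern used by pingpong above: the
// channel is filled with maxBondingPingPongs tokens in newTable, so receiving
// acquires a slot (blocking when all slots are in use) and sending the token
// back releases it. The helper below is not used anywhere; its name is
// arbitrary and it only restates the acquire/release shape around a callback.
func (tab *Table) withBondSlot(fn func()) {
	<-tab.bondslots                                // acquire a bonding slot
	defer func() { tab.bondslots <- struct{}{} }() // release it when done
	fn()
}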
// stuff adds nodes to the table, appending them to the end of their
// corresponding bucket if the bucket is not full. The caller must hold
// tab.mutex.
func (tab *Table) stuff(nodes []*Node) {
outer:
	for _, n := range nodes {
		if n.ID == tab.self.ID {
			continue // don't add self
		}
		bucket := tab.buckets[logdist(tab.self.sha, n.sha)]
		for i := range bucket.entries {
			if bucket.entries[i].ID == n.ID {
				continue outer // already in bucket
			}
		}
		if len(bucket.entries) < bucketSize {
			bucket.entries = append(bucket.entries, n)
			if tab.nodeAddedHook != nil {
				tab.nodeAddedHook(n)
			}
		}
	}
}

// delete removes an entry from the node table (used to evacuate
// failed/non-bonded discovery peers).
func (tab *Table) delete(node *Node) {
	tab.mutex.Lock()
	defer tab.mutex.Unlock()
	bucket := tab.buckets[logdist(tab.self.sha, node.sha)]
	for i := range bucket.entries {
		if bucket.entries[i].ID == node.ID {
			bucket.entries = append(bucket.entries[:i], bucket.entries[i+1:]...)
			return
		}
	}
}

func (b *bucket) replace(n *Node, last *Node) bool {
	// Don't add if b already contains n.
	for i := range b.entries {
		if b.entries[i].ID == n.ID {
			return false
		}
	}
	// Replace last if it is still the last entry or just add n if b
	// isn't full. If it is no longer the last entry, it has either been
	// replaced with someone else or has become active.
	if len(b.entries) == bucketSize && (last == nil || b.entries[bucketSize-1].ID != last.ID) {
		return false
	}
	if len(b.entries) < bucketSize {
		b.entries = append(b.entries, nil)
	}
	copy(b.entries[1:], b.entries)
	b.entries[0] = n
	return true
}

func (b *bucket) bump(n *Node) bool {
	for i := range b.entries {
		if b.entries[i].ID == n.ID {
			// move it to the front
			copy(b.entries[1:], b.entries[:i])
			b.entries[0] = n
			return true
		}
	}
	return false
}

// nodesByDistance is a list of nodes, ordered by
// distance to target.
type nodesByDistance struct {
	entries []*Node
	target  common.Hash
}

// push adds the given node to the list, keeping the total size below maxElems.
func (h *nodesByDistance) push(n *Node, maxElems int) {
	ix := sort.Search(len(h.entries), func(i int) bool {
		return distcmp(h.target, h.entries[i].sha, n.sha) > 0
	})
	if len(h.entries) < maxElems {
		h.entries = append(h.entries, n)
	}
	if ix == len(h.entries) {
		// farther away than all nodes we already have.
		// if there was room for it, the node is now the last element.
	} else {
		// slide existing entries down to make room
		// this will overwrite the entry we just appended.
		copy(h.entries[ix+1:], h.entries[ix:])
		h.entries[ix] = n
	}
}
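// Illustrative sketch of how push keeps a bounded, distance-ordered result
// set: nodes are inserted at their sorted position and the list never grows
// past maxElems, so once it is full the farthest entry is dropped. This is
// the same loop closest runs over the buckets; the function name is
// arbitrary.
func exampleNodesByDistance(target common.Hash, candidates []*Node) []*Node {
	result := &nodesByDistance{target: target}
	for _, n := range candidates {
		result.push(n, bucketSize) // keep only the bucketSize closest to target
	}
	return result.entries // ordered closest-first
}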