github.com/NebulousLabs/Sia@v1.3.7/modules/gateway/nodes.go (about)

     1  package gateway
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net"
     7  	"time"
     8  
     9  	"github.com/NebulousLabs/Sia/build"
    10  	"github.com/NebulousLabs/Sia/encoding"
    11  	"github.com/NebulousLabs/Sia/modules"
    12  	"github.com/NebulousLabs/Sia/types"
    13  	"github.com/NebulousLabs/fastrand"
    14  )
    15  
// Sentinel errors used by the node-list code in this file.
//
//   - errNodeExists is returned by addNode when the address is already a key
//     in g.nodes; requestNodes treats it as expected and does not log it.
//   - errNoNodes is what permanentNodePurger compares against after calling
//     randomNode. NOTE(review): randomNode as written returns errNoPeers, not
//     errNoNodes - confirm which sentinel is intended.
//   - errOurAddress is returned by addNode when the address equals g.myAddr;
//     requestNodes also treats it as expected.
//   - errPeerGenesisID is not referenced in this file; presumably it is used
//     by the session-header validation elsewhere in the package - confirm.
var (
	errNodeExists    = errors.New("node already added")
	errNoNodes       = errors.New("no nodes in the node list")
	errOurAddress    = errors.New("can't add our own address")
	errPeerGenesisID = errors.New("peer has different genesis ID")
)
    22  
// A node represents a potential peer on the Sia network.
//
// NetAddress is the node's address; addNode stores the same value as the key
// of the gateway's node map. WasOutboundPeer is initialised to false by
// addNode; presumably it is set to true elsewhere once the node has been used
// as an outbound peer - confirm against the peer-management code. Both fields
// carry JSON tags, so the struct has a defined JSON representation.
type node struct {
	NetAddress      modules.NetAddress `json:"netaddress"`
	WasOutboundPeer bool               `json:"wasoutboundpeer"`
}
    28  
    29  // addNode adds an address to the set of nodes on the network.
    30  func (g *Gateway) addNode(addr modules.NetAddress) error {
    31  	if addr == g.myAddr {
    32  		return errOurAddress
    33  	} else if _, exists := g.nodes[addr]; exists {
    34  		return errNodeExists
    35  	} else if addr.IsStdValid() != nil {
    36  		return errors.New("address is not valid: " + string(addr))
    37  	} else if net.ParseIP(addr.Host()) == nil {
    38  		return errors.New("address must be an IP address: " + string(addr))
    39  	}
    40  	g.nodes[addr] = &node{
    41  		NetAddress:      addr,
    42  		WasOutboundPeer: false,
    43  	}
    44  	return nil
    45  }
    46  
    47  // staticPingNode verifies that there is a reachable node at the provided address
    48  // by performing the Sia gateway handshake protocol.
    49  func (g *Gateway) staticPingNode(addr modules.NetAddress) error {
    50  	// Ping the untrusted node to see whether or not there's actually a
    51  	// reachable node at the provided address.
    52  	conn, err := g.staticDial(addr)
    53  	if err != nil {
    54  		return err
    55  	}
    56  	defer conn.Close()
    57  
    58  	// Read the node's version.
    59  	remoteVersion, err := connectVersionHandshake(conn, build.Version)
    60  	if err != nil {
    61  		return err
    62  	}
    63  
    64  	if build.VersionCmp(remoteVersion, minimumAcceptablePeerVersion) < 0 {
    65  		return nil // for older versions, this is where pinging ends
    66  	}
    67  
    68  	// Send our header.
    69  	// NOTE: since we don't intend to complete the connection, we can send an
    70  	// inaccurate NetAddress.
    71  	ourHeader := sessionHeader{
    72  		GenesisID:  types.GenesisID,
    73  		UniqueID:   g.staticId,
    74  		NetAddress: modules.NetAddress(conn.LocalAddr().String()),
    75  	}
    76  	if err := exchangeOurHeader(conn, ourHeader); err != nil {
    77  		return err
    78  	}
    79  
    80  	// Read remote header.
    81  	var remoteHeader sessionHeader
    82  	if err := encoding.ReadObject(conn, &remoteHeader, maxEncodedSessionHeaderSize); err != nil {
    83  		return fmt.Errorf("failed to read remote header: %v", err)
    84  	} else if err := acceptableSessionHeader(ourHeader, remoteHeader, conn.RemoteAddr().String()); err != nil {
    85  		return err
    86  	}
    87  
    88  	// Send special rejection string.
    89  	if err := encoding.WriteObject(conn, modules.StopResponse); err != nil {
    90  		return fmt.Errorf("failed to write header rejection: %v", err)
    91  	}
    92  	return nil
    93  }
    94  
    95  // removeNode will remove a node from the gateway.
    96  func (g *Gateway) removeNode(addr modules.NetAddress) error {
    97  	if _, exists := g.nodes[addr]; !exists {
    98  		return errors.New("no record of that node")
    99  	}
   100  	delete(g.nodes, addr)
   101  	return nil
   102  }
   103  
   104  // randomNode returns a random node from the gateway. An error can be returned
   105  // if there are no nodes in the node list.
   106  func (g *Gateway) randomNode() (modules.NetAddress, error) {
   107  	if len(g.nodes) == 0 {
   108  		return "", errNoPeers
   109  	}
   110  
   111  	// Select a random peer. Note that the algorithm below is roughly linear in
   112  	// the number of nodes known by the gateway, and this number can approach
   113  	// every node on the network. If the network gets large, this algorithm
   114  	// will either need to be refactored, or more likely a cap on the size of
   115  	// g.nodes will need to be added.
   116  	r := fastrand.Intn(len(g.nodes))
   117  	for node := range g.nodes {
   118  		if r <= 0 {
   119  			return node, nil
   120  		}
   121  		r--
   122  	}
   123  	return "", errNoPeers
   124  }
   125  
   126  // shareNodes is the receiving end of the ShareNodes RPC. It writes up to 10
   127  // randomly selected nodes to the caller.
   128  func (g *Gateway) shareNodes(conn modules.PeerConn) error {
   129  	conn.SetDeadline(time.Now().Add(connStdDeadline))
   130  	remoteNA := modules.NetAddress(conn.RemoteAddr().String())
   131  
   132  	// Assemble a list of nodes to send to the peer.
   133  	var nodes []modules.NetAddress
   134  	func() {
   135  		g.mu.RLock()
   136  		defer g.mu.RUnlock()
   137  
   138  		// Gather candidates for sharing.
   139  		gnodes := make([]modules.NetAddress, 0, len(g.nodes))
   140  		for node := range g.nodes {
   141  			// Don't share local peers with remote peers. That means that if 'node'
   142  			// is loopback, it will only be shared if the remote peer is also
   143  			// loopback. And if 'node' is private, it will only be shared if the
   144  			// remote peer is either the loopback or is also private.
   145  			if node.IsLoopback() && !remoteNA.IsLoopback() {
   146  				continue
   147  			}
   148  			if node.IsLocal() && !remoteNA.IsLocal() {
   149  				continue
   150  			}
   151  			gnodes = append(gnodes, node)
   152  		}
   153  
   154  		// Iterate through the random permutation of nodes and select the
   155  		// desirable ones.
   156  		for _, i := range fastrand.Perm(len(gnodes)) {
   157  			nodes = append(nodes, gnodes[i])
   158  			if uint64(len(nodes)) == maxSharedNodes {
   159  				break
   160  			}
   161  		}
   162  	}()
   163  	return encoding.WriteObject(conn, nodes)
   164  }
   165  
   166  // requestNodes is the calling end of the ShareNodes RPC.
   167  func (g *Gateway) requestNodes(conn modules.PeerConn) error {
   168  	conn.SetDeadline(time.Now().Add(connStdDeadline))
   169  
   170  	var nodes []modules.NetAddress
   171  	if err := encoding.ReadObject(conn, &nodes, maxSharedNodes*modules.MaxEncodedNetAddressLength); err != nil {
   172  		return err
   173  	}
   174  
   175  	g.mu.Lock()
   176  	changed := false
   177  	for _, node := range nodes {
   178  		err := g.addNode(node)
   179  		if err != nil && err != errNodeExists && err != errOurAddress {
   180  			g.log.Printf("WARN: peer '%v' sent the invalid addr '%v'", conn.RPCAddr(), node)
   181  		}
   182  		if err == nil {
   183  			changed = true
   184  		}
   185  	}
   186  	if changed {
   187  		err := g.saveSync()
   188  		if err != nil {
   189  			g.log.Println("ERROR: unable to save new nodes added to the gateway:", err)
   190  		}
   191  	}
   192  	g.mu.Unlock()
   193  	return nil
   194  }
   195  
   196  // permanentNodePurger is a thread that runs throughout the lifetime of the
   197  // gateway, purging unconnectable nodes from the node list in a sustainable
   198  // way.
   199  func (g *Gateway) permanentNodePurger(closeChan chan struct{}) {
   200  	defer close(closeChan)
   201  
   202  	for {
   203  		// Choose an amount of time to wait before attempting to prune a node.
   204  		// Nodes will occasionally go offline for some time, which can even be
   205  		// days. We don't want to too aggressively prune nodes with low-moderate
   206  		// uptime, as they are still useful to the network.
   207  		//
   208  		// But if there are a lot of nodes, we want to make sure that the node
   209  		// list does not become saturated with inaccessible / offline nodes.
   210  		// Pruning happens a lot faster when there are a lot of nodes in the
   211  		// gateway.
   212  		//
   213  		// This value is a ratelimit which tries to keep the nodes list in the
   214  		// gateawy healthy. A more complex algorithm might adjust this number
   215  		// according to the percentage of prune attempts that are successful
   216  		// (decrease prune frequency if most nodes in the database are online,
   217  		// increase prune frequency if more nodes in the database are offline).
   218  		waitTime := nodePurgeDelay
   219  		g.mu.RLock()
   220  		nodeCount := len(g.nodes)
   221  		g.mu.RUnlock()
   222  		if nodeCount > quickPruneListLen {
   223  			waitTime = fastNodePurgeDelay
   224  		}
   225  
   226  		// Sleep as a purge ratelimit.
   227  		select {
   228  		case <-time.After(waitTime):
   229  		case <-g.threads.StopChan():
   230  			// The gateway is shutting down, close out the thread.
   231  			return
   232  		}
   233  
   234  		// Get a random node for scanning.
   235  		g.mu.RLock()
   236  		numNodes := len(g.nodes)
   237  		node, err := g.randomNode()
   238  		g.mu.RUnlock()
   239  		if err == errNoNodes {
   240  			// errNoNodes is a common error that will be resolved by the
   241  			// bootstrap process.
   242  			continue
   243  		} else if err != nil {
   244  			// Unusual error, create a logging statement.
   245  			g.log.Println("ERROR: could not pick a random node for uptime check:", err)
   246  			continue
   247  		}
   248  		if numNodes <= pruneNodeListLen {
   249  			// There are not enough nodes in the gateway - pruning more is
   250  			// probably a bad idea, and may affect the user's ability to
   251  			// connect to the network in the future.
   252  			continue
   253  		}
   254  		// Check whether this node is already a peer. If so, no need to dial
   255  		// them.
   256  		g.mu.RLock()
   257  		_, exists := g.peers[node]
   258  		g.mu.RUnlock()
   259  		if exists {
   260  			continue
   261  		}
   262  
   263  		// Try connecting to the random node. If the node is not reachable,
   264  		// remove them from the node list.
   265  		//
   266  		// NOTE: an error may be returned if the dial is canceled partway
   267  		// through, which would cause the node to be pruned even though it may
   268  		// be a good node. Because nodes are plentiful, this is an acceptable
   269  		// bug.
   270  		if err = g.staticPingNode(node); err != nil {
   271  			g.mu.Lock()
   272  			if len(g.nodes) > pruneNodeListLen {
   273  				// Check if the number of nodes is still above the threshold.
   274  				g.removeNode(node)
   275  				g.log.Debugf("INFO: removing node %q because it could not be reached during a random scan: %v", node, err)
   276  			}
   277  			g.mu.Unlock()
   278  		}
   279  	}
   280  }
   281  
   282  // permanentNodeManager tries to keep the Gateway's node list healthy. As long
   283  // as the Gateway has fewer than healthyNodeListLen nodes, it asks a random
   284  // peer for more nodes. It also continually pings nodes in order to establish
   285  // their connectivity. Unresponsive nodes are aggressively removed.
   286  func (g *Gateway) permanentNodeManager(closeChan chan struct{}) {
   287  	defer close(closeChan)
   288  
   289  	for {
   290  		// Wait 5 seconds so that a controlled number of node requests are made
   291  		// to peers.
   292  		select {
   293  		case <-time.After(nodeListDelay):
   294  		case <-g.threads.StopChan():
   295  			// Gateway is shutting down, close the thread.
   296  			return
   297  		}
   298  
   299  		g.mu.RLock()
   300  		numNodes := len(g.nodes)
   301  		peer, err := g.randomOutboundPeer()
   302  		g.mu.RUnlock()
   303  		if err == errNoPeers {
   304  			// errNoPeers is a common and expected error, there's no need to
   305  			// log it.
   306  			continue
   307  		} else if err != nil {
   308  			g.log.Println("ERROR: could not fetch a random peer:", err)
   309  			continue
   310  		}
   311  
   312  		// Determine whether there are a satisfactory number of nodes in the
   313  		// nodelist. If there are not, use the random peer from earlier to
   314  		// expand the node list.
   315  		if numNodes < healthyNodeListLen {
   316  			err := g.managedRPC(peer, "ShareNodes", g.requestNodes)
   317  			if err != nil {
   318  				g.log.Debugf("WARN: RPC ShareNodes failed on peer %q: %v", peer, err)
   319  				continue
   320  			}
   321  		} else {
   322  			// There are enough nodes in the gateway, no need to check for more
   323  			// every 5 seconds. Wait a while before checking again.
   324  			select {
   325  			case <-time.After(wellConnectedDelay):
   326  			case <-g.threads.StopChan():
   327  				// Gateway is shutting down, close the thread.
   328  				return
   329  			}
   330  		}
   331  	}
   332  }