gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/gateway/nodes.go

package gateway

import (
	"errors"
	"fmt"
	"net"
	"time"

	"gitlab.com/NebulousLabs/fastrand"
	"gitlab.com/SiaPrime/SiaPrime/build"
	"gitlab.com/SiaPrime/SiaPrime/encoding"
	"gitlab.com/SiaPrime/SiaPrime/modules"
	"gitlab.com/SiaPrime/SiaPrime/types"
)

var (
	errNodeExists    = errors.New("node already added")
	errNoNodes       = errors.New("no nodes in the node list")
	errOurAddress    = errors.New("can't add our own address")
	errPeerGenesisID = errors.New("peer has different genesis ID")
)

// A node represents a potential peer on the Sia network.
type node struct {
	NetAddress      modules.NetAddress `json:"netaddress"`
	WasOutboundPeer bool               `json:"wasoutboundpeer"`
}

// addNode adds an address to the set of nodes on the network.
func (g *Gateway) addNode(addr modules.NetAddress) error {
	if addr == g.myAddr {
		return errOurAddress
	} else if _, exists := g.nodes[addr]; exists {
		return errNodeExists
	} else if addr.IsStdValid() != nil {
		return errors.New("address is not valid: " + string(addr))
	} else if net.ParseIP(addr.Host()) == nil {
		return errors.New("address must be an IP address: " + string(addr))
	}
	g.nodes[addr] = &node{
		NetAddress:      addr,
		WasOutboundPeer: false,
	}
	return nil
}

// staticPingNode verifies that there is a reachable node at the provided address
// by performing the Sia gateway handshake protocol.
func (g *Gateway) staticPingNode(addr modules.NetAddress) error {
	// Ping the untrusted node to see whether or not there's actually a
	// reachable node at the provided address.
	conn, err := g.staticDial(addr)
	if err != nil {
		return err
	}
	defer conn.Close()

	// Read the node's version.
	remoteVersion, err := connectVersionHandshake(conn, build.Version)
	if err != nil {
		return err
	}

	if err := acceptableVersion(remoteVersion); err != nil {
		// Return an error so that bad version peers are purged
		return err
	}

	// Send our header.
	// NOTE: since we don't intend to complete the connection, we can send an
	// inaccurate NetAddress.
	ourHeader := sessionHeader{
		GenesisID:  types.GenesisID,
		UniqueID:   g.staticID,
		NetAddress: modules.NetAddress(conn.LocalAddr().String()),
	}
	if err := exchangeOurHeader(conn, ourHeader); err != nil {
		return err
	}

	// Read remote header.
	var remoteHeader sessionHeader
	if err := encoding.ReadObject(conn, &remoteHeader, maxEncodedSessionHeaderSize); err != nil {
		return fmt.Errorf("failed to read remote header: %v", err)
	} else if err := acceptableSessionHeader(ourHeader, remoteHeader, conn.RemoteAddr().String()); err != nil {
		return err
	}

	// Send special rejection string.
	if err := encoding.WriteObject(conn, modules.StopResponse); err != nil {
		return fmt.Errorf("failed to write header rejection: %v", err)
	}
	return nil
}

// removeNode will remove a node from the gateway.
func (g *Gateway) removeNode(addr modules.NetAddress) error {
	if _, exists := g.nodes[addr]; !exists {
		return errors.New("no record of that node")
	}
	delete(g.nodes, addr)
	return nil
}

// randomNode returns a random node from the gateway. An error is returned
// if there are no nodes in the node list.
func (g *Gateway) randomNode() (modules.NetAddress, error) {
	if len(g.nodes) == 0 {
		return "", errNoNodes
	}

	// Select a random node. Note that the algorithm below is roughly linear in
	// the number of nodes known by the gateway, and this number can approach
	// every node on the network. If the network gets large, this algorithm
	// will either need to be refactored, or more likely a cap on the size of
	// g.nodes will need to be added (see the sketch after this function).
	r := fastrand.Intn(len(g.nodes))
	for node := range g.nodes {
		if r <= 0 {
			return node, nil
		}
		r--
	}
	return "", errNoNodes
}
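
// addNodeCapped is a minimal illustrative sketch, not part of the original
// file, of the cap on the size of g.nodes suggested in the comment above.
// maxNodeListLen is a hypothetical value that exists only for this sketch.
// Like addNode, it assumes the caller holds g.mu.
func (g *Gateway) addNodeCapped(addr modules.NetAddress) error {
	// Hypothetical cap, not a real gateway constant.
	const maxNodeListLen = 100000
	if len(g.nodes) >= maxNodeListLen {
		// Evict a random node to make room, keeping the node list bounded so
		// that the linear scan in randomNode stays cheap.
		if victim, err := g.randomNode(); err == nil {
			g.removeNode(victim)
		}
	}
	return g.addNode(addr)
}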

// shareNodes is the receiving end of the ShareNodes RPC. It writes up to 10
// randomly selected nodes to the caller.
func (g *Gateway) shareNodes(conn modules.PeerConn) error {
	conn.SetDeadline(time.Now().Add(connStdDeadline))
	remoteNA := modules.NetAddress(conn.RemoteAddr().String())

	// Assemble a list of nodes to send to the peer.
	var nodes []modules.NetAddress
	func() {
		g.mu.RLock()
		defer g.mu.RUnlock()

		// Gather candidates for sharing.
		gnodes := make([]modules.NetAddress, 0, len(g.nodes))
		for node := range g.nodes {
			// Don't share local peers with remote peers. That means that if 'node'
			// is loopback, it will only be shared if the remote peer is also
			// loopback. And if 'node' is private, it will only be shared if the
			// remote peer is either loopback or is also private.
			if node.IsLoopback() && !remoteNA.IsLoopback() {
				continue
			}
			if node.IsLocal() && !remoteNA.IsLocal() {
				continue
			}
			gnodes = append(gnodes, node)
		}

		// Iterate through a random permutation of the candidates and take up
		// to maxSharedNodes of them.
		for _, i := range fastrand.Perm(len(gnodes)) {
			nodes = append(nodes, gnodes[i])
			if uint64(len(nodes)) == maxSharedNodes {
				break
			}
		}
	}()
	return encoding.WriteObject(conn, nodes)
}

// requestNodes is the calling end of the ShareNodes RPC.
func (g *Gateway) requestNodes(conn modules.PeerConn) error {
	conn.SetDeadline(time.Now().Add(connStdDeadline))

	var nodes []modules.NetAddress
	if err := encoding.ReadObject(conn, &nodes, maxSharedNodes*modules.MaxEncodedNetAddressLength); err != nil {
		return err
	}

	g.mu.Lock()
	changed := false
	for _, node := range nodes {
		err := g.addNode(node)
		if err != nil && err != errNodeExists && err != errOurAddress {
			g.log.Printf("WARN: peer '%v' sent the invalid addr '%v'", conn.RPCAddr(), node)
		}
		if err == nil {
			changed = true
		}
	}
	if changed {
		err := g.saveSyncNodes()
		if err != nil {
			g.log.Println("ERROR: unable to save new nodes added to the gateway:", err)
		}
	}
	g.mu.Unlock()
	return nil
}
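
// Usage sketch (an illustration, not part of the original file): requestNodes
// is not called on its own; it is handed to the gateway's RPC machinery as the
// handler for an outgoing ShareNodes call, as permanentNodeManager does below
// when the node list is short:
//
//	if err := g.managedRPC(peer, "ShareNodes", g.requestNodes); err != nil {
//		// The peer could not be queried for more nodes; try again later.
//	}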

// permanentNodePurger is a thread that runs throughout the lifetime of the
// gateway, purging unconnectable nodes from the node list in a sustainable
// way.
func (g *Gateway) permanentNodePurger(closeChan chan struct{}) {
	defer close(closeChan)

	for {
		// Choose an amount of time to wait before attempting to prune a node.
		// Nodes will occasionally go offline for some time, which can even be
		// days. We don't want to prune nodes with low-to-moderate uptime too
		// aggressively, as they are still useful to the network.
		//
		// But if there are a lot of nodes, we want to make sure that the node
		// list does not become saturated with inaccessible / offline nodes.
		// Pruning happens a lot faster when there are a lot of nodes in the
		// gateway.
		//
		// This value is a ratelimit which tries to keep the node list in the
		// gateway healthy. A more complex algorithm might adjust this number
		// according to the percentage of prune attempts that are successful
		// (decrease prune frequency if most nodes in the database are online,
		// increase prune frequency if more nodes in the database are offline);
		// see the sketch after this function.
		waitTime := nodePurgeDelay
		g.mu.RLock()
		nodeCount := len(g.nodes)
		g.mu.RUnlock()
		if nodeCount > quickPruneListLen {
			waitTime = fastNodePurgeDelay
		}

		// Sleep as a purge ratelimit.
		select {
		case <-time.After(waitTime):
		case <-g.threads.StopChan():
			// The gateway is shutting down, close out the thread.
			return
		}

		// Get a random node for scanning.
		g.mu.RLock()
		numNodes := len(g.nodes)
		node, err := g.randomNode()
		g.mu.RUnlock()
		if err == errNoNodes {
			// errNoNodes is a common error that will be resolved by the
			// bootstrap process.
			continue
		} else if err != nil {
			// Unusual error, create a logging statement.
			g.log.Println("ERROR: could not pick a random node for uptime check:", err)
			continue
		}
		if numNodes <= pruneNodeListLen {
			// There are not enough nodes in the gateway - pruning more is
			// probably a bad idea, and may affect the user's ability to
			// connect to the network in the future.
			continue
		}
		// Check whether this node is already a peer. If so, no need to dial
		// it.
		g.mu.RLock()
		_, exists := g.peers[node]
		g.mu.RUnlock()
		if exists {
			continue
		}

		// Try connecting to the random node. If the node is not reachable,
		// remove it from the node list.
		//
		// NOTE: an error may be returned if the dial is canceled partway
		// through, which would cause the node to be pruned even though it may
		// be a good node. Because nodes are plentiful, this is an acceptable
		// bug.
		if err = g.staticPingNode(node); err != nil {
			g.mu.Lock()
			// Only prune if the number of nodes is still above the threshold.
			if len(g.nodes) > pruneNodeListLen {
				g.removeNode(node)
				g.log.Debugf("INFO: removing node %q because it could not be reached during a random scan: %v", node, err)
			}
			g.mu.Unlock()
		}
	}
}
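
// adaptivePurgeDelay is an illustrative sketch, not part of the original file,
// of the "more complex algorithm" mentioned in the ratelimit comment inside
// permanentNodePurger: it scales the wait between prune attempts by the
// fraction of recent ping attempts that failed. The recentPings and
// recentFailures counters are hypothetical inputs, not fields of the Gateway.
func adaptivePurgeDelay(recentPings, recentFailures int) time.Duration {
	if recentPings == 0 {
		// No data yet; fall back to the default ratelimit.
		return nodePurgeDelay
	}
	// A mostly-online node list prunes at the slow default rate; a mostly-offline
	// node list prunes faster, but never faster than the quick-prune rate.
	failureRatio := float64(recentFailures) / float64(recentPings)
	delay := time.Duration((1 - failureRatio) * float64(nodePurgeDelay))
	if delay < fastNodePurgeDelay {
		delay = fastNodePurgeDelay
	}
	return delay
}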

// permanentNodeManager tries to keep the Gateway's node list healthy. As long
// as the Gateway has fewer than healthyNodeListLen nodes, it asks a random
// peer for more nodes via the ShareNodes RPC. The permanentNodePurger thread,
// meanwhile, continually pings nodes in order to establish their connectivity
// and aggressively removes unresponsive ones.
func (g *Gateway) permanentNodeManager(closeChan chan struct{}) {
	defer close(closeChan)

	for {
		// Wait 5 seconds so that a controlled number of node requests are made
		// to peers.
		select {
		case <-time.After(nodeListDelay):
		case <-g.threads.StopChan():
			// Gateway is shutting down, close the thread.
			return
		}

		g.mu.RLock()
		numNodes := len(g.nodes)
		peer, err := g.randomOutboundPeer()
		g.mu.RUnlock()
		if err == errNoPeers {
			// errNoPeers is a common and expected error, there's no need to
			// log it.
			continue
		} else if err != nil {
			g.log.Println("ERROR: could not fetch a random peer:", err)
			continue
		}

		// Determine whether there is a satisfactory number of nodes in the
		// node list. If there is not, use the random peer from earlier to
		// expand the node list.
		if numNodes < healthyNodeListLen {
			err := g.managedRPC(peer, "ShareNodes", g.requestNodes)
			if err != nil {
				g.log.Debugf("WARN: RPC ShareNodes failed on peer %q: %v", peer, err)
				continue
			}
		} else {
			// There are enough nodes in the gateway, no need to check for more
			// every 5 seconds. Wait a while before checking again.
			select {
			case <-time.After(wellConnectedDelay):
			case <-g.threads.StopChan():
				// Gateway is shutting down, close the thread.
				return
			}
		}
	}
}