github.com/NebulousLabs/Sia@v1.3.7/modules/gateway/nodes.go

package gateway

import (
	"errors"
	"fmt"
	"net"
	"time"

	"github.com/NebulousLabs/Sia/build"
	"github.com/NebulousLabs/Sia/encoding"
	"github.com/NebulousLabs/Sia/modules"
	"github.com/NebulousLabs/Sia/types"
	"github.com/NebulousLabs/fastrand"
)

var (
	errNodeExists    = errors.New("node already added")
	errNoNodes       = errors.New("no nodes in the node list")
	errOurAddress    = errors.New("can't add our own address")
	errPeerGenesisID = errors.New("peer has different genesis ID")
)

// A node represents a potential peer on the Sia network.
type node struct {
	NetAddress      modules.NetAddress `json:"netaddress"`
	WasOutboundPeer bool               `json:"wasoutboundpeer"`
}

// addNode adds an address to the set of nodes on the network.
func (g *Gateway) addNode(addr modules.NetAddress) error {
	if addr == g.myAddr {
		return errOurAddress
	} else if _, exists := g.nodes[addr]; exists {
		return errNodeExists
	} else if addr.IsStdValid() != nil {
		return errors.New("address is not valid: " + string(addr))
	} else if net.ParseIP(addr.Host()) == nil {
		return errors.New("address must be an IP address: " + string(addr))
	}
	g.nodes[addr] = &node{
		NetAddress:      addr,
		WasOutboundPeer: false,
	}
	return nil
}

// staticPingNode verifies that there is a reachable node at the provided
// address by performing the Sia gateway handshake protocol.
func (g *Gateway) staticPingNode(addr modules.NetAddress) error {
	// Ping the untrusted node to see whether or not there's actually a
	// reachable node at the provided address.
	conn, err := g.staticDial(addr)
	if err != nil {
		return err
	}
	defer conn.Close()

	// Read the node's version.
	remoteVersion, err := connectVersionHandshake(conn, build.Version)
	if err != nil {
		return err
	}

	if build.VersionCmp(remoteVersion, minimumAcceptablePeerVersion) < 0 {
		return nil // for older versions, this is where pinging ends
	}

	// Send our header.
	// NOTE: since we don't intend to complete the connection, we can send an
	// inaccurate NetAddress.
	ourHeader := sessionHeader{
		GenesisID:  types.GenesisID,
		UniqueID:   g.staticId,
		NetAddress: modules.NetAddress(conn.LocalAddr().String()),
	}
	if err := exchangeOurHeader(conn, ourHeader); err != nil {
		return err
	}

	// Read remote header.
	var remoteHeader sessionHeader
	if err := encoding.ReadObject(conn, &remoteHeader, maxEncodedSessionHeaderSize); err != nil {
		return fmt.Errorf("failed to read remote header: %v", err)
	} else if err := acceptableSessionHeader(ourHeader, remoteHeader, conn.RemoteAddr().String()); err != nil {
		return err
	}

	// Send special rejection string.
	if err := encoding.WriteObject(conn, modules.StopResponse); err != nil {
		return fmt.Errorf("failed to write header rejection: %v", err)
	}
	return nil
}

// removeNode will remove a node from the gateway.
func (g *Gateway) removeNode(addr modules.NetAddress) error {
	if _, exists := g.nodes[addr]; !exists {
		return errors.New("no record of that node")
	}
	delete(g.nodes, addr)
	return nil
}
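
// sketchAddNodeUsage is an illustrative sketch added by the editor; it is not
// part of the original file. It shows how the validation in addNode above
// behaves for a few representative inputs, assuming the caller already holds
// g.mu. The function name and the example addresses are hypothetical.
func (g *Gateway) sketchAddNodeUsage() {
	// Adding our own address is rejected outright.
	_ = g.addNode(g.myAddr) // returns errOurAddress

	// Hostnames are rejected: only literal IP addresses survive the
	// net.ParseIP check and make it into the node list.
	_ = g.addNode(modules.NetAddress("example.com:9981"))

	// A well-formed public IP:port is stored with WasOutboundPeer == false.
	_ = g.addNode(modules.NetAddress("8.8.8.8:9981"))
}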
// randomNode returns a random node from the gateway. An error can be returned
// if there are no nodes in the node list.
func (g *Gateway) randomNode() (modules.NetAddress, error) {
	if len(g.nodes) == 0 {
		return "", errNoNodes
	}

	// Select a random node. Note that the algorithm below is roughly linear
	// in the number of nodes known by the gateway, and this number can
	// approach every node on the network. If the network gets large, this
	// algorithm will either need to be refactored, or more likely a cap on
	// the size of g.nodes will need to be added.
	r := fastrand.Intn(len(g.nodes))
	for node := range g.nodes {
		if r <= 0 {
			return node, nil
		}
		r--
	}
	return "", errNoNodes
}

// shareNodes is the receiving end of the ShareNodes RPC. It writes up to 10
// randomly selected nodes to the caller.
func (g *Gateway) shareNodes(conn modules.PeerConn) error {
	conn.SetDeadline(time.Now().Add(connStdDeadline))
	remoteNA := modules.NetAddress(conn.RemoteAddr().String())

	// Assemble a list of nodes to send to the peer.
	var nodes []modules.NetAddress
	func() {
		g.mu.RLock()
		defer g.mu.RUnlock()

		// Gather candidates for sharing.
		gnodes := make([]modules.NetAddress, 0, len(g.nodes))
		for node := range g.nodes {
			// Don't share local peers with remote peers. That means that if
			// 'node' is loopback, it will only be shared if the remote peer
			// is also loopback. And if 'node' is private, it will only be
			// shared if the remote peer is either loopback or also private.
			if node.IsLoopback() && !remoteNA.IsLoopback() {
				continue
			}
			if node.IsLocal() && !remoteNA.IsLocal() {
				continue
			}
			gnodes = append(gnodes, node)
		}

		// Iterate through a random permutation of the candidates and select
		// the desired number of them.
		for _, i := range fastrand.Perm(len(gnodes)) {
			nodes = append(nodes, gnodes[i])
			if uint64(len(nodes)) == maxSharedNodes {
				break
			}
		}
	}()
	return encoding.WriteObject(conn, nodes)
}

// requestNodes is the calling end of the ShareNodes RPC.
func (g *Gateway) requestNodes(conn modules.PeerConn) error {
	conn.SetDeadline(time.Now().Add(connStdDeadline))

	var nodes []modules.NetAddress
	if err := encoding.ReadObject(conn, &nodes, maxSharedNodes*modules.MaxEncodedNetAddressLength); err != nil {
		return err
	}

	g.mu.Lock()
	changed := false
	for _, node := range nodes {
		err := g.addNode(node)
		if err != nil && err != errNodeExists && err != errOurAddress {
			g.log.Printf("WARN: peer '%v' sent the invalid addr '%v'", conn.RPCAddr(), node)
		}
		if err == nil {
			changed = true
		}
	}
	if changed {
		err := g.saveSync()
		if err != nil {
			g.log.Println("ERROR: unable to save new nodes added to the gateway:", err)
		}
	}
	g.mu.Unlock()
	return nil
}
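
// sketchShareFilter is an illustrative sketch added by the editor; it is not
// part of the original file. It restates, in isolation, the locality filter
// that shareNodes applies above: loopback nodes are only shared with loopback
// remotes, private (local) nodes are only shared with loopback or private
// remotes, and public nodes are shared with everyone. The function name is
// hypothetical.
func sketchShareFilter(candidate, remote modules.NetAddress) bool {
	if candidate.IsLoopback() && !remote.IsLoopback() {
		return false // never leak loopback addresses to non-loopback peers
	}
	if candidate.IsLocal() && !remote.IsLocal() {
		return false // never leak private addresses to public peers
	}
	return true
}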
// permanentNodePurger is a thread that runs throughout the lifetime of the
// gateway, purging unconnectable nodes from the node list in a sustainable
// way.
func (g *Gateway) permanentNodePurger(closeChan chan struct{}) {
	defer close(closeChan)

	for {
		// Choose an amount of time to wait before attempting to prune a node.
		// Nodes will occasionally go offline for some time, which can even be
		// days. We don't want to prune nodes with low-to-moderate uptime too
		// aggressively, as they are still useful to the network.
		//
		// But if there are a lot of nodes, we want to make sure that the node
		// list does not become saturated with inaccessible / offline nodes.
		// Pruning happens a lot faster when there are a lot of nodes in the
		// gateway.
		//
		// This value is a rate limit which tries to keep the node list in the
		// gateway healthy. A more complex algorithm might adjust this number
		// according to the percentage of prune attempts that are successful
		// (decrease prune frequency if most nodes in the database are online,
		// increase prune frequency if more nodes in the database are offline).
		waitTime := nodePurgeDelay
		g.mu.RLock()
		nodeCount := len(g.nodes)
		g.mu.RUnlock()
		if nodeCount > quickPruneListLen {
			waitTime = fastNodePurgeDelay
		}

		// Sleep as a purge ratelimit.
		select {
		case <-time.After(waitTime):
		case <-g.threads.StopChan():
			// The gateway is shutting down, close out the thread.
			return
		}

		// Get a random node for scanning.
		g.mu.RLock()
		numNodes := len(g.nodes)
		node, err := g.randomNode()
		g.mu.RUnlock()
		if err == errNoNodes {
			// errNoNodes is a common error that will be resolved by the
			// bootstrap process.
			continue
		} else if err != nil {
			// Unusual error, create a logging statement.
			g.log.Println("ERROR: could not pick a random node for uptime check:", err)
			continue
		}
		if numNodes <= pruneNodeListLen {
			// There are not enough nodes in the gateway - pruning more is
			// probably a bad idea, and may affect the user's ability to
			// connect to the network in the future.
			continue
		}
		// Check whether this node is already a peer. If so, no need to dial
		// them.
		g.mu.RLock()
		_, exists := g.peers[node]
		g.mu.RUnlock()
		if exists {
			continue
		}

		// Try connecting to the random node. If the node is not reachable,
		// remove it from the node list.
		//
		// NOTE: an error may be returned if the dial is canceled partway
		// through, which would cause the node to be pruned even though it may
		// be a good node. Because nodes are plentiful, this is an acceptable
		// bug.
		if err = g.staticPingNode(node); err != nil {
			g.mu.Lock()
			if len(g.nodes) > pruneNodeListLen {
				// Check that the number of nodes is still above the threshold
				// before pruning.
				g.removeNode(node)
				g.log.Debugf("INFO: removing node %q because it could not be reached during a random scan: %v", node, err)
			}
			g.mu.Unlock()
		}
	}
}
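
// sketchPurgeWait is an illustrative sketch added by the editor; it is not
// part of the original file. It isolates the rate-limit decision made at the
// top of permanentNodePurger above: purge attempts are normally spaced
// nodePurgeDelay apart, and drop to the shorter fastNodePurgeDelay once the
// node list grows beyond quickPruneListLen. The function name is hypothetical.
func sketchPurgeWait(nodeCount int) time.Duration {
	if nodeCount > quickPruneListLen {
		return fastNodePurgeDelay
	}
	return nodePurgeDelay
}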
// permanentNodeManager tries to keep the Gateway's node list healthy. As long
// as the Gateway has fewer than healthyNodeListLen nodes, it asks a random
// peer for more nodes. It also continually pings nodes in order to establish
// their connectivity. Unresponsive nodes are aggressively removed.
func (g *Gateway) permanentNodeManager(closeChan chan struct{}) {
	defer close(closeChan)

	for {
		// Wait 5 seconds so that a controlled number of node requests are made
		// to peers.
		select {
		case <-time.After(nodeListDelay):
		case <-g.threads.StopChan():
			// Gateway is shutting down, close the thread.
			return
		}

		g.mu.RLock()
		numNodes := len(g.nodes)
		peer, err := g.randomOutboundPeer()
		g.mu.RUnlock()
		if err == errNoPeers {
			// errNoPeers is a common and expected error, there's no need to
			// log it.
			continue
		} else if err != nil {
			g.log.Println("ERROR: could not fetch a random peer:", err)
			continue
		}

		// Determine whether there are a satisfactory number of nodes in the
		// node list. If there are not, use the random peer from earlier to
		// expand the node list.
		if numNodes < healthyNodeListLen {
			err := g.managedRPC(peer, "ShareNodes", g.requestNodes)
			if err != nil {
				g.log.Debugf("WARN: RPC ShareNodes failed on peer %q: %v", peer, err)
				continue
			}
		} else {
			// There are enough nodes in the gateway, no need to check for more
			// every 5 seconds. Wait a while before checking again.
			select {
			case <-time.After(wellConnectedDelay):
			case <-g.threads.StopChan():
				// Gateway is shutting down, close the thread.
				return
			}
		}
	}
}
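
// sketchStartNodeThreads is an illustrative sketch added by the editor; it is
// not part of the original file, and the real wiring lives in the Gateway
// constructor rather than here. It shows the pattern implied by the closeChan
// parameters above: each permanent loop is launched on its own goroutine and
// signals shutdown completion by closing the channel it was given. The
// function name is hypothetical.
func (g *Gateway) sketchStartNodeThreads() (purgerClosed, managerClosed chan struct{}) {
	purgerClosed = make(chan struct{})
	managerClosed = make(chan struct{})
	go g.permanentNodePurger(purgerClosed)   // closes purgerClosed when it returns
	go g.permanentNodeManager(managerClosed) // closes managerClosed when it returns
	return purgerClosed, managerClosed
}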