gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/gateway/nodes.go (about) 1 package gateway 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "time" 8 9 "gitlab.com/NebulousLabs/fastrand" 10 "gitlab.com/SiaPrime/SiaPrime/build" 11 "gitlab.com/SiaPrime/SiaPrime/encoding" 12 "gitlab.com/SiaPrime/SiaPrime/modules" 13 "gitlab.com/SiaPrime/SiaPrime/types" 14 ) 15 16 var ( 17 errNodeExists = errors.New("node already added") 18 errNoNodes = errors.New("no nodes in the node list") 19 errOurAddress = errors.New("can't add our own address") 20 errPeerGenesisID = errors.New("peer has different genesis ID") 21 ) 22 23 // A node represents a potential peer on the Sia network. 24 type node struct { 25 NetAddress modules.NetAddress `json:"netaddress"` 26 WasOutboundPeer bool `json:"wasoutboundpeer"` 27 } 28 29 // addNode adds an address to the set of nodes on the network. 30 func (g *Gateway) addNode(addr modules.NetAddress) error { 31 if addr == g.myAddr { 32 return errOurAddress 33 } else if _, exists := g.nodes[addr]; exists { 34 return errNodeExists 35 } else if addr.IsStdValid() != nil { 36 return errors.New("address is not valid: " + string(addr)) 37 } else if net.ParseIP(addr.Host()) == nil { 38 return errors.New("address must be an IP address: " + string(addr)) 39 } 40 g.nodes[addr] = &node{ 41 NetAddress: addr, 42 WasOutboundPeer: false, 43 } 44 return nil 45 } 46 47 // staticPingNode verifies that there is a reachable node at the provided address 48 // by performing the Sia gateway handshake protocol. 49 func (g *Gateway) staticPingNode(addr modules.NetAddress) error { 50 // Ping the untrusted node to see whether or not there's actually a 51 // reachable node at the provided address. 52 conn, err := g.staticDial(addr) 53 if err != nil { 54 return err 55 } 56 defer conn.Close() 57 58 // Read the node's version. 59 remoteVersion, err := connectVersionHandshake(conn, build.Version) 60 if err != nil { 61 return err 62 } 63 64 if err := acceptableVersion(remoteVersion); err != nil { 65 // Return an error so that bad version peers are purged 66 return err 67 } 68 69 // Send our header. 70 // NOTE: since we don't intend to complete the connection, we can send an 71 // inaccurate NetAddress. 72 ourHeader := sessionHeader{ 73 GenesisID: types.GenesisID, 74 UniqueID: g.staticID, 75 NetAddress: modules.NetAddress(conn.LocalAddr().String()), 76 } 77 if err := exchangeOurHeader(conn, ourHeader); err != nil { 78 return err 79 } 80 81 // Read remote header. 82 var remoteHeader sessionHeader 83 if err := encoding.ReadObject(conn, &remoteHeader, maxEncodedSessionHeaderSize); err != nil { 84 return fmt.Errorf("failed to read remote header: %v", err) 85 } else if err := acceptableSessionHeader(ourHeader, remoteHeader, conn.RemoteAddr().String()); err != nil { 86 return err 87 } 88 89 // Send special rejection string. 90 if err := encoding.WriteObject(conn, modules.StopResponse); err != nil { 91 return fmt.Errorf("failed to write header rejection: %v", err) 92 } 93 return nil 94 } 95 96 // removeNode will remove a node from the gateway. 97 func (g *Gateway) removeNode(addr modules.NetAddress) error { 98 if _, exists := g.nodes[addr]; !exists { 99 return errors.New("no record of that node") 100 } 101 delete(g.nodes, addr) 102 return nil 103 } 104 105 // randomNode returns a random node from the gateway. An error can be returned 106 // if there are no nodes in the node list. 107 func (g *Gateway) randomNode() (modules.NetAddress, error) { 108 if len(g.nodes) == 0 { 109 return "", errNoPeers 110 } 111 112 // Select a random peer. Note that the algorithm below is roughly linear in 113 // the number of nodes known by the gateway, and this number can approach 114 // every node on the network. If the network gets large, this algorithm 115 // will either need to be refactored, or more likely a cap on the size of 116 // g.nodes will need to be added. 117 r := fastrand.Intn(len(g.nodes)) 118 for node := range g.nodes { 119 if r <= 0 { 120 return node, nil 121 } 122 r-- 123 } 124 return "", errNoPeers 125 } 126 127 // shareNodes is the receiving end of the ShareNodes RPC. It writes up to 10 128 // randomly selected nodes to the caller. 129 func (g *Gateway) shareNodes(conn modules.PeerConn) error { 130 conn.SetDeadline(time.Now().Add(connStdDeadline)) 131 remoteNA := modules.NetAddress(conn.RemoteAddr().String()) 132 133 // Assemble a list of nodes to send to the peer. 134 var nodes []modules.NetAddress 135 func() { 136 g.mu.RLock() 137 defer g.mu.RUnlock() 138 139 // Gather candidates for sharing. 140 gnodes := make([]modules.NetAddress, 0, len(g.nodes)) 141 for node := range g.nodes { 142 // Don't share local peers with remote peers. That means that if 'node' 143 // is loopback, it will only be shared if the remote peer is also 144 // loopback. And if 'node' is private, it will only be shared if the 145 // remote peer is either the loopback or is also private. 146 if node.IsLoopback() && !remoteNA.IsLoopback() { 147 continue 148 } 149 if node.IsLocal() && !remoteNA.IsLocal() { 150 continue 151 } 152 gnodes = append(gnodes, node) 153 } 154 155 // Iterate through the random permutation of nodes and select the 156 // desirable ones. 157 for _, i := range fastrand.Perm(len(gnodes)) { 158 nodes = append(nodes, gnodes[i]) 159 if uint64(len(nodes)) == maxSharedNodes { 160 break 161 } 162 } 163 }() 164 return encoding.WriteObject(conn, nodes) 165 } 166 167 // requestNodes is the calling end of the ShareNodes RPC. 168 func (g *Gateway) requestNodes(conn modules.PeerConn) error { 169 conn.SetDeadline(time.Now().Add(connStdDeadline)) 170 171 var nodes []modules.NetAddress 172 if err := encoding.ReadObject(conn, &nodes, maxSharedNodes*modules.MaxEncodedNetAddressLength); err != nil { 173 return err 174 } 175 176 g.mu.Lock() 177 changed := false 178 for _, node := range nodes { 179 err := g.addNode(node) 180 if err != nil && err != errNodeExists && err != errOurAddress { 181 g.log.Printf("WARN: peer '%v' sent the invalid addr '%v'", conn.RPCAddr(), node) 182 } 183 if err == nil { 184 changed = true 185 } 186 } 187 if changed { 188 err := g.saveSyncNodes() 189 if err != nil { 190 g.log.Println("ERROR: unable to save new nodes added to the gateway:", err) 191 } 192 } 193 g.mu.Unlock() 194 return nil 195 } 196 197 // permanentNodePurger is a thread that runs throughout the lifetime of the 198 // gateway, purging unconnectable nodes from the node list in a sustainable 199 // way. 200 func (g *Gateway) permanentNodePurger(closeChan chan struct{}) { 201 defer close(closeChan) 202 203 for { 204 // Choose an amount of time to wait before attempting to prune a node. 205 // Nodes will occasionally go offline for some time, which can even be 206 // days. We don't want to too aggressively prune nodes with low-moderate 207 // uptime, as they are still useful to the network. 208 // 209 // But if there are a lot of nodes, we want to make sure that the node 210 // list does not become saturated with inaccessible / offline nodes. 211 // Pruning happens a lot faster when there are a lot of nodes in the 212 // gateway. 213 // 214 // This value is a ratelimit which tries to keep the nodes list in the 215 // gateawy healthy. A more complex algorithm might adjust this number 216 // according to the percentage of prune attempts that are successful 217 // (decrease prune frequency if most nodes in the database are online, 218 // increase prune frequency if more nodes in the database are offline). 219 waitTime := nodePurgeDelay 220 g.mu.RLock() 221 nodeCount := len(g.nodes) 222 g.mu.RUnlock() 223 if nodeCount > quickPruneListLen { 224 waitTime = fastNodePurgeDelay 225 } 226 227 // Sleep as a purge ratelimit. 228 select { 229 case <-time.After(waitTime): 230 case <-g.threads.StopChan(): 231 // The gateway is shutting down, close out the thread. 232 return 233 } 234 235 // Get a random node for scanning. 236 g.mu.RLock() 237 numNodes := len(g.nodes) 238 node, err := g.randomNode() 239 g.mu.RUnlock() 240 if err == errNoNodes { 241 // errNoNodes is a common error that will be resolved by the 242 // bootstrap process. 243 continue 244 } else if err != nil { 245 // Unusual error, create a logging statement. 246 g.log.Println("ERROR: could not pick a random node for uptime check:", err) 247 continue 248 } 249 if numNodes <= pruneNodeListLen { 250 // There are not enough nodes in the gateway - pruning more is 251 // probably a bad idea, and may affect the user's ability to 252 // connect to the network in the future. 253 continue 254 } 255 // Check whether this node is already a peer. If so, no need to dial 256 // them. 257 g.mu.RLock() 258 _, exists := g.peers[node] 259 g.mu.RUnlock() 260 if exists { 261 continue 262 } 263 264 // Try connecting to the random node. If the node is not reachable, 265 // remove them from the node list. 266 // 267 // NOTE: an error may be returned if the dial is canceled partway 268 // through, which would cause the node to be pruned even though it may 269 // be a good node. Because nodes are plentiful, this is an acceptable 270 // bug. 271 if err = g.staticPingNode(node); err != nil { 272 g.mu.Lock() 273 if len(g.nodes) > pruneNodeListLen { 274 // Check if the number of nodes is still above the threshold. 275 g.removeNode(node) 276 g.log.Debugf("INFO: removing node %q because it could not be reached during a random scan: %v", node, err) 277 } 278 g.mu.Unlock() 279 } 280 } 281 } 282 283 // permanentNodeManager tries to keep the Gateway's node list healthy. As long 284 // as the Gateway has fewer than healthyNodeListLen nodes, it asks a random 285 // peer for more nodes. It also continually pings nodes in order to establish 286 // their connectivity. Unresponsive nodes are aggressively removed. 287 func (g *Gateway) permanentNodeManager(closeChan chan struct{}) { 288 defer close(closeChan) 289 290 for { 291 // Wait 5 seconds so that a controlled number of node requests are made 292 // to peers. 293 select { 294 case <-time.After(nodeListDelay): 295 case <-g.threads.StopChan(): 296 // Gateway is shutting down, close the thread. 297 return 298 } 299 300 g.mu.RLock() 301 numNodes := len(g.nodes) 302 peer, err := g.randomOutboundPeer() 303 g.mu.RUnlock() 304 if err == errNoPeers { 305 // errNoPeers is a common and expected error, there's no need to 306 // log it. 307 continue 308 } else if err != nil { 309 g.log.Println("ERROR: could not fetch a random peer:", err) 310 continue 311 } 312 313 // Determine whether there are a satisfactory number of nodes in the 314 // nodelist. If there are not, use the random peer from earlier to 315 // expand the node list. 316 if numNodes < healthyNodeListLen { 317 err := g.managedRPC(peer, "ShareNodes", g.requestNodes) 318 if err != nil { 319 g.log.Debugf("WARN: RPC ShareNodes failed on peer %q: %v", peer, err) 320 continue 321 } 322 } else { 323 // There are enough nodes in the gateway, no need to check for more 324 // every 5 seconds. Wait a while before checking again. 325 select { 326 case <-time.After(wellConnectedDelay): 327 case <-g.threads.StopChan(): 328 // Gateway is shutting down, close the thread. 329 return 330 } 331 } 332 } 333 }