github.com/Synthesix/Sia@v1.3.3-0.20180413141344-f863baeed3ca/modules/renter/hostdb/scan.go

package hostdb

// scan.go contains the functions which periodically scan the list of all hosts
// to see which hosts are online or offline, and to get any updates to the
// settings of the hosts.

import (
	"net"
	"time"

	"github.com/NebulousLabs/fastrand"
	"github.com/Synthesix/Sia/build"
	"github.com/Synthesix/Sia/crypto"
	"github.com/Synthesix/Sia/encoding"
	"github.com/Synthesix/Sia/modules"
)

// queueScan will add a host to the queue to be scanned.
func (hdb *HostDB) queueScan(entry modules.HostDBEntry) {
	// If this entry is already in the scan pool, return immediately.
	_, exists := hdb.scanMap[entry.PublicKey.String()]
	if exists {
		return
	}

	// Add the entry to a waitlist, then check if any thread is currently
	// emptying the waitlist. If not, spawn a thread to empty the waitlist.
	hdb.scanMap[entry.PublicKey.String()] = struct{}{}
	hdb.scanList = append(hdb.scanList, entry)
	if hdb.scanWait {
		// Another thread is emptying the scan list, nothing to worry about.
		return
	}

	// Sanity check - the scan map should not be larger than the scan list
	// plus the number of scans that can be in flight at once.
	if build.DEBUG && len(hdb.scanMap) > len(hdb.scanList)+maxScanningThreads {
		hdb.log.Critical("The hostdb scan map has seemingly grown too large:", len(hdb.scanMap), len(hdb.scanList), maxScanningThreads)
	}

	hdb.scanWait = true
	go func() {
		scanPool := make(chan modules.HostDBEntry)
		defer close(scanPool)

		// Nobody is emptying the scan list, volunteer.
		if hdb.tg.Add() != nil {
			// Hostdb is shutting down, don't spin up another thread. It is
			// okay to leave scanWait set to true as that will not affect
			// shutdown.
			return
		}
		defer hdb.tg.Done()

		// Due to the patterns used to spin up scanning threads, it's possible
		// that we get to this point while all scanning threads are currently
		// used up, completing jobs that were sent out by the previous pool
		// managing thread. This thread is at risk of deadlocking if there's
		// not at least one scanning thread accepting work that it created
		// itself, so we use a starterThread exception and spin up
		// one-thread-too-many on the first iteration to ensure that we do not
		// deadlock.
		starterThread := false
		for {
			// If the scanList is empty, this thread can spin down.
			hdb.mu.Lock()
			if len(hdb.scanList) == 0 {
				// Scan list is empty, can exit. Let the world know that nobody
				// is emptying the scan list anymore.
				hdb.scanWait = false
				hdb.mu.Unlock()
				return
			}

			// Get the next host, shrink the scan list.
			entry := hdb.scanList[0]
			hdb.scanList = hdb.scanList[1:]
			delete(hdb.scanMap, entry.PublicKey.String())
			scansRemaining := len(hdb.scanList)

			// Grab the most recent entry for this host.
			recentEntry, exists := hdb.hostTree.Select(entry.PublicKey)
			if exists {
				entry = recentEntry
			}

			// Try to send this entry to an existing idle worker (non-blocking).
			select {
			case scanPool <- entry:
				hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
				hdb.mu.Unlock()
				continue
			default:
			}

			// Create a new worker thread.
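			// The `|| !starterThread` clause below implements the
			// starterThread exception described above: it guarantees that at
			// least one worker is reading from this manager's scanPool, so
			// the blocking send further down cannot deadlock.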
			if hdb.scanningThreads < maxScanningThreads || !starterThread {
				starterThread = true
				hdb.scanningThreads++
				go func() {
					hdb.threadedProbeHosts(scanPool)
					hdb.mu.Lock()
					hdb.scanningThreads--
					hdb.mu.Unlock()
				}()
			}
			hdb.mu.Unlock()

			// Block while waiting for an opening in the scan pool.
			hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
			select {
			case scanPool <- entry:
				// iterate again
			case <-hdb.tg.StopChan():
				// quit
				return
			}
		}
	}()
}

// updateEntry updates an entry in the hostdb after a scan has taken place.
//
// CAUTION: This function will automatically add multiple entries to a new host
// to give that host some base uptime. This makes this function co-dependent
// with the host weight functions. Adjustments to the host weight functions
// need to keep this function in mind, and vice versa.
func (hdb *HostDB) updateEntry(entry modules.HostDBEntry, netErr error) {
	// If the scan failed because we don't have Internet access, toss out this update.
	if netErr != nil && !hdb.gateway.Online() {
		return
	}

	// Grab the host from the host tree, and update it with the new settings.
	newEntry, exists := hdb.hostTree.Select(entry.PublicKey)
	if exists {
		newEntry.HostExternalSettings = entry.HostExternalSettings
	} else {
		newEntry = entry
	}

	// Update the recent interactions with this host.
	if netErr == nil {
		newEntry.RecentSuccessfulInteractions++
	} else {
		newEntry.RecentFailedInteractions++
	}

	// Add the datapoints for the scan.
	if len(newEntry.ScanHistory) < 2 {
		// Add two scans to the scan history. Two are needed because the scans
		// are forward looking, but we want this first scan to represent as
		// much as one week of uptime or downtime.
		earliestStartTime := time.Now().Add(time.Hour * 7 * 24 * -1)                                                   // Permit up to a week of starting uptime or downtime.
		suggestedStartTime := time.Now().Add(time.Minute * 10 * time.Duration(hdb.blockHeight-entry.FirstSeen+1) * -1) // Add one to FirstSeen in case FirstSeen is this block, which guarantees strictly increasing timestamps.
		if suggestedStartTime.Before(earliestStartTime) {
			suggestedStartTime = earliestStartTime
		}
		newEntry.ScanHistory = modules.HostDBScans{
			{Timestamp: suggestedStartTime, Success: netErr == nil},
			{Timestamp: time.Now(), Success: netErr == nil},
		}
	} else {
		if newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Success && netErr != nil {
			hdb.log.Debugf("Host %v is being downgraded from an online host to an offline host: %v\n", newEntry.PublicKey.String(), netErr)
		}

		// Make sure that the current time is after the timestamp of the
		// previous scan. It may not be if the system clock has changed. This
		// will prevent the sort-check sanity checks from triggering.
		newTimestamp := time.Now()
		prevTimestamp := newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Timestamp
		if !newTimestamp.After(prevTimestamp) {
			newTimestamp = prevTimestamp.Add(time.Second)
		}
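		// Append the scan as a single (timestamp, success) datapoint. Older
		// datapoints are compressed into HistoricUptime and HistoricDowntime
		// further down, which keeps the in-memory history short.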
		newEntry.ScanHistory = append(newEntry.ScanHistory, modules.HostDBScan{Timestamp: newTimestamp, Success: netErr == nil})
	}

	// Check whether any of the recent scans demonstrate uptime. The pruning and
	// compression of the history ensure that there are only relatively recent
	// scans represented.
	var recentUptime bool
	for _, scan := range newEntry.ScanHistory {
		if scan.Success {
			recentUptime = true
		}
	}

	// If the host has been offline for too long, delete the host from the
	// hostdb. Only delete if there have been enough scans over a long enough
	// period to be confident that the host really is offline for good.
	if time.Now().Sub(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime && !recentUptime && len(newEntry.ScanHistory) >= minScans {
		err := hdb.hostTree.Remove(newEntry.PublicKey)
		if err != nil {
			hdb.log.Println("ERROR: unable to remove host entry which has had extensive downtime:", err)
		}

		// The function should terminate here as no more interaction is needed
		// with this host.
		return
	}

	// Compress any old scans into the historic values.
	for len(newEntry.ScanHistory) > minScans && time.Now().Sub(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime {
		timePassed := newEntry.ScanHistory[1].Timestamp.Sub(newEntry.ScanHistory[0].Timestamp)
		if newEntry.ScanHistory[0].Success {
			newEntry.HistoricUptime += timePassed
		} else {
			newEntry.HistoricDowntime += timePassed
		}
		newEntry.ScanHistory = newEntry.ScanHistory[1:]
	}

	// Add the updated entry.
	if !exists {
		err := hdb.hostTree.Insert(newEntry)
		if err != nil {
			hdb.log.Println("ERROR: unable to insert entry which was thought to be new:", err)
		} else {
			hdb.log.Debugf("Adding host %v to the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
		}
	} else {
		err := hdb.hostTree.Modify(newEntry)
		if err != nil {
			hdb.log.Println("ERROR: unable to modify entry which is thought to exist:", err)
		} else {
			hdb.log.Debugf("Updating host %v in the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
		}
	}
}

// managedScanHost will connect to a host and grab its settings, verifying
// uptime and updating the hostdb with the host's current settings.
func (hdb *HostDB) managedScanHost(entry modules.HostDBEntry) {
	// Request settings from the queued host entry.
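	// A scan is a bare settings handshake: dial the host with a timeout,
	// write the RPCSettings identifier, and read back the host's external
	// settings, verifying them against the host's public key.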
	netAddr := entry.NetAddress
	pubKey := entry.PublicKey
	hdb.log.Debugf("Scanning host %v at %v", pubKey, netAddr)

	// Update the historic interactions of the entry if necessary.
	hdb.mu.RLock()
	updateHostHistoricInteractions(&entry, hdb.blockHeight)
	hdb.mu.RUnlock()

	var settings modules.HostExternalSettings
	err := func() error {
		dialer := &net.Dialer{
			Cancel:  hdb.tg.StopChan(),
			Timeout: hostRequestTimeout,
		}
		conn, err := dialer.Dial("tcp", string(netAddr))
		if err != nil {
			return err
		}
		connCloseChan := make(chan struct{})
		go func() {
			select {
			case <-hdb.tg.StopChan():
			case <-connCloseChan:
			}
			conn.Close()
		}()
		defer close(connCloseChan)
		conn.SetDeadline(time.Now().Add(hostScanDeadline))

		err = encoding.WriteObject(conn, modules.RPCSettings)
		if err != nil {
			return err
		}
		var pubkey crypto.PublicKey
		copy(pubkey[:], pubKey.Key)
		return crypto.ReadSignedObject(conn, &settings, maxSettingsLen, pubkey)
	}()
	if err != nil {
		hdb.log.Debugf("Scan of host at %v failed: %v", netAddr, err)
	} else {
		hdb.log.Debugf("Scan of host at %v succeeded.", netAddr)
		entry.HostExternalSettings = settings
	}

	// Update the host tree with the results of the scan, including any error
	// that was encountered.
	hdb.mu.Lock()
	hdb.updateEntry(entry, err)
	hdb.mu.Unlock()
}

// threadedProbeHosts pulls hosts from the scan pool and runs a scan on them.
func (hdb *HostDB) threadedProbeHosts(scanPool <-chan modules.HostDBEntry) {
	err := hdb.tg.Add()
	if err != nil {
		return
	}
	defer hdb.tg.Done()
	for hostEntry := range scanPool {
		// Block until the hostdb has internet connectivity.
		for {
			hdb.mu.RLock()
			online := hdb.gateway.Online()
			hdb.mu.RUnlock()
			if online {
				break
			}
			select {
			case <-time.After(time.Second * 30):
				continue
			case <-hdb.tg.StopChan():
				return
			}
		}

		// There appears to be internet connectivity, continue with the
		// scan.
		hdb.managedScanHost(hostEntry)
	}
}

// threadedScan is an ongoing function which will query the full set of hosts
// every few hours to see who is online and available for uploading.
func (hdb *HostDB) threadedScan() {
	err := hdb.tg.Add()
	if err != nil {
		return
	}
	defer hdb.tg.Done()

	for {
		// Set up a scan for the hostCheckupQuantity most valuable hosts in
		// the hostdb. Hosts that fail their scans will be docked
		// significantly, pushing them further back in the hierarchy, which
		// ensures that for the most part only online hosts are getting
		// scanned unless there are fewer than hostCheckupQuantity of them.

		// Grab a set of hosts to scan, including hosts that are active,
		// inactive, and offline to get high diversity.
		var onlineHosts, offlineHosts []modules.HostDBEntry
		allHosts := hdb.hostTree.All()
		for i := len(allHosts) - 1; i >= 0; i-- {
			if len(onlineHosts) >= hostCheckupQuantity && len(offlineHosts) >= hostCheckupQuantity {
				break
			}

			// Figure out if the host is online or offline.
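			// A host counts as online only if it has a scan history and its
			// most recent scan succeeded; hosts that have never been scanned
			// are treated as offline.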
			host := allHosts[i]
			online := len(host.ScanHistory) > 0 && host.ScanHistory[len(host.ScanHistory)-1].Success
			if online && len(onlineHosts) < hostCheckupQuantity {
				onlineHosts = append(onlineHosts, host)
			} else if !online && len(offlineHosts) < hostCheckupQuantity {
				offlineHosts = append(offlineHosts, host)
			}
		}

		// Queue the scans for each host.
		hdb.log.Println("Performing scan on", len(onlineHosts), "online hosts and", len(offlineHosts), "offline hosts.")
		hdb.mu.Lock()
		for _, host := range onlineHosts {
			hdb.queueScan(host)
		}
		for _, host := range offlineHosts {
			hdb.queueScan(host)
		}
		hdb.mu.Unlock()

		// Sleep for a random amount of time before doing another round of
		// scanning. The minimums and maximums keep the scan time reasonable,
		// while the randomness prevents the scanning from always happening at
		// the same time of day or week.
		sleepRange := uint64(maxScanSleep - minScanSleep)
		sleepTime := minScanSleep + time.Duration(fastrand.Uint64n(sleepRange))

		// Sleep until it's time for the next scan cycle.
		select {
		case <-hdb.tg.StopChan():
			return
		case <-time.After(sleepTime):
		}
	}
}
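// As an illustration of the randomized sleep above: if, hypothetically,
// minScanSleep were 1 hour and maxScanSleep were 3 hours (the actual
// constants are defined elsewhere in this package), then
//
//	sleepRange := uint64(3*time.Hour - 1*time.Hour)
//	sleepTime := 1*time.Hour + time.Duration(fastrand.Uint64n(sleepRange))
//
// would leave sleepTime uniformly distributed in [1h, 3h).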