package hostdb

// scan.go contains the functions which periodically scan the list of all hosts
// to see which hosts are online or offline, and to get any updates to the
// settings of the hosts.

import (
	"fmt"
	"net"
	"sort"
	"time"

	"SiaPrime/build"
	"SiaPrime/crypto"
	"SiaPrime/encoding"
	"SiaPrime/modules"
	"SiaPrime/modules/renter/hostdb/hosttree"
	"gitlab.com/NebulousLabs/fastrand"
)

// equalIPNets checks if two slices of IP subnets contain the same subnets.
func equalIPNets(ipNetsA, ipNetsB []string) bool {
	// Check the length first.
	if len(ipNetsA) != len(ipNetsB) {
		return false
	}
	// Create a map of all the subnets in ipNetsA.
	mapNetsA := make(map[string]struct{})
	for _, subnet := range ipNetsA {
		mapNetsA[subnet] = struct{}{}
	}
	// Make sure that all the subnets from ipNetsB are in the map.
	for _, subnet := range ipNetsB {
		if _, exists := mapNetsA[subnet]; !exists {
			return false
		}
	}
	return true
}

// queueScan will add a host to the queue to be scanned. The host will be added
// at a random position, which means that the order in which queueScan is
// called is not necessarily the order in which the hosts get scanned. That
// guarantees a random scan order during the initial scan.
func (hdb *HostDB) queueScan(entry modules.HostDBEntry) {
	// If this entry is already in the scan pool, we can return immediately.
	_, exists := hdb.scanMap[entry.PublicKey.String()]
	if exists {
		return
	}
	// Add the entry to a random position in the waitlist.
	hdb.scanMap[entry.PublicKey.String()] = struct{}{}
	hdb.scanList = append(hdb.scanList, entry)
	if len(hdb.scanList) > 1 {
		i := len(hdb.scanList) - 1
		j := fastrand.Intn(i)
		hdb.scanList[i], hdb.scanList[j] = hdb.scanList[j], hdb.scanList[i]
	}
	// Check if any thread is currently emptying the waitlist. If not, spawn a
	// thread to empty the waitlist.
	if hdb.scanWait {
		// Another thread is emptying the scan list, nothing to worry about.
		return
	}

	// Sanity check - the scan map and the scan list should have the same
	// length.
	if build.DEBUG && len(hdb.scanMap) > len(hdb.scanList)+maxScanningThreads {
		hdb.log.Critical("The hostdb scan map has seemingly grown too large:", len(hdb.scanMap), len(hdb.scanList), maxScanningThreads)
	}

	hdb.scanWait = true
	go func() {
		scanPool := make(chan modules.HostDBEntry)
		defer close(scanPool)

		// Nobody is emptying the scan list, volunteer.
		if hdb.tg.Add() != nil {
			// Hostdb is shutting down, don't spin up another thread. It is
			// okay to leave scanWait set to true as that will not affect
			// shutdown.
			return
		}
		defer hdb.tg.Done()

		// Block the scan when a specific dependency is provided.
		hdb.deps.Disrupt("BlockScan")

		// Due to the patterns used to spin up scanning threads, it's possible
		// that we get to this point while all scanning threads are currently
		// used up, completing jobs that were sent out by the previous pool
		// managing thread. This thread is at risk of deadlocking if there's
		// not at least one scanning thread accepting work that it created
		// itself, so we use a starterThread exception and spin up
		// one-thread-too-many on the first iteration to ensure that we do not
		// deadlock.
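		//
		// Concretely: scanPool above is a fresh, unbuffered channel, while
		// hdb.scanningThreads counts workers across all pools. If all
		// maxScanningThreads workers are still draining a previous manager's
		// channel, this manager would otherwise spawn no worker of its own
		// and block forever on the send below.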
		starterThread := false
		for {
			// If the scanList is empty, this thread can spin down.
			hdb.mu.Lock()
			if len(hdb.scanList) == 0 {
				// Scan list is empty, we can exit. Let the world know that
				// nobody is emptying the scan list anymore.
				hdb.scanWait = false
				hdb.mu.Unlock()
				return
			}

			// Get the next host and shrink the scan list.
			entry := hdb.scanList[0]
			hdb.scanList = hdb.scanList[1:]
			delete(hdb.scanMap, entry.PublicKey.String())
			scansRemaining := len(hdb.scanList)

			// Grab the most recent entry for this host.
			recentEntry, exists := hdb.hostTree.Select(entry.PublicKey)
			if exists {
				entry = recentEntry
			}

			// Try to send this entry to an existing idle worker (non-blocking).
			select {
			case scanPool <- entry:
				hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
				hdb.mu.Unlock()
				continue
			default:
			}

			// Create a new worker thread.
			if hdb.scanningThreads < maxScanningThreads || !starterThread {
				starterThread = true
				hdb.scanningThreads++
				if err := hdb.tg.Add(); err != nil {
					hdb.mu.Unlock()
					return
				}
				go func() {
					defer hdb.tg.Done()
					hdb.threadedProbeHosts(scanPool)
					hdb.mu.Lock()
					hdb.scanningThreads--
					hdb.mu.Unlock()
				}()
			}
			hdb.mu.Unlock()

			// Block while waiting for an opening in the scan pool.
			hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
			select {
			case scanPool <- entry:
				// Iterate again.
			case <-hdb.tg.StopChan():
				// Quit.
				return
			}
		}
	}()
}

// updateEntry updates an entry in the hostdb after a scan has taken place.
//
// CAUTION: This function will automatically add multiple scan datapoints for a
// new host to give that host some base uptime. This makes this function
// co-dependent with the host weight functions. Adjustments to the host weight
// functions need to keep this function in mind, and vice-versa.
func (hdb *HostDB) updateEntry(entry modules.HostDBEntry, netErr error) {
	// If the scan failed because we don't have Internet access, toss out this
	// update.
	if netErr != nil && !hdb.gateway.Online() {
		return
	}

	// Grab the host from the host tree, and update it with the new settings.
	newEntry, exists := hdb.hostTree.Select(entry.PublicKey)
	if exists {
		newEntry.HostExternalSettings = entry.HostExternalSettings
		newEntry.IPNets = entry.IPNets
		newEntry.LastIPNetChange = entry.LastIPNetChange
	} else {
		newEntry = entry
	}

	// Update the recent interactions with this host.
	if netErr == nil {
		newEntry.RecentSuccessfulInteractions++
	} else {
		newEntry.RecentFailedInteractions++
	}

	// Add the datapoints for the scan.
	if len(newEntry.ScanHistory) < 2 {
		// Add two scans to the scan history. Two are needed because the scans
		// are forward looking, but we want this first scan to represent as
		// much as one week of uptime or downtime.
		earliestStartTime := time.Now().Add(time.Hour * 7 * 24 * -1)                                                   // Permit up to a week of starting uptime or downtime.
		suggestedStartTime := time.Now().Add(time.Minute * 10 * time.Duration(hdb.blockHeight-entry.FirstSeen+1) * -1) // Add one to the FirstSeen in case FirstSeen is this block, which guarantees incrementing order.
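		// Example of the arithmetic above, assuming the ~10 minute target
		// block time: a host first seen 30 blocks ago gets a suggested start
		// time of (30+1)*10 = 310 minutes in the past, which the check below
		// clamps to at most one week.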
		if suggestedStartTime.Before(earliestStartTime) {
			suggestedStartTime = earliestStartTime
		}
		newEntry.ScanHistory = modules.HostDBScans{
			{Timestamp: suggestedStartTime, Success: netErr == nil},
			{Timestamp: time.Now(), Success: netErr == nil},
		}
	} else {
		if newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Success && netErr != nil {
			hdb.log.Debugf("Host %v is being downgraded from an online host to an offline host: %v\n", newEntry.PublicKey.String(), netErr)
		}

		// Make sure that the current time is after the timestamp of the
		// previous scan. It may not be if the system clock has changed. This
		// will prevent the sort-check sanity checks from triggering.
		newTimestamp := time.Now()
		prevTimestamp := newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Timestamp
		if !newTimestamp.After(prevTimestamp) {
			newTimestamp = prevTimestamp.Add(time.Second)
		}

		// Append the new scan with the adjusted timestamp.
		newEntry.ScanHistory = append(newEntry.ScanHistory, modules.HostDBScan{Timestamp: newTimestamp, Success: netErr == nil})
	}

	// Check whether any of the recent scans demonstrate uptime. The pruning
	// and compression of the history ensure that there are only relatively
	// recent scans represented.
	var recentUptime bool
	for _, scan := range newEntry.ScanHistory {
		if scan.Success {
			recentUptime = true
			break
		}
	}

	// If the host has been offline for too long, delete the host from the
	// hostdb. Only delete if there have been enough scans over a long enough
	// period to be confident that the host really is offline for good.
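	// Concretely, all three conditions below must hold: the oldest retained
	// scan is more than maxHostDowntime old, none of the retained scans
	// succeeded, and at least minScans scans have accumulated.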
	if time.Now().Sub(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime && !recentUptime && len(newEntry.ScanHistory) >= minScans {
		err := hdb.hostTree.Remove(newEntry.PublicKey)
		if err != nil {
			hdb.log.Println("ERROR: unable to remove host entry which has had a ton of downtime:", err)
		}

		// The function should terminate here as no more interaction is needed
		// with this host.
		return
	}

	// Compress any old scans into the historic values.
	for len(newEntry.ScanHistory) > minScans && time.Now().Sub(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime {
		timePassed := newEntry.ScanHistory[1].Timestamp.Sub(newEntry.ScanHistory[0].Timestamp)
		if newEntry.ScanHistory[0].Success {
			newEntry.HistoricUptime += timePassed
		} else {
			newEntry.HistoricDowntime += timePassed
		}
		newEntry.ScanHistory = newEntry.ScanHistory[1:]
	}

	// Add the updated entry.
	if !exists {
		err := hdb.hostTree.Insert(newEntry)
		if err != nil {
			hdb.log.Println("ERROR: unable to insert entry which was thought to be new:", err)
		} else {
			hdb.log.Debugf("Adding host %v to the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
		}
	} else {
		err := hdb.hostTree.Modify(newEntry)
		if err != nil {
			hdb.log.Println("ERROR: unable to modify entry which is thought to exist:", err)
		} else {
			hdb.log.Debugf("Updating host %v in the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
		}
	}
}

// managedLookupIPNets returns string representations of the CIDR subnets used
// by the host. In case of an error we return nil. We don't really care about
// the error, because we don't update host entries while we are offline anyway.
// So if we fail to resolve a hostname, the problem is not necessarily related
// to the host.
func (hdb *HostDB) managedLookupIPNets(address modules.NetAddress) (ipNets []string, err error) {
	// Look up the IP addresses of the host.
	addresses, err := hdb.deps.Resolver().LookupIP(address.Host())
	if err != nil {
		return nil, err
	}
	// Get the subnets of the addresses.
	for _, ip := range addresses {
		// Set the filterRange according to the type of IP address.
		var filterRange int
		if ip.To4() != nil {
			filterRange = hosttree.IPv4FilterRange
		} else {
			filterRange = hosttree.IPv6FilterRange
		}

		// Get the subnet.
		_, ipnet, err := net.ParseCIDR(fmt.Sprintf("%s/%d", ip.String(), filterRange))
		if err != nil {
			return nil, err
		}
		// Add the subnet to the host.
		ipNets = append(ipNets, ipnet.String())
	}
	return ipNets, nil
}

// managedScanHost will connect to a host and grab the settings, verifying
// uptime and updating the hostdb with the host's current settings.
func (hdb *HostDB) managedScanHost(entry modules.HostDBEntry) {
	// Request settings from the queued host entry.
	netAddr := entry.NetAddress
	pubKey := entry.PublicKey
	hdb.log.Debugf("Scanning host %v at %v", pubKey, netAddr)

	// If we use a custom resolver for testing, we replace the custom domain
	// with 127.0.0.1. Otherwise the scan will fail.
	if hdb.deps.Disrupt("customResolver") {
		port := netAddr.Port()
		netAddr = modules.NetAddress(fmt.Sprintf("127.0.0.1:%s", port))
	}

	// Resolve the host's used subnets and update the timestamp if they
	// changed. We only update the timestamp if resolving the ipNets was
	// successful.
	ipNets, err := hdb.managedLookupIPNets(entry.NetAddress)
	if err == nil && !equalIPNets(ipNets, entry.IPNets) {
		entry.IPNets = ipNets
		entry.LastIPNetChange = time.Now()
	}
	if err != nil {
		hdb.log.Debugln("managedScanHost: failed to look up IP nets", err)
	}

	// Update the historic interactions of the entry if necessary.
	hdb.mu.RLock()
	updateHostHistoricInteractions(&entry, hdb.blockHeight)
	hdb.mu.RUnlock()

	var settings modules.HostExternalSettings
	var latency time.Duration
	err = func() error {
		timeout := hostRequestTimeout
		hdb.mu.RLock()
		if len(hdb.initialScanLatencies) > minScansForSpeedup {
			build.Critical("initialScanLatencies should never be greater than minScansForSpeedup")
		}
		if !hdb.initialScanComplete && len(hdb.initialScanLatencies) == minScansForSpeedup {
			// During an initial scan, once we have minScansForSpeedup
			// latencies in initialScanLatencies, we use
			// scanSpeedupMedianMultiplier*median(initialScanLatencies) as the
			// new hostRequestTimeout to speed up the scanning process.
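			// initialScanLatencies is sorted by managedScanHost once it
			// reaches minScansForSpeedup entries, so the middle element below
			// is the median. The sped-up timeout is still capped at the
			// default hostRequestTimeout.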
			timeout = hdb.initialScanLatencies[len(hdb.initialScanLatencies)/2]
			timeout *= scanSpeedupMedianMultiplier
			if hostRequestTimeout < timeout {
				timeout = hostRequestTimeout
			}
		}
		hdb.mu.RUnlock()

		dialer := &net.Dialer{
			Cancel:  hdb.tg.StopChan(),
			Timeout: timeout,
		}
		start := time.Now()
		conn, err := dialer.Dial("tcp", string(netAddr))
		latency = time.Since(start)
		if err != nil {
			return err
		}
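		// Tie the connection's lifetime to both this closure and hostdb
		// shutdown: the watcher goroutine closes conn either when
		// connCloseChan is closed by the deferred close on return, or when
		// the thread group signals a stop, unblocking any pending reads.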
		connCloseChan := make(chan struct{})
		go func() {
			select {
			case <-hdb.tg.StopChan():
			case <-connCloseChan:
			}
			conn.Close()
		}()
		defer close(connCloseChan)
		conn.SetDeadline(time.Now().Add(hostScanDeadline))

		err = encoding.WriteObject(conn, modules.RPCSettings)
		if err != nil {
			return err
		}
		var pubkey crypto.PublicKey
		copy(pubkey[:], pubKey.Key)
		return crypto.ReadSignedObject(conn, &settings, maxSettingsLen, pubkey)
	}()
	if err != nil {
		hdb.log.Debugf("Scan of host at %v failed: %v", netAddr, err)
	} else {
		hdb.log.Debugf("Scan of host at %v succeeded.", netAddr)
		entry.HostExternalSettings = settings
	}
	success := err == nil

	hdb.mu.Lock()
	defer hdb.mu.Unlock()
	// We don't want to override the NetAddress during a scan, so we need to
	// retrieve the most recent NetAddress from the tree first.
	oldEntry, exists := hdb.hostTree.Select(entry.PublicKey)
	if exists {
		entry.NetAddress = oldEntry.NetAddress
	}
	// Update the host tree with the new entry, including the scan's error (if
	// any).
	hdb.updateEntry(entry, err)

	// Add the scan to the initialScanLatencies if it was successful.
	if success && len(hdb.initialScanLatencies) < minScansForSpeedup {
		hdb.initialScanLatencies = append(hdb.initialScanLatencies, latency)
		// If the slice has reached its maximum size, we sort it.
		if len(hdb.initialScanLatencies) == minScansForSpeedup {
			sort.Slice(hdb.initialScanLatencies, func(i, j int) bool {
				return hdb.initialScanLatencies[i] < hdb.initialScanLatencies[j]
			})
		}
	}
}

// managedWaitForScans is a helper function that blocks until the hostDB's
// scanList is empty.
func (hdb *HostDB) managedWaitForScans() {
	for {
		hdb.mu.Lock()
		length := len(hdb.scanList)
		hdb.mu.Unlock()
		if length == 0 {
			break
		}
		select {
		case <-hdb.tg.StopChan():
		case <-time.After(scanCheckInterval):
		}
	}
}

// threadedProbeHosts pulls hosts from the scan pool and runs a scan on them.
func (hdb *HostDB) threadedProbeHosts(scanPool <-chan modules.HostDBEntry) {
	for hostEntry := range scanPool {
		// Block until the hostdb has internet connectivity.
		for {
			hdb.mu.RLock()
			online := hdb.gateway.Online()
			hdb.mu.RUnlock()
			if online {
				break
			}
			select {
			case <-time.After(time.Second * 30):
				continue
			case <-hdb.tg.StopChan():
				return
			}
		}

		// There appears to be internet connectivity, continue with the scan.
		hdb.managedScanHost(hostEntry)
	}
}

// threadedScan is an ongoing function which will query the full set of hosts
// every few hours to see who is online and available for uploading.
func (hdb *HostDB) threadedScan() {
	err := hdb.tg.Add()
	if err != nil {
		return
	}
	defer hdb.tg.Done()

	// Wait until the consensus set is synced. Only then can we be sure that
	// the initial scan covers the whole network.
	for {
		if hdb.cs.Synced() {
			break
		}
		select {
		case <-hdb.tg.StopChan():
			return
		case <-time.After(scanCheckInterval):
		}
	}

	// Block the scan when a specific dependency is provided.
	hdb.deps.Disrupt("BlockScan")

	// The initial scan might have been interrupted. Queue one scan for every
	// announced host that was missed by the initial scan, and wait for the
	// scans to finish before starting the scan loop.
	allHosts := hdb.hostTree.All()
	hdb.mu.Lock()
	for _, host := range allHosts {
		if len(host.ScanHistory) == 0 && host.HistoricUptime == 0 && host.HistoricDowntime == 0 {
			hdb.queueScan(host)
		}
	}
	hdb.mu.Unlock()
	hdb.managedWaitForScans()

	// Set the flag to indicate that the initial scan is complete.
	hdb.mu.Lock()
	hdb.initialScanComplete = true
	hdb.mu.Unlock()

	for {
		// Set up a scan for the hostCheckupQuantity most valuable hosts in
		// the hostdb. Hosts that fail their scans will be docked
		// significantly, pushing them further back in the hierarchy, ensuring
		// that for the most part only online hosts are getting scanned unless
		// there are fewer than hostCheckupQuantity of them.

		// Grab a set of hosts to scan; grab both online and offline hosts to
		// get high diversity.
		var onlineHosts, offlineHosts []modules.HostDBEntry
		allHosts := hdb.hostTree.All()
		for i := len(allHosts) - 1; i >= 0; i-- {
			if len(onlineHosts) >= hostCheckupQuantity && len(offlineHosts) >= hostCheckupQuantity {
				break
			}

			// Figure out if the host is online or offline.
			host := allHosts[i]
			online := len(host.ScanHistory) > 0 && host.ScanHistory[len(host.ScanHistory)-1].Success
			if online && len(onlineHosts) < hostCheckupQuantity {
				onlineHosts = append(onlineHosts, host)
			} else if !online && len(offlineHosts) < hostCheckupQuantity {
				offlineHosts = append(offlineHosts, host)
			}
		}

		// Queue the scans for each host.
		hdb.log.Println("Performing scan on", len(onlineHosts), "online hosts and", len(offlineHosts), "offline hosts.")
		hdb.mu.Lock()
		for _, host := range onlineHosts {
			hdb.queueScan(host)
		}
		for _, host := range offlineHosts {
			hdb.queueScan(host)
		}
		hdb.mu.Unlock()

		// Sleep for a random amount of time before doing another round of
		// scanning. The minimums and maximums keep the scan time reasonable,
		// while the randomness prevents the scanning from always happening at
		// the same time of day or week.
		sleepRange := uint64(maxScanSleep - minScanSleep)
		sleepTime := minScanSleep + time.Duration(fastrand.Uint64n(sleepRange))

		// Sleep until it's time for the next scan cycle.
		select {
		case <-hdb.tg.StopChan():
			return
		case <-time.After(sleepTime):
		}
	}
}