github.com/fozzysec/SiaPrime@v0.0.0-20190612043147-66c8e8d11fe3/modules/renter/hostdb/scan.go (about)

     1  package hostdb
     2  
     3  // scan.go contains the functions which periodically scan the list of all hosts
     4  // to see which hosts are online or offline, and to get any updates to the
     5  // settings of the hosts.
     6  
     7  import (
     8  	"fmt"
     9  	"net"
    10  	"sort"
    11  	"time"
    12  
    13  	"SiaPrime/build"
    14  	"SiaPrime/crypto"
    15  	"SiaPrime/encoding"
    16  	"SiaPrime/modules"
    17  	"SiaPrime/modules/renter/hostdb/hosttree"
    18  	"gitlab.com/NebulousLabs/fastrand"
    19  )
    20  
    21  // equalIPNets checks if two slices of IP subnets contain the same subnets.
    22  func equalIPNets(ipNetsA, ipNetsB []string) bool {
    23  	// Check the length first.
    24  	if len(ipNetsA) != len(ipNetsB) {
    25  		return false
    26  	}
    27  	// Create a map of all the subnets in ipNetsA.
    28  	mapNetsA := make(map[string]struct{})
    29  	for _, subnet := range ipNetsA {
    30  		mapNetsA[subnet] = struct{}{}
    31  	}
    32  	// Make sure that all the subnets from ipNetsB are in the map.
    33  	for _, subnet := range ipNetsB {
    34  		if _, exists := mapNetsA[subnet]; !exists {
    35  			return false
    36  		}
    37  	}
    38  	return true
    39  }
    40  
// queueScan will add a host to the queue to be scanned. The host will be added
// at a random position which means that the order in which queueScan is called
// is not necessarily the order in which the hosts get scanned. That guarantees
// a random scan order during the initial scan.
//
// If no goroutine is currently draining the scan list (hdb.scanWait is false),
// queueScan spawns one. That goroutine feeds entries to worker goroutines
// running threadedProbeHosts over an unbuffered channel and exits once the
// scan list is empty or the hostdb is stopping.
//
// queueScan reads and writes scanMap, scanList and scanWait without taking
// hdb.mu itself; the callers visible in this file hold hdb.mu while calling it.
func (hdb *HostDB) queueScan(entry modules.HostDBEntry) {
	// If this entry is already in the scan pool, can return immediately.
	_, exists := hdb.scanMap[entry.PublicKey.String()]
	if exists {
		return
	}
	// Add the entry to a random position in the waitlist: append it, then swap
	// it with a randomly chosen earlier element.
	hdb.scanMap[entry.PublicKey.String()] = struct{}{}
	hdb.scanList = append(hdb.scanList, entry)
	if len(hdb.scanList) > 1 {
		i := len(hdb.scanList) - 1
		// NOTE(review): presumably fastrand.Intn(i) yields a value in [0, i),
		// in which case the new entry never stays in the last slot - confirm.
		j := fastrand.Intn(i)
		hdb.scanList[i], hdb.scanList[j] = hdb.scanList[j], hdb.scanList[i]
	}
	// Check if any thread is currently emptying the waitlist. If not, spawn a
	// thread to empty the waitlist.
	if hdb.scanWait {
		// Another thread is emptying the scan list, nothing to worry about.
		return
	}

	// Sanity check - the scan map should not exceed the scan list by more
	// than maxScanningThreads entries. Only evaluated in debug builds.
	if build.DEBUG && len(hdb.scanMap) > len(hdb.scanList)+maxScanningThreads {
		hdb.log.Critical("The hostdb scan map has seemingly grown too large:", len(hdb.scanMap), len(hdb.scanList), maxScanningThreads)
	}

	// Mark the waitlist as being drained before the draining goroutine starts
	// so that subsequent queueScan calls do not spawn a second one.
	hdb.scanWait = true
	go func() {
		// scanPool is unbuffered: a send only succeeds when a worker is ready
		// to receive. Closing it on exit lets the workers' range loops end.
		scanPool := make(chan modules.HostDBEntry)
		defer close(scanPool)

		// Nobody is emptying the scan list, volunteer.
		if hdb.tg.Add() != nil {
			// Hostdb is shutting down, don't spin up another thread.  It is
			// okay to leave scanWait set to true as that will not affect
			// shutdown.
			return
		}
		defer hdb.tg.Done()

		// Block scan when a specific dependency is provided.
		hdb.deps.Disrupt("BlockScan")

		// Due to the patterns used to spin up scanning threads, it's possible
		// that we get to this point while all scanning threads are currently
		// used up, completing jobs that were sent out by the previous pool
		// managing thread. This thread is at risk of deadlocking if there's
		// not at least one scanning thread accepting work that it created
		// itself, so we use a starterThread exception and spin up
		// one-thread-too-many on the first iteration to ensure that we do not
		// deadlock.
		starterThread := false
		for {
			// If the scanList is empty, this thread can spin down.
			hdb.mu.Lock()
			if len(hdb.scanList) == 0 {
				// Scan list is empty, can exit. Let the world know that nobody
				// is emptying the scan list anymore.
				hdb.scanWait = false
				hdb.mu.Unlock()
				return
			}

			// Get the next host, shrink the scan list. The host is removed
			// from the scan map here, i.e. when it is dequeued and not when
			// its scan completes.
			entry := hdb.scanList[0]
			hdb.scanList = hdb.scanList[1:]
			delete(hdb.scanMap, entry.PublicKey.String())
			scansRemaining := len(hdb.scanList)

			// Grab the most recent entry for this host.
			recentEntry, exists := hdb.hostTree.Select(entry.PublicKey)
			if exists {
				entry = recentEntry
			}

			// Try to send this entry to an existing idle worker (non-blocking).
			select {
			case scanPool <- entry:
				hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
				hdb.mu.Unlock()
				continue
			default:
			}

			// Create new worker thread. This happens when the thread limit
			// has not been reached yet, or unconditionally once per draining
			// goroutine (the starterThread exception described above).
			if hdb.scanningThreads < maxScanningThreads || !starterThread {
				starterThread = true
				// NOTE(review): the counter is incremented before tg.Add is
				// checked, so it is not decremented again if Add fails. That
				// only happens when Add returns an error - presumably during
				// shutdown; confirm this is acceptable.
				hdb.scanningThreads++
				if err := hdb.tg.Add(); err != nil {
					hdb.mu.Unlock()
					return
				}
				go func() {
					defer hdb.tg.Done()
					hdb.threadedProbeHosts(scanPool)
					hdb.mu.Lock()
					hdb.scanningThreads--
					hdb.mu.Unlock()
				}()
			}
			hdb.mu.Unlock()

			// Block while waiting for an opening in the scan pool. The lock
			// has been released above so that workers can make progress.
			hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
			select {
			case scanPool <- entry:
				// iterate again
			case <-hdb.tg.StopChan():
				// quit
				return
			}
		}
	}()
}
   160  
   161  // updateEntry updates an entry in the hostdb after a scan has taken place.
   162  //
   163  // CAUTION: This function will automatically add multiple entries to a new host
   164  // to give that host some base uptime. This makes this function co-dependent
   165  // with the host weight functions. Adjustment of the host weight functions need
   166  // to keep this function in mind, and vice-versa.
   167  func (hdb *HostDB) updateEntry(entry modules.HostDBEntry, netErr error) {
   168  	// If the scan failed because we don't have Internet access, toss out this update.
   169  	if netErr != nil && !hdb.gateway.Online() {
   170  		return
   171  	}
   172  
   173  	// Grab the host from the host tree, and update it with the neew settings.
   174  	newEntry, exists := hdb.hostTree.Select(entry.PublicKey)
   175  	if exists {
   176  		newEntry.HostExternalSettings = entry.HostExternalSettings
   177  		newEntry.IPNets = entry.IPNets
   178  		newEntry.LastIPNetChange = entry.LastIPNetChange
   179  	} else {
   180  		newEntry = entry
   181  	}
   182  
   183  	// Update the recent interactions with this host.
   184  	if netErr == nil {
   185  		newEntry.RecentSuccessfulInteractions++
   186  	} else {
   187  		newEntry.RecentFailedInteractions++
   188  	}
   189  
   190  	// Add the datapoints for the scan.
   191  	if len(newEntry.ScanHistory) < 2 {
   192  		// Add two scans to the scan history. Two are needed because the scans
   193  		// are forward looking, but we want this first scan to represent as
   194  		// much as one week of uptime or downtime.
   195  		earliestStartTime := time.Now().Add(time.Hour * 7 * 24 * -1)                                                   // Permit up to a week of starting uptime or downtime.
   196  		suggestedStartTime := time.Now().Add(time.Minute * 10 * time.Duration(hdb.blockHeight-entry.FirstSeen+1) * -1) // Add one to the FirstSeen in case FirstSeen is this block, guarantees incrementing order.
   197  		if suggestedStartTime.Before(earliestStartTime) {
   198  			suggestedStartTime = earliestStartTime
   199  		}
   200  		newEntry.ScanHistory = modules.HostDBScans{
   201  			{Timestamp: suggestedStartTime, Success: netErr == nil},
   202  			{Timestamp: time.Now(), Success: netErr == nil},
   203  		}
   204  	} else {
   205  		if newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Success && netErr != nil {
   206  			hdb.log.Debugf("Host %v is being downgraded from an online host to an offline host: %v\n", newEntry.PublicKey.String(), netErr)
   207  		}
   208  
   209  		// Make sure that the current time is after the timestamp of the
   210  		// previous scan. It may not be if the system clock has changed. This
   211  		// will prevent the sort-check sanity checks from triggering.
   212  		newTimestamp := time.Now()
   213  		prevTimestamp := newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Timestamp
   214  		if !newTimestamp.After(prevTimestamp) {
   215  			newTimestamp = prevTimestamp.Add(time.Second)
   216  		}
   217  
   218  		// Before appending, make sure that the scan we just performed is
   219  		// timestamped after the previous scan performed. It may not be if the
   220  		// system clock has changed.
   221  		newEntry.ScanHistory = append(newEntry.ScanHistory, modules.HostDBScan{Timestamp: newTimestamp, Success: netErr == nil})
   222  	}
   223  
   224  	// Check whether any of the recent scans demonstrate uptime. The pruning and
   225  	// compression of the history ensure that there are only relatively recent
   226  	// scans represented.
   227  	var recentUptime bool
   228  	for _, scan := range newEntry.ScanHistory {
   229  		if scan.Success {
   230  			recentUptime = true
   231  		}
   232  	}
   233  
   234  	// If the host has been offline for too long, delete the host from the
   235  	// hostdb. Only delete if there have been enough scans over a long enough
   236  	// period to be confident that the host really is offline for good.
   237  	if time.Now().Sub(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime && !recentUptime && len(newEntry.ScanHistory) >= minScans {
   238  		err := hdb.hostTree.Remove(newEntry.PublicKey)
   239  		if err != nil {
   240  			hdb.log.Println("ERROR: unable to remove host newEntry which has had a ton of downtime:", err)
   241  		}
   242  
   243  		// The function should terminate here as no more interaction is needed
   244  		// with this host.
   245  		return
   246  	}
   247  
   248  	// Compress any old scans into the historic values.
   249  	for len(newEntry.ScanHistory) > minScans && time.Now().Sub(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime {
   250  		timePassed := newEntry.ScanHistory[1].Timestamp.Sub(newEntry.ScanHistory[0].Timestamp)
   251  		if newEntry.ScanHistory[0].Success {
   252  			newEntry.HistoricUptime += timePassed
   253  		} else {
   254  			newEntry.HistoricDowntime += timePassed
   255  		}
   256  		newEntry.ScanHistory = newEntry.ScanHistory[1:]
   257  	}
   258  
   259  	// Add the updated entry
   260  	if !exists {
   261  		err := hdb.hostTree.Insert(newEntry)
   262  		if err != nil {
   263  			hdb.log.Println("ERROR: unable to insert entry which is was thought to be new:", err)
   264  		} else {
   265  			hdb.log.Debugf("Adding host %v to the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
   266  		}
   267  	} else {
   268  		err := hdb.hostTree.Modify(newEntry)
   269  		if err != nil {
   270  			hdb.log.Println("ERROR: unable to modify entry which is thought to exist:", err)
   271  		} else {
   272  			hdb.log.Debugf("Adding host %v to the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
   273  		}
   274  	}
   275  }
   276  
   277  // managedLookupIPNets returns string representations of the CIDR subnets
   278  // used by the host.  In case of an error we return nil. We don't really care
   279  // about the error because we don't update host entries if we are offline
   280  // anyway. So if we fail to resolve a hostname, the problem is not related to
   281  // us.
   282  func (hdb *HostDB) managedLookupIPNets(address modules.NetAddress) (ipNets []string, err error) {
   283  	// Lookup the IP addresses of the host.
   284  	addresses, err := hdb.deps.Resolver().LookupIP(address.Host())
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	// Get the subnets of the addresses.
   289  	for _, ip := range addresses {
   290  		// Set the filterRange according to the type of IP address.
   291  		var filterRange int
   292  		if ip.To4() != nil {
   293  			filterRange = hosttree.IPv4FilterRange
   294  		} else {
   295  			filterRange = hosttree.IPv6FilterRange
   296  		}
   297  
   298  		// Get the subnet.
   299  		_, ipnet, err := net.ParseCIDR(fmt.Sprintf("%s/%d", ip.String(), filterRange))
   300  		if err != nil {
   301  			return nil, err
   302  		}
   303  		// Add the subnet to the host.
   304  		ipNets = append(ipNets, ipnet.String())
   305  	}
   306  	return
   307  }
   308  
// managedScanHost will connect to a host and grab the settings, verifying
// uptime and updating to the host's preferences.
//
// The scan proceeds in three phases: resolve the host's IP subnets, dial the
// host and request its signed settings, then - under hdb.mu - hand the result
// to updateEntry and record the dial latency for the initial-scan speedup. The
// network I/O happens without holding hdb.mu.
func (hdb *HostDB) managedScanHost(entry modules.HostDBEntry) {
	// Request settings from the queued host entry.
	netAddr := entry.NetAddress
	pubKey := entry.PublicKey
	hdb.log.Debugf("Scanning host %v at %v", pubKey, netAddr)

	// If we use a custom resolver for testing, we replace the custom domain
	// with 127.0.0.1. Otherwise the scan will fail.
	if hdb.deps.Disrupt("customResolver") {
		port := netAddr.Port()
		netAddr = modules.NetAddress(fmt.Sprintf("127.0.0.1:%s", port))
	}

	// Resolve the host's used subnets and update the timestamp if they
	// changed. We only update the timestamp if resolving the ipNets was
	// successful. Note that the lookup uses entry.NetAddress, not the
	// possibly rewritten netAddr from above.
	ipNets, err := hdb.managedLookupIPNets(entry.NetAddress)
	if err == nil && !equalIPNets(ipNets, entry.IPNets) {
		entry.IPNets = ipNets
		entry.LastIPNetChange = time.Now()
	}
	if err != nil {
		// A failed lookup is only logged; the scan itself still goes ahead.
		hdb.log.Debugln("mangedScanHost: failed to look up IP nets", err)
	}

	// Update historic interactions of entry if necessary
	hdb.mu.RLock()
	updateHostHistoricInteractions(&entry, hdb.blockHeight)
	hdb.mu.RUnlock()

	// settings and latency are filled in by the closure below; err is
	// overwritten with the outcome of the actual scan.
	var settings modules.HostExternalSettings
	var latency time.Duration
	err = func() error {
		timeout := hostRequestTimeout
		hdb.mu.RLock()
		if len(hdb.initialScanLatencies) > minScansForSpeedup {
			build.Critical("initialScanLatencies should never be greater than minScansForSpeedup")
		}
		if !hdb.initialScanComplete && len(hdb.initialScanLatencies) == minScansForSpeedup {
			// During an initial scan, once minScansForSpeedup latencies have
			// been collected in initialScanLatencies (sorted at that point,
			// see the end of this function), we use
			// scanSpeedupMedianMultiplier*median(initialScanLatencies) as the
			// timeout to speed up the scanning process. The timeout is capped
			// at hostRequestTimeout.
			timeout = hdb.initialScanLatencies[len(hdb.initialScanLatencies)/2]
			timeout *= scanSpeedupMedianMultiplier
			if hostRequestTimeout < timeout {
				timeout = hostRequestTimeout
			}
		}
		hdb.mu.RUnlock()

		// Dial the host; the dial is aborted if the hostdb is stopped.
		dialer := &net.Dialer{
			Cancel:  hdb.tg.StopChan(),
			Timeout: timeout,
		}
		start := time.Now()
		conn, err := dialer.Dial("tcp", string(netAddr))
		// The latency covers only the dial, and is recorded even on failure.
		latency = time.Since(start)
		if err != nil {
			return err
		}
		// Close the connection as soon as either the hostdb is stopped or
		// this closure returns (which closes connCloseChan).
		connCloseChan := make(chan struct{})
		go func() {
			select {
			case <-hdb.tg.StopChan():
			case <-connCloseChan:
			}
			conn.Close()
		}()
		defer close(connCloseChan)
		// NOTE(review): the error returned by SetDeadline is not checked.
		conn.SetDeadline(time.Now().Add(hostScanDeadline))

		// Send the settings RPC identifier, then read the settings, which
		// are verified against the host's public key.
		err = encoding.WriteObject(conn, modules.RPCSettings)
		if err != nil {
			return err
		}
		var pubkey crypto.PublicKey
		copy(pubkey[:], pubKey.Key)
		return crypto.ReadSignedObject(conn, &settings, maxSettingsLen, pubkey)
	}()
	if err != nil {
		hdb.log.Debugf("Scan of host at %v failed: %v", netAddr, err)

	} else {
		hdb.log.Debugf("Scan of host at %v succeeded.", netAddr)
		entry.HostExternalSettings = settings
	}
	success := err == nil

	hdb.mu.Lock()
	defer hdb.mu.Unlock()
	// We don't want to override the NetAddress during a scan so we need to
	// retrieve the most recent NetAddress from the tree first.
	oldEntry, exists := hdb.hostTree.Select(entry.PublicKey)
	if exists {
		entry.NetAddress = oldEntry.NetAddress
	}
	// Update the host tree to have a new entry, including the new error. The
	// scan map is not touched by this function.
	hdb.updateEntry(entry, err)

	// Add the scan to the initialScanLatencies if it was successful.
	if success && len(hdb.initialScanLatencies) < minScansForSpeedup {
		hdb.initialScanLatencies = append(hdb.initialScanLatencies, latency)
		// If the slice has reached its maximum size we sort it, so that the
		// middle element read above is the median.
		if len(hdb.initialScanLatencies) == minScansForSpeedup {
			sort.Slice(hdb.initialScanLatencies, func(i, j int) bool {
				return hdb.initialScanLatencies[i] < hdb.initialScanLatencies[j]
			})
		}
	}
}
   423  
   424  // waitForScans is a helper function that blocks until the hostDB's scanList is
   425  // empty.
   426  func (hdb *HostDB) managedWaitForScans() {
   427  	for {
   428  		hdb.mu.Lock()
   429  		length := len(hdb.scanList)
   430  		hdb.mu.Unlock()
   431  		if length == 0 {
   432  			break
   433  		}
   434  		select {
   435  		case <-hdb.tg.StopChan():
   436  		case <-time.After(scanCheckInterval):
   437  		}
   438  	}
   439  }
   440  
   441  // threadedProbeHosts pulls hosts from the thread pool and runs a scan on them.
   442  func (hdb *HostDB) threadedProbeHosts(scanPool <-chan modules.HostDBEntry) {
   443  	for hostEntry := range scanPool {
   444  		// Block until hostdb has internet connectivity.
   445  		for {
   446  			hdb.mu.RLock()
   447  			online := hdb.gateway.Online()
   448  			hdb.mu.RUnlock()
   449  			if online {
   450  				break
   451  			}
   452  			select {
   453  			case <-time.After(time.Second * 30):
   454  				continue
   455  			case <-hdb.tg.StopChan():
   456  				return
   457  			}
   458  		}
   459  
   460  		// There appears to be internet connectivity, continue with the
   461  		// scan.
   462  		hdb.managedScanHost(hostEntry)
   463  	}
   464  }
   465  
   466  // threadedScan is an ongoing function which will query the full set of hosts
   467  // every few hours to see who is online and available for uploading.
   468  func (hdb *HostDB) threadedScan() {
   469  	err := hdb.tg.Add()
   470  	if err != nil {
   471  		return
   472  	}
   473  	defer hdb.tg.Done()
   474  
   475  	// Wait until the consensus set is synced. Only then we can be sure that
   476  	// the initial scan covers the whole network.
   477  	for {
   478  		if hdb.cs.Synced() {
   479  			break
   480  		}
   481  		select {
   482  		case <-hdb.tg.StopChan():
   483  			return
   484  		case <-time.After(scanCheckInterval):
   485  		}
   486  	}
   487  
   488  	// Block scan when a specific dependency is provided.
   489  	hdb.deps.Disrupt("BlockScan")
   490  
   491  	// The initial scan might have been interrupted. Queue one scan for every
   492  	// announced host that was missed by the initial scan and wait for the
   493  	// scans to finish before starting the scan loop.
   494  	allHosts := hdb.hostTree.All()
   495  	hdb.mu.Lock()
   496  	for _, host := range allHosts {
   497  		if len(host.ScanHistory) == 0 && host.HistoricUptime == 0 && host.HistoricDowntime == 0 {
   498  			hdb.queueScan(host)
   499  		}
   500  	}
   501  	hdb.mu.Unlock()
   502  	hdb.managedWaitForScans()
   503  
   504  	// Set the flag to indicate that the initial scan is complete.
   505  	hdb.mu.Lock()
   506  	hdb.initialScanComplete = true
   507  	hdb.mu.Unlock()
   508  
   509  	for {
   510  		// Set up a scan for the hostCheckupQuanity most valuable hosts in the
   511  		// hostdb. Hosts that fail their scans will be docked significantly,
   512  		// pushing them further back in the hierarchy, ensuring that for the
   513  		// most part only online hosts are getting scanned unless there are
   514  		// fewer than hostCheckupQuantity of them.
   515  
   516  		// Grab a set of hosts to scan, grab hosts that are active, inactive,
   517  		// and offline to get high diversity.
   518  		var onlineHosts, offlineHosts []modules.HostDBEntry
   519  		allHosts := hdb.hostTree.All()
   520  		for i := len(allHosts) - 1; i >= 0; i-- {
   521  			if len(onlineHosts) >= hostCheckupQuantity && len(offlineHosts) >= hostCheckupQuantity {
   522  				break
   523  			}
   524  
   525  			// Figure out if the host is online or offline.
   526  			host := allHosts[i]
   527  			online := len(host.ScanHistory) > 0 && host.ScanHistory[len(host.ScanHistory)-1].Success
   528  			if online && len(onlineHosts) < hostCheckupQuantity {
   529  				onlineHosts = append(onlineHosts, host)
   530  			} else if !online && len(offlineHosts) < hostCheckupQuantity {
   531  				offlineHosts = append(offlineHosts, host)
   532  			}
   533  		}
   534  
   535  		// Queue the scans for each host.
   536  		hdb.log.Println("Performing scan on", len(onlineHosts), "online hosts and", len(offlineHosts), "offline hosts.")
   537  		hdb.mu.Lock()
   538  		for _, host := range onlineHosts {
   539  			hdb.queueScan(host)
   540  		}
   541  		for _, host := range offlineHosts {
   542  			hdb.queueScan(host)
   543  		}
   544  		hdb.mu.Unlock()
   545  
   546  		// Sleep for a random amount of time before doing another round of
   547  		// scanning. The minimums and maximums keep the scan time reasonable,
   548  		// while the randomness prevents the scanning from always happening at
   549  		// the same time of day or week.
   550  		sleepRange := uint64(maxScanSleep - minScanSleep)
   551  		sleepTime := minScanSleep + time.Duration(fastrand.Uint64n(sleepRange))
   552  
   553  		// Sleep until it's time for the next scan cycle.
   554  		select {
   555  		case <-hdb.tg.StopChan():
   556  			return
   557  		case <-time.After(sleepTime):
   558  		}
   559  	}
   560  }