github.com/Synthesix/Sia@v1.3.3-0.20180413141344-f863baeed3ca/modules/renter/hostdb/scan.go

package hostdb

// scan.go contains the functions which periodically scan the list of all hosts
// to see which hosts are online or offline, and to get any updates to the
// settings of the hosts.

import (
	"net"
	"time"

	"github.com/Synthesix/Sia/build"
	"github.com/Synthesix/Sia/crypto"
	"github.com/Synthesix/Sia/encoding"
	"github.com/Synthesix/Sia/modules"
	"github.com/NebulousLabs/fastrand"
)

// queueScan will add a host to the queue to be scanned.
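// queueScan assumes that the caller holds the hostdb lock; callers such as
// threadedScan lock hdb.mu around their queueScan calls, and the background
// goroutine spawned below reacquires hdb.mu on its own before touching the
// scan list.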
func (hdb *HostDB) queueScan(entry modules.HostDBEntry) {
	// If this entry is already in the scan pool, return immediately.
	_, exists := hdb.scanMap[entry.PublicKey.String()]
	if exists {
		return
	}

	// Add the entry to a waitlist, then check if any thread is currently
	// emptying the waitlist. If not, spawn a thread to empty the waitlist.
	hdb.scanMap[entry.PublicKey.String()] = struct{}{}
	hdb.scanList = append(hdb.scanList, entry)
	if hdb.scanWait {
		// Another thread is emptying the scan list, nothing to worry about.
		return
	}

	// Sanity check - the scan map should never be larger than the scan list
	// by more than the number of scanning threads.
	if build.DEBUG && len(hdb.scanMap) > len(hdb.scanList)+maxScanningThreads {
		hdb.log.Critical("The hostdb scan map has seemingly grown too large:", len(hdb.scanMap), len(hdb.scanList), maxScanningThreads)
	}

	hdb.scanWait = true
	go func() {
		scanPool := make(chan modules.HostDBEntry)
		defer close(scanPool)

		// Nobody is emptying the scan list, volunteer.
		if hdb.tg.Add() != nil {
			// Hostdb is shutting down, don't spin up another thread. It is
			// okay to leave scanWait set to true, as that will not affect
			// shutdown.
			return
		}
		defer hdb.tg.Done()

		// Due to the patterns used to spin up scanning threads, it's possible
		// that we get to this point while all scanning threads are currently
		// used up, completing jobs that were sent out by the previous pool
		// managing thread. This thread is at risk of deadlocking if there's
		// not at least one scanning thread accepting work that it created
		// itself, so we use a starterThread exception and spin up
		// one-thread-too-many on the first iteration to ensure that we do not
		// deadlock.
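		//
		// As a concrete scenario: if all maxScanningThreads workers are still
		// draining a previous manager's (now closed) pool, none of them will
		// ever read from this goroutine's scanPool, and the blocking send
		// below would never complete without the extra worker.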
		starterThread := false
		for {
			// If the scanList is empty, this thread can spin down.
			hdb.mu.Lock()
			if len(hdb.scanList) == 0 {
				// Scan list is empty, can exit. Let the world know that nobody
				// is emptying the scan list anymore.
				hdb.scanWait = false
				hdb.mu.Unlock()
				return
			}

			// Get the next host, shrink the scan list.
			entry := hdb.scanList[0]
			hdb.scanList = hdb.scanList[1:]
			delete(hdb.scanMap, entry.PublicKey.String())
			scansRemaining := len(hdb.scanList)

			// Grab the most recent entry for this host.
			recentEntry, exists := hdb.hostTree.Select(entry.PublicKey)
			if exists {
				entry = recentEntry
			}

			// Try to send this entry to an existing idle worker (non-blocking).
			select {
			case scanPool <- entry:
				hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
				hdb.mu.Unlock()
				continue
			default:
			}

			// Create a new worker thread.
			if hdb.scanningThreads < maxScanningThreads || !starterThread {
				starterThread = true
				hdb.scanningThreads++
				go func() {
					hdb.threadedProbeHosts(scanPool)
					hdb.mu.Lock()
					hdb.scanningThreads--
					hdb.mu.Unlock()
				}()
			}
			hdb.mu.Unlock()

			// Block while waiting for an opening in the scan pool.
			hdb.log.Debugf("Sending host %v for scan, %v hosts remain", entry.PublicKey.String(), scansRemaining)
			select {
			case scanPool <- entry:
				// iterate again
			case <-hdb.tg.StopChan():
				// quit
				return
			}
		}
	}()
}

// updateEntry updates an entry in the hostdb after a scan has taken place.
//
// CAUTION: This function will automatically add multiple entries to a new host
// to give that host some base uptime. This makes this function co-dependent
// with the host weight functions. Adjustments to the host weight functions
// need to keep this function in mind, and vice-versa.
func (hdb *HostDB) updateEntry(entry modules.HostDBEntry, netErr error) {
	// If the scan failed because we don't have Internet access, toss out this update.
	if netErr != nil && !hdb.gateway.Online() {
		return
	}

	// Grab the host from the host tree, and update it with the new settings.
	newEntry, exists := hdb.hostTree.Select(entry.PublicKey)
	if exists {
		newEntry.HostExternalSettings = entry.HostExternalSettings
	} else {
		newEntry = entry
	}

	// Update the recent interactions with this host.
	if netErr == nil {
		newEntry.RecentSuccessfulInteractions++
	} else {
		newEntry.RecentFailedInteractions++
	}

	// Add the datapoints for the scan.
	if len(newEntry.ScanHistory) < 2 {
		// Add two scans to the scan history. Two are needed because the scans
		// are forward looking, but we want this first scan to represent as
		// much as one week of uptime or downtime.
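		//
		// As a rough worked example (assuming ~10-minute blocks): a host
		// first seen 30 blocks ago gets a suggested start time of
		// (30+1)*10 = 310 minutes in the past, clamped so that it is never
		// more than one week before now.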
		earliestStartTime := time.Now().Add(time.Hour * 7 * 24 * -1)                                                   // Permit up to a week of starting uptime or downtime.
		suggestedStartTime := time.Now().Add(time.Minute * 10 * time.Duration(hdb.blockHeight-entry.FirstSeen+1) * -1) // Add one to the FirstSeen in case FirstSeen is this block, guarantees incrementing order.
		if suggestedStartTime.Before(earliestStartTime) {
			suggestedStartTime = earliestStartTime
		}
		newEntry.ScanHistory = modules.HostDBScans{
			{Timestamp: suggestedStartTime, Success: netErr == nil},
			{Timestamp: time.Now(), Success: netErr == nil},
		}
	} else {
		if newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Success && netErr != nil {
			hdb.log.Debugf("Host %v is being downgraded from an online host to an offline host: %v\n", newEntry.PublicKey.String(), netErr)
		}

		// Make sure that the timestamp of the scan we are appending is after
		// the timestamp of the previous scan. It may not be if the system
		// clock has changed. Nudging the timestamp forward prevents the
		// sort-check sanity checks from triggering.
		newTimestamp := time.Now()
		prevTimestamp := newEntry.ScanHistory[len(newEntry.ScanHistory)-1].Timestamp
		if !newTimestamp.After(prevTimestamp) {
			newTimestamp = prevTimestamp.Add(time.Second)
		}
		newEntry.ScanHistory = append(newEntry.ScanHistory, modules.HostDBScan{Timestamp: newTimestamp, Success: netErr == nil})
	}

	// Check whether any of the recent scans demonstrate uptime. The pruning and
	// compression of the history ensure that there are only relatively recent
	// scans represented.
	var recentUptime bool
	for _, scan := range newEntry.ScanHistory {
		if scan.Success {
			recentUptime = true
		}
	}

	// If the host has been offline for too long, delete the host from the
	// hostdb. Only delete if there have been enough scans over a long enough
	// period to be confident that the host really is offline for good.
	if time.Since(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime && !recentUptime && len(newEntry.ScanHistory) >= minScans {
		err := hdb.hostTree.Remove(newEntry.PublicKey)
		if err != nil {
			hdb.log.Println("ERROR: unable to remove a host entry that has had extensive downtime:", err)
		}

		// The function should terminate here as no more interaction is needed
		// with this host.
		return
	}

	// Compress any old scans into the historic values.
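	// For example, assuming the oldest scan is past maxHostDowntime and the
	// history is longer than minScans: if the two oldest scans are 10 and 9
	// days old and the older one failed, roughly 24 hours are added to
	// HistoricDowntime and the oldest scan is dropped; this repeats until
	// the history is short or recent enough.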
	for len(newEntry.ScanHistory) > minScans && time.Since(newEntry.ScanHistory[0].Timestamp) > maxHostDowntime {
		timePassed := newEntry.ScanHistory[1].Timestamp.Sub(newEntry.ScanHistory[0].Timestamp)
		if newEntry.ScanHistory[0].Success {
			newEntry.HistoricUptime += timePassed
		} else {
			newEntry.HistoricDowntime += timePassed
		}
		newEntry.ScanHistory = newEntry.ScanHistory[1:]
	}

	// Add the updated entry.
	if !exists {
		err := hdb.hostTree.Insert(newEntry)
		if err != nil {
			hdb.log.Println("ERROR: unable to insert an entry that was thought to be new:", err)
		} else {
			hdb.log.Debugf("Adding host %v to the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
		}
	} else {
		err := hdb.hostTree.Modify(newEntry)
		if err != nil {
			hdb.log.Println("ERROR: unable to modify an entry that is thought to exist:", err)
		} else {
			hdb.log.Debugf("Updating host %v in the hostdb. Net error: %v\n", newEntry.PublicKey.String(), netErr)
		}
	}
}

// managedScanHost will connect to a host and request its settings, recording
// the outcome as uptime or downtime and updating the stored entry with the
// host's current settings.
func (hdb *HostDB) managedScanHost(entry modules.HostDBEntry) {
	// Request settings from the queued host entry.
	netAddr := entry.NetAddress
	pubKey := entry.PublicKey
	hdb.log.Debugf("Scanning host %v at %v", pubKey, netAddr)

	// Update the historic interactions of the entry if necessary.
	hdb.mu.RLock()
	updateHostHistoricInteractions(&entry, hdb.blockHeight)
	hdb.mu.RUnlock()

	var settings modules.HostExternalSettings
	err := func() error {
		dialer := &net.Dialer{
			Cancel:  hdb.tg.StopChan(),
			Timeout: hostRequestTimeout,
		}
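		// Dialer.Cancel causes the dial to abort when the hostdb shuts down.
		// (In newer Go releases, Cancel is deprecated in favor of
		// DialContext.)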
		conn, err := dialer.Dial("tcp", string(netAddr))
		if err != nil {
			return err
		}
		connCloseChan := make(chan struct{})
		go func() {
			select {
			case <-hdb.tg.StopChan():
			case <-connCloseChan:
			}
			conn.Close()
		}()
		defer close(connCloseChan)
		conn.SetDeadline(time.Now().Add(hostScanDeadline))

		err = encoding.WriteObject(conn, modules.RPCSettings)
		if err != nil {
			return err
		}
		var pubkey crypto.PublicKey
		copy(pubkey[:], pubKey.Key)
		return crypto.ReadSignedObject(conn, &settings, maxSettingsLen, pubkey)
	}()
	if err != nil {
		hdb.log.Debugf("Scan of host at %v failed: %v", netAddr, err)
	} else {
		hdb.log.Debugf("Scan of host at %v succeeded.", netAddr)
		entry.HostExternalSettings = settings
	}

	// Update the host tree with the new entry, including the scan error (or
	// lack thereof); updateEntry records the result whether the scan
	// succeeded or failed.
	hdb.mu.Lock()
	hdb.updateEntry(entry, err)
	hdb.mu.Unlock()
}

// threadedProbeHosts pulls hosts from the scan pool and runs a scan on them.
func (hdb *HostDB) threadedProbeHosts(scanPool <-chan modules.HostDBEntry) {
	err := hdb.tg.Add()
	if err != nil {
		return
	}
	defer hdb.tg.Done()
	for hostEntry := range scanPool {
		// Block until the hostdb has internet connectivity.
		for {
			hdb.mu.RLock()
			online := hdb.gateway.Online()
			hdb.mu.RUnlock()
			if online {
				break
			}
			select {
			case <-time.After(time.Second * 30):
				continue
			case <-hdb.tg.StopChan():
				return
			}
		}

		// There appears to be internet connectivity, continue with the
		// scan.
		hdb.managedScanHost(hostEntry)
	}
}

// threadedScan is an ongoing function which will query the full set of hosts
// every few hours to see who is online and available for uploading.
func (hdb *HostDB) threadedScan() {
	err := hdb.tg.Add()
	if err != nil {
		return
	}
	defer hdb.tg.Done()

	for {
		// Set up a scan for the hostCheckupQuantity most valuable hosts in
		// the hostdb. Hosts that fail their scans will be docked
		// significantly, pushing them further back in the hierarchy, ensuring
		// that for the most part only online hosts are getting scanned unless
		// there are fewer than hostCheckupQuantity of them.

		// Grab a set of hosts to scan, picking both online and offline hosts
		// to get high diversity.
		var onlineHosts, offlineHosts []modules.HostDBEntry
		allHosts := hdb.hostTree.All()
		for i := len(allHosts) - 1; i >= 0; i-- {
			if len(onlineHosts) >= hostCheckupQuantity && len(offlineHosts) >= hostCheckupQuantity {
				break
			}

			// Figure out if the host is online or offline.
			host := allHosts[i]
			online := len(host.ScanHistory) > 0 && host.ScanHistory[len(host.ScanHistory)-1].Success
			if online && len(onlineHosts) < hostCheckupQuantity {
				onlineHosts = append(onlineHosts, host)
			} else if !online && len(offlineHosts) < hostCheckupQuantity {
				offlineHosts = append(offlineHosts, host)
			}
		}

		// Queue the scans for each host.
		hdb.log.Println("Performing scan on", len(onlineHosts), "online hosts and", len(offlineHosts), "offline hosts.")
		hdb.mu.Lock()
		for _, host := range onlineHosts {
			hdb.queueScan(host)
		}
		for _, host := range offlineHosts {
			hdb.queueScan(host)
		}
		hdb.mu.Unlock()

		// Sleep for a random amount of time before doing another round of
		// scanning. The minimums and maximums keep the scan time reasonable,
		// while the randomness prevents the scanning from always happening at
		// the same time of day or week.
		sleepRange := uint64(maxScanSleep - minScanSleep)
		sleepTime := minScanSleep + time.Duration(fastrand.Uint64n(sleepRange))
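		// As a hypothetical illustration: if minScanSleep were 6 hours and
		// maxScanSleep 10 hours (the actual values live in the package
		// constants), sleepTime would fall uniformly in [6h, 10h).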

		// Sleep until it's time for the next scan cycle.
		select {
		case <-hdb.tg.StopChan():
			return
		case <-time.After(sleepTime):
		}
	}
}