gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/worker.go

package renter

// worker.go defines a worker with a work loop. Each worker is connected to a
// single host, and the work loop will listen for jobs and then perform them.
//
// The worker has a set of jobs that it is capable of performing. The standard
// functions for a job are Queue, Kill, and Perform. Queue will add a job to the
// queue of work of that type. Kill will empty the queue and close out any work
// that will not be completed. Perform will grab a job from the queue if one
// exists and complete that piece of work. See snapshotworkerfetchbackups.go for
// a clean example.
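//
// As a rough sketch of that pattern, a job queue can be thought of as
// satisfying an interface like the one below. The interface is illustrative
// only; it is not a type defined in this package:
//
//	type jobQueue interface {
//		callQueueJob(job interface{}) // Add a job to the queue.
//		callKill()                    // Empty the queue and close out pending work.
//		callPerform()                 // Pop one job from the queue and perform it.
//	}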

// TODO: A single session should be added to the worker that gets maintained
// within the work loop. All jobs performed by the worker will use the worker's
// single session.
//
// TODO: The upload and download code needs to be moved into properly separated
// subsystems.
//
// TODO: Need to write testing around the kill functions in the worker, to clean
// up any queued jobs after a worker has been killed.

import (
	"sync"
	"time"

	"gitlab.com/SiaPrime/SiaPrime/types"
)

// A worker listens for work on a certain host.
//
// The mutex of the worker only protects the 'unprocessedChunks' field and the
// other upload fields of the worker. 'downloadChunks' is guarded by the
// separate 'downloadMu'. The rest of the fields are interacted with
// exclusively by the primary worker thread, and only one of those threads
// ever exists at a time.
//
// The workers have a concept of 'cooldown' for uploads and downloads. If a
// download or upload operation fails, the assumption is that future attempts
// are also likely to fail, because whatever condition resulted in the failure
// will still be present until some time has passed. Without cooldowns, flaky
// hosts in the worker set can substantially reduce overall upload and
// download performance and throughput.
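//
// As a sketch, a cooldown check could apply an exponential backoff based on
// the consecutive failure count. The names 'downloadFailureCooldown' and
// 'maxConsecutivePenalty' are assumed constants used here for illustration:
//
//	func (w *worker) onDownloadCooldown() bool {
//		requiredCooldown := downloadFailureCooldown // assumed base cooldown
//		for i := 0; i < w.ownedDownloadConsecutiveFailures && i < maxConsecutivePenalty; i++ {
//			requiredCooldown *= 2 // double the penalty for each consecutive failure
//		}
//		return time.Now().Before(w.ownedDownloadRecentFailure.Add(requiredCooldown))
//	}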
type worker struct {
	// The host pub key also serves as an id for the worker, as there is only
	// one worker per host.
	staticHostPubKey types.SiaPublicKey

	// Download variables that are not protected by a mutex, but also do not
	// need to be protected by a mutex, as they are only accessed by the master
	// thread for the worker.
	//
	// The 'owned' prefix here indicates that only the master thread for the
	// object (in this case, 'threadedWorkLoop') is allowed to access these
	// variables. Because only that thread is allowed to access the variables,
	// that thread is able to access these variables without a mutex.
	ownedDownloadConsecutiveFailures int       // How many failures in a row?
	ownedDownloadRecentFailure       time.Time // How recent was the last failure?

	// Download variables related to queuing work. They have a separate mutex,
	// 'downloadMu', to minimize lock contention.
	downloadChunks     []*unfinishedDownloadChunk // Yet unprocessed work items.
	downloadMu         sync.Mutex
	downloadTerminated bool // Has downloading been terminated for this worker?

	// Fetch backups queue for the worker.
	staticFetchBackupsJobQueue fetchBackupsJobQueue

	// Upload variables.
	unprocessedChunks         []*unfinishedUploadChunk // Yet unprocessed work items.
	uploadConsecutiveFailures int                      // How many times in a row uploading has failed.
	uploadRecentFailure       time.Time                // How recent was the last failure?
	uploadRecentFailureErr    error                    // What was the reason for the last failure?
	uploadTerminated          bool                     // Have we stopped uploading?

	// Utilities.
	//
	// The mutex is only needed when interacting with 'unprocessedChunks' and
	// the other upload fields; 'downloadChunks' has its own 'downloadMu', and
	// everything else is only accessed from the single master thread.
	killChan chan struct{} // Worker will shut down if a signal is sent down this channel.
	mu       sync.Mutex
	renter   *Renter
	wakeChan chan struct{} // Worker will check queues if given a wake signal.
}
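
// As a sketch of how work arrives, a function queuing a download chunk would
// append to 'downloadChunks' under 'downloadMu' and then wake the worker. The
// name 'callQueueDownloadChunk' is hypothetical; only the fields are declared
// in this file:
//
//	func (w *worker) callQueueDownloadChunk(udc *unfinishedDownloadChunk) {
//		w.downloadMu.Lock()
//		w.downloadChunks = append(w.downloadChunks, udc)
//		w.downloadMu.Unlock()
//		w.staticWake() // ensure the work loop notices the new job
//	}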

// managedBlockUntilReady will block until the worker has internet connectivity.
// 'false' will be returned if a kill signal is received or if the renter is
// shut down before internet connectivity is restored. 'true' will be returned
// if internet connectivity is successfully restored.
func (w *worker) managedBlockUntilReady() bool {
	// Check if the worker has received a kill signal, or if the renter has
	// received a stop signal.
	select {
	case <-w.renter.tg.StopChan():
		return false
	case <-w.killChan:
		return false
	default:
	}

	// Check internet connectivity. If the worker does not have internet
	// connectivity, block until connectivity is restored.
	for !w.renter.g.Online() {
		select {
		case <-w.renter.tg.StopChan():
			return false
		case <-w.killChan:
			return false
		case <-time.After(offlineCheckFrequency):
		}
	}
	return true
}

// staticWake needs to be called any time that a job is queued, so that the
// worker will check its queues for the new work.
func (w *worker) staticWake() {
	select {
	case w.wakeChan <- struct{}{}:
	default:
	}
}
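
// For example, an upload queueing function would append the chunk under the
// worker mutex and then call staticWake. The name 'callQueueUploadChunk' is
// hypothetical and shown only to illustrate the pattern:
//
//	func (w *worker) callQueueUploadChunk(uc *unfinishedUploadChunk) {
//		w.mu.Lock()
//		w.unprocessedChunks = append(w.unprocessedChunks, uc)
//		w.mu.Unlock()
//		w.staticWake() // wake the work loop to process the new chunk
//	}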

// threadedWorkLoop continually checks if work has been issued to a worker. The
// work loop checks for different types of work in a specific order, forming a
// priority queue for the various types of work. It is possible for continuous
// requests for one type of work to drown out a worker's ability to perform
// other types of work.
//
// If no work is found, the worker will sleep until woken up. Because each
// iteration is stateless, it may be possible to reduce the goroutine count in
// Sia by spinning down the worker / expiring the thread when there is no work,
// and then checking if the thread exists and creating a new one if not when
// alerting / waking the worker. This will not interrupt any connections that
// the worker has because the worker object will be kept in memory via the
// worker map.
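//
// A sketch of that spin-down idea, assuming a hypothetical 'threadRunning'
// flag guarded by the worker mutex (neither the flag nor this function exists
// in this file):
//
//	func (w *worker) wake() {
//		w.mu.Lock()
//		if !w.threadRunning {
//			w.threadRunning = true
//			go w.threadedWorkLoop() // respawn the work loop on demand
//		}
//		w.mu.Unlock()
//	}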
func (w *worker) threadedWorkLoop() {
	// Ensure that all queued jobs are gracefully cleaned up when the worker is
	// shut down.
	//
	// TODO: Need to write testing around these kill functions and ensure they
	// are executing correctly.
	defer w.managedKillUploading()
	defer w.managedKillDownloading()
	defer w.managedKillFetchBackupsJobs()

	// Primary work loop. There are several types of jobs that the worker can
	// perform, and they are attempted with a specific priority. If any type of
	// work is attempted, the loop resets to check for higher priority work
	// again. This means that a stream of higher priority tasks can starve a
	// building set of lower priority tasks.
	//
	// 'workAttempted' indicates that there was a job to perform, and that a
	// nontrivial amount of time was spent attempting to perform the job. The
	// job may or may not have been successful; that is irrelevant here.
	for {
		// There are certain conditions under which the worker should either
		// block or exit. This function will block until those conditions are
		// met, returning 'true' when the worker can proceed and 'false' if the
		// worker should exit.
		if !w.managedBlockUntilReady() {
			return
		}

		var workAttempted bool
		// Perform any job to fetch the list of backups from the host.
		workAttempted = w.managedPerformFetchBackupsJob()
		if workAttempted {
			continue
		}
		// Perform any job to help download a chunk.
		workAttempted = w.managedPerformDownloadChunkJob()
		if workAttempted {
			continue
		}
		// Perform any job to help upload a chunk.
		workAttempted = w.managedPerformUploadChunkJob()
		if workAttempted {
			continue
		}

		// Block until a wake signal arrives over the wake channel, or until a
		// kill or stop signal is received.
		select {
		case <-w.wakeChan:
			continue
		case <-w.killChan:
			return
		case <-w.renter.tg.StopChan():
			return
		}
	}
}

// newWorker will create and return a worker that is ready to receive jobs.
func (r *Renter) newWorker(hostPubKey types.SiaPublicKey) *worker {
	return &worker{
		staticHostPubKey: hostPubKey,

		killChan: make(chan struct{}),
		wakeChan: make(chan struct{}, 1),

		renter: r,
	}
}
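
// A typical way to put a new worker to work would be to create it and then
// launch its work loop; a minimal sketch, assuming the caller tracks the
// worker elsewhere (for example in the renter's worker map):
//
//	w := r.newWorker(hostPubKey)
//	go w.threadedWorkLoop()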