gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/worker.go

package renter

// worker.go defines a worker with a work loop. Each worker is connected to a
// single host, and the work loop will listen for jobs and then perform them.
//
// The worker has a set of jobs that it is capable of performing. The standard
// functions for a job are Queue, Kill, and Perform. Queue will add a job to
// the queue of work of that type. Kill will empty the queue and close out any
// work that will not be completed. Perform will grab a job from the queue if
// one exists and complete that piece of work. See
// snapshotworkerfetchbackups.go for a clean example.
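//
// As a rough sketch of that pattern, the queues can be pictured as
// implementing the following interface. This is hypothetical: in the actual
// code each queue is a concrete type, such as fetchBackupsJobQueue below,
// with its own managed methods rather than an implementation of a shared
// interface.
//
//	type workerJobQueue interface {
//		callQueue(job interface{}) // add a job to this queue's list of work
//		callKill()                 // empty the queue, failing work that will not complete
//		callPerform() bool         // pop and perform one job; false if the queue was empty
//	}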

// TODO: A single session should be added to the worker that gets maintained
// within the work loop. All jobs performed by the worker will use the
// worker's single session.
//
// TODO: The upload and download code needs to be moved into properly
// separated subsystems.
//
// TODO: Need to write testing around the kill functions in the worker, to
// clean up any queued jobs after a worker has been killed.

import (
	"sync"
	"time"

	"gitlab.com/SiaPrime/SiaPrime/types"
)

// A worker listens for work on a certain host.
//
// The mutex of the worker only protects the 'unprocessedChunks' and the
// 'standbyChunks' fields of the worker. The rest of the fields are
// interacted with exclusively by the primary worker thread, of which only
// one ever exists at a time.
//
// The workers have a concept of 'cooldown' for uploads and downloads. If a
// download or upload operation fails, the assumption is that future attempts
// are also likely to fail, because whatever condition caused the failure
// will persist until some time has passed. Without cooldowns, flaky hosts in
// the worker sets substantially reduce overall upload and download
// performance and throughput.
type worker struct {
	// The host pub key also serves as an ID for the worker, as there is only
	// one worker per host.
	staticHostPubKey types.SiaPublicKey

	// Download variables that are not protected by a mutex, but also do not
	// need to be protected by a mutex, as they are only accessed by the
	// master thread for the worker.
	//
	// The 'owned' prefix here indicates that only the master thread for the
	// object (in this case, 'threadedWorkLoop') is allowed to access these
	// variables. Because only that thread is allowed to access them, it can
	// do so without a mutex.
	ownedDownloadConsecutiveFailures int       // How many failures in a row?
	ownedDownloadRecentFailure       time.Time // How recent was the last failure?

	// Download variables related to queuing work. They have a separate mutex
	// to minimize lock contention.
	downloadChunks     []*unfinishedDownloadChunk // Yet unprocessed work items.
	downloadMu         sync.Mutex
	downloadTerminated bool // Has downloading been terminated for this worker?

	// Fetch backups queue for the worker.
	staticFetchBackupsJobQueue fetchBackupsJobQueue

	// Upload variables.
	unprocessedChunks         []*unfinishedUploadChunk // Yet unprocessed work items.
	uploadConsecutiveFailures int                      // How many times in a row uploading has failed.
	uploadRecentFailure       time.Time                // How recent was the last failure?
	uploadRecentFailureErr    error                    // What was the reason for the last failure?
	uploadTerminated          bool                     // Have we stopped uploading?

	// Utilities.
	//
	// The mutex is only needed when interacting with 'downloadChunks' and
	// 'unprocessedChunks', as everything else is only accessed from the
	// single master thread.
	killChan chan struct{} // Worker will shut down if a signal is sent down this channel.
	mu       sync.Mutex
	renter   *Renter
	wakeChan chan struct{} // Worker will check queues if given a wake signal.
}

// managedBlockUntilReady will block until the worker has internet
// connectivity. 'false' will be returned if a kill signal is received or if
// the renter is shut down before internet connectivity is restored. 'true'
// will be returned if internet connectivity is successfully restored.
func (w *worker) managedBlockUntilReady() bool {
	// Check if the worker has received a kill signal, or if the renter has
	// received a stop signal.
	select {
	case <-w.renter.tg.StopChan():
		return false
	case <-w.killChan:
		return false
	default:
	}

	// Check internet connectivity. If the worker does not have internet
	// connectivity, block until connectivity is restored.
	for !w.renter.g.Online() {
		select {
		case <-w.renter.tg.StopChan():
			return false
		case <-w.killChan:
			return false
		case <-time.After(offlineCheckFrequency):
		}
	}
	return true
}

// staticWake needs to be called any time that a job is queued.
func (w *worker) staticWake() {
	select {
	case w.wakeChan <- struct{}{}:
	default:
	}
}
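
// The non-blocking send in staticWake relies on wakeChan being buffered with
// a capacity of 1 (see newWorker below): one wake signal is recorded if none
// is already pending, and redundant signals are dropped instead of blocking
// the caller. A hedged sketch of a typical call site (hypothetical; the real
// queueing code lives with each job type):
//
//	// queueUploadChunk enqueues the chunk under the worker's mutex, then
//	// wakes the worker so threadedWorkLoop re-checks its queues.
//	func (w *worker) queueUploadChunk(uc *unfinishedUploadChunk) {
//		w.mu.Lock()
//		w.unprocessedChunks = append(w.unprocessedChunks, uc)
//		w.mu.Unlock()
//		w.staticWake()
//	}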

// threadedWorkLoop continually checks if work has been issued to a worker.
// The work loop checks for different types of work in a specific order,
// forming a priority queue for the various types of work. It is possible for
// continuous requests for one type of work to drown out a worker's ability
// to perform other types of work.
//
// If no work is found, the worker will sleep until woken up. Because each
// iteration is stateless, it may be possible to reduce the goroutine count
// in Sia by spinning down the worker (expiring its thread) when there is no
// work, and then spawning a new thread, if one does not already exist, when
// the worker is next woken. This would not interrupt any of the worker's
// connections, because the worker object is kept in memory via the worker
// map.
func (w *worker) threadedWorkLoop() {
	// Ensure that all queued jobs are gracefully cleaned up when the worker
	// is shut down.
	//
	// TODO: Need to write testing around these kill functions and ensure
	// they are executing correctly.
	defer w.managedKillUploading()
	defer w.managedKillDownloading()
	defer w.managedKillFetchBackupsJobs()

	// Primary work loop. There are several types of jobs that the worker can
	// perform, and they are attempted with a specific priority. If any type
	// of work is attempted, the loop resets to check for higher priority
	// work again. This means that a stream of higher priority tasks can
	// starve a building set of lower priority tasks.
	//
	// 'workAttempted' indicates that there was a job to perform, and that a
	// nontrivial amount of time was spent attempting to perform the job. The
	// job may or may not have been successful; that is irrelevant here.
	for {
		// There are certain conditions under which the worker should either
		// block or exit. managedBlockUntilReady will block until those
		// conditions have cleared, returning 'true' when the worker can
		// proceed and 'false' if the worker should exit.
		if !w.managedBlockUntilReady() {
			return
		}

		var workAttempted bool
		// Perform any job to fetch the list of backups from the host.
		workAttempted = w.managedPerformFetchBackupsJob()
		if workAttempted {
			continue
		}
		// Perform any job to help download a chunk.
		workAttempted = w.managedPerformDownloadChunkJob()
		if workAttempted {
			continue
		}
		// Perform any job to help upload a chunk.
		workAttempted = w.managedPerformUploadChunkJob()
		if workAttempted {
			continue
		}

		// Block until the worker is woken because new work was queued, or
		// until a kill or stop signal is received.
		select {
		case <-w.wakeChan:
			continue
		case <-w.killChan:
			return
		case <-w.renter.tg.StopChan():
			return
		}
	}
}

// newWorker will create and return a worker that is ready to receive jobs.
func (r *Renter) newWorker(hostPubKey types.SiaPublicKey) *worker {
	return &worker{
		staticHostPubKey: hostPubKey,

		killChan: make(chan struct{}),
		wakeChan: make(chan struct{}, 1),

		renter: r,
	}
}
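
// A hedged sketch of the expected worker lifecycle, assuming the renter
// starts the work loop in its own goroutine after construction (the actual
// worker pool management lives elsewhere in the renter package):
//
//	w := r.newWorker(hostPubKey)
//	go w.threadedWorkLoop()
//
//	// Jobs are then queued through the per-job-type queue methods, each of
//	// which calls staticWake. On shutdown, a signal on killChan makes the
//	// loop return, and its deferred kill functions clean up any jobs that
//	// are still queued.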