gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/workerjobgeneric.go (about) 1 package renter 2 3 import ( 4 "container/list" 5 "context" 6 "sync" 7 "time" 8 9 "gitlab.com/NebulousLabs/errors" 10 ) 11 12 var errDiscardingCanceledJob = errors.New("callNext: skipping and discarding already canceled job") 13 14 type ( 15 // jobGeneric implements the basic functionality for a job. 16 jobGeneric struct { 17 staticCtx context.Context 18 19 staticQueue workerJobQueue 20 21 // staticMetadata is a generic field on the job that can be set and 22 // casted by implementations of a job 23 staticMetadata interface{} 24 25 // externExecuteTime is set when the job gets executed 26 // 27 // NOTE: the 'extern' prefix is used here even though the field is not 28 // governed by a mutex, it can be accessed by anyone since a job is not 29 // used from more than one thread - it is not static because it's not 30 // set on initialization but rather by the implementation of callExecute 31 externExecuteTime time.Time 32 } 33 34 // jobGenericQueue is a generic queue for a job. It has a mutex, references 35 // a worker, tracks whether or not it has been killed, and has a cooldown 36 // timer. It does not have an array of jobs that are in the queue, because 37 // those are type specific. 38 jobGenericQueue struct { 39 jobs *list.List 40 41 killed bool 42 43 cooldownUntil time.Time 44 consecutiveFailures uint64 45 46 // firstFailureTime is set by a failed job in case that job was executed 47 // after the current value, it is cleared by a successful job, it's used 48 // to conditionally increment the consecutive failures field, rather 49 // than having parallelly executed failed jobs all increment it 50 firstFailureTime time.Time 51 52 recentErr error 53 recentErrTime time.Time 54 55 staticWorkerObj *worker // name conflict with staticWorker method 56 mu sync.Mutex 57 } 58 59 // workerJob defines a job that the worker is able to perform. 60 workerJob interface { 61 // callDicard will discard this job, sending an error down the response 62 // channel of the job. The provided error should be part of the error 63 // that gets sent. 64 callDiscard(error) 65 66 // callExecute will run the actual job. 67 callExecute() error 68 69 // callExpectedBandwidth will return the amount of bandwidth that a job 70 // expects to consume. 71 callExpectedBandwidth() (upload uint64, download uint64) 72 73 // staticGetMetadata returns a metadata object. 74 staticGetMetadata() interface{} 75 76 // staticCanceled returns true if the job has been canceled, false 77 // otherwise. 78 staticCanceled() bool 79 } 80 81 // workerJobQueue defines an interface to create a worker job queue. 82 workerJobQueue interface { 83 // callDiscardAll will discard all of the jobs in the queue using the 84 // provided error. 85 callDiscardAll(error) 86 87 // callReportFailure should be called on the queue every time that a job 88 // fails, and include the error associated with the failure and the time 89 // at which the job was executed. 90 callReportFailure(error, time.Time, time.Time) 91 92 // callReportSuccess should be called on the queue every time that a job 93 // succeeds. 94 callReportSuccess() 95 96 // callStatus returns the status of the queue 97 callStatus() workerJobQueueStatus 98 99 // staticWorker will return the worker of the job queue. 100 staticWorker() *worker 101 } 102 103 // workerJobQueueStatus is a struct that reflects the status of the queue 104 workerJobQueueStatus struct { 105 size uint64 106 cooldownUntil time.Time 107 consecutiveFailures uint64 108 recentErr error 109 recentErrTime time.Time 110 } 111 ) 112 113 // newJobGeneric returns an initialized jobGeneric. The queue that is associated 114 // with the job should be used as the input to this function. The job will 115 // cancel itself if the cancelChan is closed. 116 func newJobGeneric(ctx context.Context, queue workerJobQueue, metadata interface{}) jobGeneric { 117 return jobGeneric{ 118 staticCtx: ctx, 119 staticQueue: queue, 120 staticMetadata: metadata, 121 } 122 } 123 124 // newJobGenericQueue will return an initialized generic job queue. 125 func newJobGenericQueue(w *worker) *jobGenericQueue { 126 return &jobGenericQueue{ 127 jobs: list.New(), 128 staticWorkerObj: w, 129 } 130 } 131 132 // staticCanceled returns whether or not the job has been canceled. 133 func (j *jobGeneric) staticCanceled() bool { 134 select { 135 case <-j.staticCtx.Done(): 136 return true 137 default: 138 return false 139 } 140 } 141 142 // staticGetMetadata returns the job's metadata. 143 func (j *jobGeneric) staticGetMetadata() interface{} { 144 return j.staticMetadata 145 } 146 147 // add will add a job to the queue. 148 func (jq *jobGenericQueue) add(j workerJob) bool { 149 if jq.killed || jq.onCooldown() { 150 return false 151 } 152 jq.jobs.PushBack(j) 153 jq.staticWorkerObj.staticWake() 154 return true 155 } 156 157 // callAdd will add a job to the queue. 158 func (jq *jobGenericQueue) callAdd(j workerJob) bool { 159 jq.mu.Lock() 160 defer jq.mu.Unlock() 161 return jq.add(j) 162 } 163 164 // callCooldownStatus returns all necessary information to present the queues' cooldown status. 165 func (jq *jobGenericQueue) callCooldownStatus() (bool, bool, int, time.Duration, string) { 166 jq.mu.Lock() 167 defer jq.mu.Unlock() 168 169 var coolDownErrStr string 170 if jq.onCooldown() && jq.recentErr != nil { 171 coolDownErrStr = jq.recentErr.Error() 172 } 173 174 var coolDownUntil time.Duration 175 if jq.onCooldown() { 176 coolDownUntil = time.Until(jq.cooldownUntil) 177 } 178 179 return jq.onCooldown(), jq.killed, jq.jobs.Len(), coolDownUntil, coolDownErrStr 180 } 181 182 // callDiscardAll will discard all jobs in the queue using the provided error. 183 func (jq *jobGenericQueue) callDiscardAll(err error) { 184 jq.mu.Lock() 185 defer jq.mu.Unlock() 186 jq.discardAll(err) 187 } 188 189 // callKill will kill the queue, discarding all jobs and ensuring no more jobs 190 // can be added. 191 func (jq *jobGenericQueue) callKill() { 192 jq.mu.Lock() 193 defer jq.mu.Unlock() 194 195 err := errors.New("worker is being killed") 196 jq.discardAll(err) 197 jq.killed = true 198 } 199 200 // callIsKilled returns whether or not the jobGenericQueue was killed or not 201 func (jq *jobGenericQueue) callIsKilled() bool { 202 jq.mu.Lock() 203 defer jq.mu.Unlock() 204 return jq.killed 205 } 206 207 // callLen returns the number of jobs in the queue. 208 func (jq *jobGenericQueue) callLen() int { 209 jq.mu.Lock() 210 defer jq.mu.Unlock() 211 return jq.jobs.Len() 212 } 213 214 // callNext returns the next job in the worker queue. If there is no job in the 215 // queue, 'nil' will be returned. 216 func (jq *jobGenericQueue) callNext() workerJob { 217 jq.mu.Lock() 218 defer jq.mu.Unlock() 219 220 // Loop through the jobs, looking for the first job that hasn't yet been 221 // canceled. Remove jobs from the queue along the way. 222 for job := jq.jobs.Front(); job != nil; job = job.Next() { 223 // Remove the job from the list. 224 jq.jobs.Remove(job) 225 226 // Check if the job is already canceled. 227 wj := job.Value.(workerJob) 228 if wj.staticCanceled() { 229 wj.callDiscard(errDiscardingCanceledJob) 230 continue 231 } 232 return wj 233 } 234 235 // Job queue is empty, return nil. 236 return nil 237 } 238 239 // callOnCooldown returns whether the queue is on cooldown. 240 func (jq *jobGenericQueue) callOnCooldown() bool { 241 jq.mu.Lock() 242 defer jq.mu.Unlock() 243 return jq.onCooldown() 244 } 245 246 // callReportFailure reports that a job has failed within the queue. This will 247 // cause all remaining jobs in the queue to be discarded, and will put the queue 248 // on cooldown. 249 func (jq *jobGenericQueue) callReportFailure(err error, executedAt, failedAt time.Time) { 250 jq.mu.Lock() 251 defer jq.mu.Unlock() 252 253 // only update the cooldown if we're currently not on cooldown 254 if !jq.onCooldown() { 255 jq.cooldownUntil = cooldownUntil(jq.consecutiveFailures) 256 } 257 258 jq.recentErr = errors.AddContext(err, "discarding all jobs in this queue and going on cooldown") 259 jq.recentErrTime = time.Now() 260 261 // discard all jobs in the queue 262 jq.discardAll(jq.recentErr) 263 264 // only if the job was executed after the time of the first failure we want 265 // to count it as a consective failure, when that is the case we also want 266 // to update the time of the first failure to the current time 267 // 268 // NOTE: this is to ensure multiple concurrent jobs that fail at about the 269 // same time don't all count towards the consecutive failures, causing the 270 // cooldown to go from zero to max immediately 271 if executedAt.After(jq.firstFailureTime) { 272 jq.consecutiveFailures++ 273 jq.firstFailureTime = failedAt 274 } 275 } 276 277 // callReportSuccess lets the job queue know that there was a successsful job. 278 // Note that this will reset the consecutive failure count, but will not reset 279 // the recentErr value - the recentErr value is left as an error so that when 280 // debugging later, developers and users can see what errors had been caused by 281 // past issues. 282 func (jq *jobGenericQueue) callReportSuccess() { 283 jq.mu.Lock() 284 jq.consecutiveFailures = 0 285 jq.firstFailureTime = time.Time{} 286 jq.mu.Unlock() 287 } 288 289 // callStatus returns the queue status 290 func (jq *jobGenericQueue) callStatus() workerJobQueueStatus { 291 jq.mu.Lock() 292 defer jq.mu.Unlock() 293 return workerJobQueueStatus{ 294 size: uint64(jq.jobs.Len()), 295 cooldownUntil: jq.cooldownUntil, 296 consecutiveFailures: jq.consecutiveFailures, 297 recentErr: jq.recentErr, 298 recentErrTime: jq.recentErrTime, 299 } 300 } 301 302 // discardAll will drop all jobs from the queue. 303 func (jq *jobGenericQueue) discardAll(err error) { 304 for job := jq.jobs.Front(); job != nil; job = job.Next() { 305 wj := job.Value.(workerJob) 306 wj.callDiscard(err) 307 } 308 jq.jobs = list.New() 309 } 310 311 // staticWorker will return the worker that is associated with this job queue. 312 func (jq *jobGenericQueue) staticWorker() *worker { 313 return jq.staticWorkerObj 314 } 315 316 // onCooldown returns whether the queue is on cooldown. 317 func (jq *jobGenericQueue) onCooldown() bool { 318 return time.Now().Before(jq.cooldownUntil) 319 }