gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/uploadchunkdistributionqueue.go

package renter

import (
	"container/list"
	"sync"
	"time"
)

// uploadchunkdistributionqueue.go creates a queue for distributing upload
// chunks to workers. The queue has two lanes, one for priority upload work and
// one for low priority upload work. Priority upload work always goes first if
// it's available, but to ensure that low priority work gets at least a minimal
// amount of throughput we will bump low priority work into the priority work
// queue if too much priority work gets scheduled while the low priority work
// is waiting.

const (
	// uploadChunkDistributionBackoff dictates the amount of time that the
	// distributor will sleep after determining that a chunk is not ready to be
	// distributed because too many workers are busy.
	uploadChunkDistributionBackoff = time.Millisecond * 25

	// lowPriorityMinThroughputMultiplier determines the minimum throughput, as
	// a ratio, that low priority traffic will have when waiting in the queue.
	// For example, a multiplier of 10 means that for every 1 GB of high
	// priority traffic that gets queued, at least 100 MB of low priority
	// traffic will be promoted to high priority traffic. (An illustrative
	// sketch of this promotion arithmetic follows the ucdqFifo type below.)
	//
	// Raising the min throughput can negatively impact the latency for real
	// time uploads. A high rate means that users trying to upload new files
	// will often get stuck waiting for repair traffic that was bumped in
	// priority.
	//
	// If the throughput is too low, repair traffic will have no priority and
	// is at risk of starving due to lots of new upload traffic. In general,
	// the best solution for handling high repair traffic is to migrate the
	// current node to a maintenance server (that is not receiving new uploads)
	// and have users upload to a fresh node that has very little need of
	// repair traffic. Increasing lowPriorityMinThroughputMultiplier will
	// increase the total amount of data that a node can maintain at the cost
	// of latency for new uploads.
	lowPriorityMinThroughputMultiplier = 10 // 10%

	// workerUploadBusyThreshold is the number of jobs a worker needs to have
	// to be considered busy. A threshold of 1, for example, means the worker
	// is 'busy' if it has 1 upload job or more in its queue.
	workerUploadBusyThreshold = 1

	// workerUploadOverloadedThreshold is the number of jobs a worker needs to
	// have to be considered overloaded. A threshold of 3, for example, means
	// the worker is 'overloaded' if there are 3 jobs or more in its queue.
	workerUploadOverloadedThreshold = 3
)

// uploadChunkDistributionQueue is a struct which tracks which chunks are
// queued to be distributed to workers, and serializes distribution so that one
// chunk is added to workers at a time. Distribution is controlled so that
// workers get an even balance of work and no single worker ends up with a
// backlog that can slow down the whole system.
type uploadChunkDistributionQueue struct {
	processThreadRunning bool

	priorityBuildup uint64
	priorityLane    *ucdqFifo
	lowPriorityLane *ucdqFifo

	mu           sync.Mutex
	staticRenter *Renter
}

// ucdqFifo implements a fifo to use with the ucdq.
type ucdqFifo struct {
	*list.List
}
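// exampleLowPriorityBumpReady is an illustrative sketch added for
// documentation only; the helper name is hypothetical and nothing in the
// renter calls it. It shows the promotion arithmetic implied by
// lowPriorityMinThroughputMultiplier: a waiting low priority chunk is bumped
// into the priority lane once the accumulated priority traffic reaches ten
// times the memory that the low priority chunk needs.
func exampleLowPriorityBumpReady(priorityBuildup, lowPriorityMemoryNeeded uint64) bool {
	// For example, a low priority chunk that needs 40 MiB of memory is only
	// promoted after roughly 400 MiB of priority chunk memory has built up
	// while it waits, guaranteeing low priority work about 1/10th of the
	// lane's throughput.
	return priorityBuildup >= lowPriorityMemoryNeeded*lowPriorityMinThroughputMultiplier
}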
// newUploadChunkDistributionQueue will initialize a ucdq for the renter.
func newUploadChunkDistributionQueue(r *Renter) *uploadChunkDistributionQueue {
	return &uploadChunkDistributionQueue{
		priorityLane:    newUCDQfifo(),
		lowPriorityLane: newUCDQfifo(),

		staticRenter: r,
	}
}

// newUCDQfifo inits a fifo for the ucdq.
func newUCDQfifo() *ucdqFifo {
	return &ucdqFifo{
		List: list.New(),
	}
}

// Pop removes and returns the first element in the fifo, removing that element
// from the queue.
func (u *ucdqFifo) Pop() *unfinishedUploadChunk {
	mr := u.Front()
	if mr == nil {
		return nil
	}
	return u.List.Remove(mr).(*unfinishedUploadChunk)
}

// callAddUploadChunk will add an unfinished upload chunk to the queue. The
// chunk will be put into a lane based on whether the memory was requested with
// priority or not.
func (ucdq *uploadChunkDistributionQueue) callAddUploadChunk(uc *unfinishedUploadChunk) {
	// We need to hold a lock for the whole process of adding an upload chunk.
	ucdq.mu.Lock()
	defer ucdq.mu.Unlock()

	// Since we're definitely going to add a chunk to the queue, we also need
	// to make sure that a processing thread is launched to process it. If
	// there's already a processing thread running, nothing will happen. If
	// there is not a processing thread running, we need to set the thread
	// running bool to true while still holding the ucdq lock, which is why
	// this function is deferred to run before the unlock.
	defer func() {
		// Check if there is a thread running to process the queue.
		if !ucdq.processThreadRunning {
			ucdq.processThreadRunning = true
			ucdq.staticRenter.tg.Launch(ucdq.threadedProcessQueue)
		}
	}()

	// If the chunk is not a priority chunk, put it in the low priority lane.
	if !uc.staticPriority {
		ucdq.lowPriorityLane.PushBack(uc)
		return
	}

	// If the chunk is a priority chunk, add it to the priority lane and then
	// determine whether a low priority chunk needs to be bumped to the
	// priority lane.
	//
	// The bumping happens when a priority chunk is added to ensure that low
	// priority chunks are evenly distributed throughout the high priority
	// queue, and that a sudden influx of high priority chunks doesn't mean
	// that low priority chunks will have to wait a long time even if they get
	// bumped. (A worked example of the bump arithmetic follows this function.)
	ucdq.priorityLane.PushBack(uc)
	if ucdq.lowPriorityLane.Len() == 0 {
		// No need to worry about priority buildup if there is nothing waiting
		// in the low priority lane.
		return
	}
	// Tally up the new buildup caused by this new priority chunk.
	ucdq.priorityBuildup += uc.staticMemoryNeeded

	// Bump items from the low priority lane into the priority lane as long as
	// there is enough buildup to justify bumping them.
	for x := ucdq.lowPriorityLane.Pop(); x != nil; x = ucdq.lowPriorityLane.Pop() {
		// If there is enough buildup, bump the item.
		needed := x.staticMemoryNeeded * lowPriorityMinThroughputMultiplier
		if ucdq.priorityBuildup >= needed {
			ucdq.priorityBuildup -= needed
			ucdq.priorityLane.PushBack(x)
			continue
		}
		// Otherwise return the element to the low priority lane. We are done.
		ucdq.lowPriorityLane.PushFront(x)
		break
	}
	// If all low priority items were bumped into the high priority lane, the
	// buildup can be cleared out.
	if ucdq.lowPriorityLane.Len() == 0 {
		ucdq.priorityBuildup = 0
	}
}
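// Worked example of the bump loop in callAddUploadChunk above (illustrative
// numbers, not from the original source): suppose the low priority lane holds
// three chunks that each need 40 MiB of memory, and enough priority chunks
// have been queued while they wait to raise priorityBuildup to 900 MiB. Each
// waiting chunk requires 40 MiB * 10 = 400 MiB of buildup to be promoted, so
// the loop bumps the first chunk (buildup drops to 500 MiB) and the second
// chunk (buildup drops to 100 MiB), then pushes the third chunk back to the
// front of the low priority lane because 100 MiB < 400 MiB. Had all three
// chunks been promoted, priorityBuildup would have been reset to zero.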
// threadedProcessQueue serializes the processing of chunks in the distribution
// queue. If there are chunks in the priority lane, it'll handle those first;
// only when there are no chunks in the priority lane will it handle things in
// the low priority lane. Each lane is treated as a FIFO.
//
// When things are being pulled out of the low priority lane, the priority
// buildup can be reduced because the low priority lane is not starving.
//
// The general structure of this function is to pull a chunk out of a queue,
// then try to distribute the chunk. The distributor function may determine
// that the workers are not ready to process the chunk yet. If the distributor
// function indicates that a chunk was not distributed, the chunk should go
// back into the queue it came out of. Then on the next iteration, we will grab
// the highest priority chunk.
func (ucdq *uploadChunkDistributionQueue) threadedProcessQueue() {
	for {
		// Check whether the renter has shut down; return immediately if so.
		select {
		case <-ucdq.staticRenter.tg.StopChan():
			return
		default:
		}

		// Extract the next item in the queue.
		ucdq.mu.Lock()
		// First check for the exit condition - the queue is empty. While
		// holding the lock, release the process bool and then exit.
		if ucdq.priorityLane.Len() == 0 && ucdq.lowPriorityLane.Len() == 0 {
			ucdq.processThreadRunning = false
			ucdq.mu.Unlock()
			return
		}
		// At least one uc exists in the queue. Prefer to grab the priority
		// one; if there is no priority one, grab the low priority one. We need
		// to remember which lane the uc came from because we may need to put
		// it back into that lane later.
		var nextUC *unfinishedUploadChunk
		var priority bool
		if ucdq.priorityLane.Len() > 0 {
			nextUC = ucdq.priorityLane.Pop()
			priority = true
		} else {
			nextUC = ucdq.lowPriorityLane.Pop()
			priority = false
		}
		ucdq.mu.Unlock()

		// If the disrupt dependency is active, simulate chunk distribution
		// without actually distributing the chunk.
		var distributed bool
		if ucdq.staticRenter.staticDeps.Disrupt("DelayChunkDistribution") {
			time.Sleep(time.Second)
			distributed = true
		} else {
			// While not holding the lock but still blocking, pass the chunk
			// off to the thread that will distribute the chunk to workers.
			// This call can fail. If the call fails, the chunk should be
			// re-inserted at the front of the lane it came from.
			distributed = ucdq.staticRenter.managedDistributeChunkToWorkers(nextUC)
		}

		// If the chunk was not distributed, we want to block briefly to give
		// the workers time to process the items in their queue. The only
		// reason that a chunk will not be distributed is because workers have
		// too much work in their queue already.
		if !distributed {
			// NOTE: This could potentially be improved by switching to a
			// channel that waits for new chunks to appear or waits for
			// busy/overloaded workers to report a better state. We opted not
			// to do that here because 25ms is not a huge penalty to pay and
			// there's a fair amount of complexity involved in switching to a
			// better solution.
			ucdq.staticRenter.tg.Sleep(uploadChunkDistributionBackoff)
		}
		if distributed && priority {
			// If the chunk was distributed successfully and we pulled the
			// chunk from the priority lane, there is nothing more to do.
			continue
		}
		if distributed && !priority {
			// If the chunk was distributed successfully and we pulled the
			// chunk from the low priority lane, we need to subtract from the
			// priority buildup as the low priority lane has made progress.
			ucdq.mu.Lock()
			needed := nextUC.staticMemoryNeeded * lowPriorityMinThroughputMultiplier
			if ucdq.priorityBuildup < needed {
				ucdq.priorityBuildup = 0
			} else {
				ucdq.priorityBuildup -= needed
			}
			ucdq.mu.Unlock()
			continue
		}
		if !distributed && priority {
			// If the chunk was not distributed, we need to push it back to the
			// front of the priority lane and then cycle again.
			ucdq.mu.Lock()
			ucdq.priorityLane.PushFront(nextUC)
			ucdq.mu.Unlock()
			continue
		}
		if !distributed && !priority {
			// If the chunk was not distributed, push it back into the front of
			// the low priority lane. The next iteration may grab a high
			// priority chunk if a new high priority chunk has appeared while
			// we were checking on this chunk.
			ucdq.mu.Lock()
			ucdq.lowPriorityLane.PushFront(nextUC)
			ucdq.mu.Unlock()
			continue
		}
		panic("missing case, this code should not be reachable")
	}
}
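// Recap of the four outcomes handled in threadedProcessQueue above (added as a
// documentation aid, no new behavior):
//
//	distributed, priority lane:      nothing more to do, move to the next chunk.
//	distributed, low priority lane:  reduce priorityBuildup by 10x the chunk's
//	                                 memory (floored at zero), then continue.
//	not distributed, priority lane:  sleep for uploadChunkDistributionBackoff,
//	                                 then requeue the chunk at the front of the
//	                                 priority lane.
//	not distributed, low priority:   sleep for uploadChunkDistributionBackoff,
//	                                 then requeue the chunk at the front of the
//	                                 low priority lane.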
// managedDistributeChunkToWorkers is a function which will attempt to
// distribute the chunk to workers for upload. If the distribution is
// successful, it will return true. If the distribution is not successful, it
// will return false, indicating that distribution needs to be retried.
func (r *Renter) managedDistributeChunkToWorkers(uc *unfinishedUploadChunk) bool {
	// Grab the best set of workers to receive this chunk. This function may
	// take a significant amount of time to return, as it will wait until there
	// are enough workers available to accept the chunk. This waiting pressure
	// keeps throughput high because most workers will continue to be busy all
	// the time, but it also significantly improves latency for high priority
	// chunks because the distribution queue can ensure that priority chunks
	// always get to the front of the worker line.
	workers, finalized := r.managedFindBestUploadWorkerSet(uc)
	if !finalized {
		return false
	}

	// Give the chunk to each worker, marking the number of workers that have
	// received the chunk. Only count the worker if the worker's upload queue
	// accepts the job.
	uc.managedIncreaseRemainingWorkers(len(workers))
	jobsDistributed := 0
	for _, w := range workers {
		if w.callQueueUploadChunk(uc) {
			jobsDistributed++
		}
	}

	uc.managedUpdateDistributionTime()
	r.staticRepairLog.Printf("Distributed chunk %v of %s to %v workers.", uc.staticIndex, uc.staticSiaPath, jobsDistributed)
	// Cleanup is required after distribution to ensure that memory is released
	// for any pieces which don't have a worker.
	r.managedCleanUpUploadChunk(uc)
	return true
}
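// Illustrative note (hypothetical values, added for documentation): if
// managedFindBestUploadWorkerSet returns 8 workers and only 7 of their upload
// queues accept the job, jobsDistributed ends up at 7 and the repair log
// records a line along the lines of
// "Distributed chunk 3 of home/videos/vacation.mp4 to 7 workers.", after which
// managedCleanUpUploadChunk releases memory for any pieces that did not end up
// with a worker.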
// managedFindBestUploadWorkerSet will look through the set of available
// workers and hand-pick the workers that should be used for the upload chunk.
// It may also choose to wait until more workers are available, which means
// this function can potentially block for long periods of time.
func (r *Renter) managedFindBestUploadWorkerSet(uc *unfinishedUploadChunk) ([]*worker, bool) {
	// Grab the set of workers to upload. If 'finalized' is false, it means
	// that all of the good workers are already busy, and we need to wait
	// before distributing the chunk.
	workers, finalized := managedSelectWorkersForUploading(uc, r.staticWorkerPool.callWorkers())
	if finalized {
		return workers, true
	}
	return nil, false
}

// managedSelectWorkersForUploading is a function that will select workers to
// be used in uploading a chunk to the network. This function can fail if there
// are not enough workers that are ready to take on more work, in which case
// the caller needs to wait before trying again.
//
// This function is meant to be called only by 'managedFindBestUploadWorkerSet',
// which handles the retry mechanism for you. The functions are split up this
// way to make the retry logic easier to understand.
func managedSelectWorkersForUploading(uc *unfinishedUploadChunk, workers []*worker) ([]*worker, bool) {
	r := uc.staticRenter

	// Scan through the workers and determine how many workers have available
	// slots to upload. Available workers and busy workers are both counted as
	// viable candidates for receiving work.
	var availableWorkers, busyWorkers, overloadedWorkers uint64
	var nOnCooldown, nGFU int
	totalWorkers := len(workers)
	for _, w := range workers {
		// Skip any worker that is on cooldown or is not GoodForUpload (!GFU).
		cache := w.staticCache()
		w.mu.Lock()
		onCooldown, _ := w.onUploadCooldown()
		numUnprocessedChunks := w.unprocessedChunks.Len()
		w.mu.Unlock()
		gfu := cache.staticContractUtility.GoodForUpload
		if onCooldown {
			nOnCooldown++
			continue
		}
		if !gfu {
			nGFU++
			continue
		}

		// Count the worker by status. A worker is 'available', 'busy', or
		// 'overloaded' depending on how many jobs it has in its upload queue.
		// Only available and busy workers are candidates to receive the
		// unfinished chunk.
		if numUnprocessedChunks < workerUploadBusyThreshold {
			availableWorkers++
		} else if numUnprocessedChunks < workerUploadOverloadedThreshold {
			busyWorkers++
		} else {
			overloadedWorkers++
			continue
		}
		workers[availableWorkers+busyWorkers-1] = w
	}
	// Truncate the set of workers to only include the available and busy
	// workers that were moved to the front of the slice while counting
	// workers.
	workers = workers[:availableWorkers+busyWorkers]

	// Distribute the upload depending on the number of pieces and the number
	// of workers. We want to handle every edge case where there are more
	// pieces than workers total, which means that waiting is not going to
	// improve the situation.
	if availableWorkers >= uint64(uc.staticMinimumPieces) && availableWorkers+busyWorkers >= uint64(uc.staticPiecesNeeded) {
		// This is the base success case. We have enough available workers to
		// get the chunk 'available' on the Sia network ASAP, and we have
		// enough busy workers to complete the chunk all the way.
		return workers, true
	}
	if availableWorkers >= uint64(uc.staticMinimumPieces) && overloadedWorkers == 0 {
		// This is an edge case where there are no overloaded workers, and
		// there are enough available workers to make the chunk available on
		// the Sia network. Because there are no overloaded workers, waiting
		// longer is not going to allow us to make more progress, so we need to
		// accept this chunk as-is.
		r.staticLog.Printf("WARN: uploading chunk %v even though we don't have enough workers to get the chunk to full health: total %v available: %v busy: %v overloaded: %v !gfu: %v onCooldown: %v", uc.id, totalWorkers, availableWorkers, busyWorkers, overloadedWorkers, nGFU, nOnCooldown)
		return workers, true
	}
	if len(workers) > 0 && overloadedWorkers == 0 && busyWorkers == 0 {
		// This is the worst of the success cases. It means we don't even have
		// enough workers to make the chunk available on the network, but all
		// the workers that we do have are available. Even though this is a bad
		// place to be, the right thing to do is move forward with the upload.
		r.staticLog.Printf("WARN: uploading chunk %v even though we don't have enough workers to make chunk available: total: %v available: %v busy: %v overloaded: %v !gfu: %v onCooldown: %v", uc.id, totalWorkers, availableWorkers, busyWorkers, overloadedWorkers, nGFU, nOnCooldown)
		return workers, true
	}

	// In all other cases, we should wait until either some busy workers have
	// processed enough chunks to become available workers, or until some
	// overloaded workers have processed enough chunks to become busy workers,
	// or both.
	return nil, false
}
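// exampleClassifyWorker is an illustrative sketch added for documentation; the
// helper name is hypothetical and nothing in the renter calls it. It restates
// the thresholds used by managedSelectWorkersForUploading above: a worker with
// an empty upload queue is 'available', a worker with 1 or 2 queued upload
// jobs is 'busy', and a worker with 3 or more is 'overloaded'.
func exampleClassifyWorker(numUnprocessedChunks int) string {
	switch {
	case numUnprocessedChunks < workerUploadBusyThreshold:
		return "available"
	case numUnprocessedChunks < workerUploadOverloadedThreshold:
		return "busy"
	default:
		return "overloaded"
	}
}

// Worked example of the selection rules (hypothetical numbers): for a chunk
// with staticMinimumPieces = 10 and staticPiecesNeeded = 30, having 12
// available and 20 busy workers satisfies the base success case (12 >= 10 and
// 12+20 >= 30). Having 11 available, 5 busy, and 0 overloaded workers hits the
// first edge case: full health can't be reached, but waiting won't help, so
// the 16 candidates are returned with a warning. If 4 of those workers were
// overloaded instead, the function would return false and the distribution
// queue would back off for 25ms before retrying.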