gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/uploadchunkdistributionqueue.go

     1  package renter
     2  
     3  import (
     4  	"container/list"
     5  	"sync"
     6  	"time"
     7  )
     8  
     9  // uploadchunkdistributionqueue.go creates a queue for distributing upload
    10  // chunks to workers. The queue has two lanes, one for priority upload work and
    11  // one for low priority upload work. Priority upload work always goes first if
    12  // it's available, but to ensure that low priority work gets at least a minimal
    13  // amount of throughput we will bump low priority work into the priority work
    14  // queue if too much priority work gets scheduled while the low priority work is
    15  // waiting.
    16  
    17  const (
    18  	// uploadChunkDistributionBackoff dictates the amount of time that the
    19  	// distributor will sleep after determining that a chunk is not ready to be
    20  	// distributed because too many workers are busy.
    21  	uploadChunkDistributionBackoff = time.Millisecond * 25
    22  
    23  	// lowPriorityMinThroughputMultiplier controls the minimum throughput that low
    24  	// priority traffic will have when waiting in the queue. For example, a min
    25  	// throughput multiplier of 10 means that for every 1 GB of high priority
    26  	// traffic that gets queued, at least 100 MB of low priority traffic will be
    27  	// promoted to high priority traffic.
    28  	//
    29  	// Raising the min throughput can negatively impact the latency for real
    30  	// time uploads. A high rate means that users trying to upload new files
    31  	// will often get stuck waiting for repair traffic that was bumped in
    32  	// priority.
    33  	//
    34  	// If the throughput is too low, repair traffic will have no priority and is
    35  	// at risk of starving due to lots of new upload traffic. In general, the
    36  	// best solution for handling high repair traffic is to migrate the current
    37  	// node to a maintenance server (that is not receiving new uploads) and have
    38  	// users upload to a fresh node that has very little need of repair traffic.
    39  	// Raising the minimum throughput (i.e. lowering this multiplier) increases the
    40  	// total data that a node can maintain, at the cost of latency for new uploads.
    41  	lowPriorityMinThroughputMultiplier = 10 // 10%
    42  
    43  	// workerUploadBusyThreshold is the number of jobs a worker needs to have to
    44  	// be considered busy. A threshold of 1 for example means the worker is
    45  	// 'busy' if it has 1 upload job or more in its queue.
    46  	workerUploadBusyThreshold = 1
    47  
    48  	// workerUploadOverloadedThreshold is the number of jobs a worker needs to
    49  	// have to be considered overloaded. A threshold of 3 for example means the
    50  	// worker is 'overloaded' if there are 3 jobs or more in its queue.
    51  	workerUploadOverloadedThreshold = 3
    52  )
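
// The promotion arithmetic described by the constants above can be seen in a
// standalone sketch (not part of this file). The byte counts are hypothetical;
// the local constant mirrors lowPriorityMinThroughputMultiplier.
package main

import "fmt"

const lowPriorityMinThroughputMultiplier = 10

func main() {
	// Hypothetical state: 1 GiB of priority traffic has been queued while low
	// priority chunks of 4 MiB each are waiting.
	priorityBuildup := uint64(1 << 30)
	lowPriorityChunkSize := uint64(4 << 20)

	// A waiting low priority chunk is promoted once the buildup covers its size
	// times the multiplier, so roughly a tenth of the priority volume is
	// matched by promoted low priority volume.
	promoted := uint64(0)
	for priorityBuildup >= lowPriorityChunkSize*lowPriorityMinThroughputMultiplier {
		priorityBuildup -= lowPriorityChunkSize * lowPriorityMinThroughputMultiplier
		promoted++
	}
	fmt.Printf("promoted %d chunks (%d MiB)\n", promoted, promoted*4) // 25 chunks, 100 MiB
}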
    53  
    54  // uploadChunkDistributionQueue is a struct which tracks which chunks are queued
    55  // to be distributed to workers, and serializes distribution so that one chunk
    56  // is added to workers at a time. Distribution is controlled so that workers get
    57  // an even balance of work and no single worker ends up with a backlog that can
    58  // slow down the whole system.
    59  type uploadChunkDistributionQueue struct {
    60  	processThreadRunning bool
    61  
    62  	priorityBuildup uint64
    63  	priorityLane    *ucdqFifo
    64  	lowPriorityLane *ucdqFifo
    65  
    66  	mu           sync.Mutex
    67  	staticRenter *Renter
    68  }
    69  
    70  // ucdqFifo implements a fifo to use with the ucdq.
    71  type ucdqFifo struct {
    72  	*list.List
    73  }
    74  
    75  // newUploadChunkDistributionQueue will initialize a ucdq for the renter.
    76  func newUploadChunkDistributionQueue(r *Renter) *uploadChunkDistributionQueue {
    77  	return &uploadChunkDistributionQueue{
    78  		priorityLane:    newUCDQfifo(),
    79  		lowPriorityLane: newUCDQfifo(),
    80  
    81  		staticRenter: r,
    82  	}
    83  }
    84  
    85  // newUCDQfifo inits a fifo for the ucdq.
    86  func newUCDQfifo() *ucdqFifo {
    87  	return &ucdqFifo{
    88  		List: list.New(),
    89  	}
    90  }
    91  
    92  // Pop removes and returns the first element in the fifo. It returns nil if
    93  // the fifo is empty.
    94  func (u *ucdqFifo) Pop() *unfinishedUploadChunk {
    95  	mr := u.Front()
    96  	if mr == nil {
    97  		return nil
    98  	}
    99  	return u.List.Remove(mr).(*unfinishedUploadChunk)
   100  }
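
// A standalone usage sketch of the fifo wrapper above, using a placeholder
// element type instead of *unfinishedUploadChunk. PushBack and PushFront come
// from the embedded list.List; Pop returns nil once the lane is drained.
package main

import (
	"container/list"
	"fmt"
)

type chunk struct{ id int } // placeholder for *unfinishedUploadChunk

type fifo struct {
	*list.List
}

func (f *fifo) Pop() *chunk {
	front := f.Front()
	if front == nil {
		return nil
	}
	return f.List.Remove(front).(*chunk)
}

func main() {
	f := &fifo{List: list.New()}
	f.PushBack(&chunk{id: 1})
	f.PushBack(&chunk{id: 2})
	f.PushFront(&chunk{id: 0}) // requeue at the front, as the ucdq does on retry
	for c := f.Pop(); c != nil; c = f.Pop() {
		fmt.Println(c.id) // prints 0, 1, 2
	}
}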
   101  
   102  // callAddUploadChunk will add an unfinished upload chunk to the queue. The
   103  // chunk will be put into a lane based on whether the memory was requested with
   104  // priority or not.
   105  func (ucdq *uploadChunkDistributionQueue) callAddUploadChunk(uc *unfinishedUploadChunk) {
   106  	// We need to hold a lock for the whole process of adding an upload chunk.
   107  	ucdq.mu.Lock()
   108  	defer ucdq.mu.Unlock()
   109  
   110  	// Since we're definitely going to add a chunk to the queue, we also need to
   111  	// make sure that a processing thread is launched to process it. If there's
   112  	// already a processing thread running, nothing will happen. If there is not
   113  	// a processing thread running, we need to set the thread running bool to
   114  	// true while still holding the ucdq lock, which is why this function is
   115  	// deferred to run before the unlock.
   116  	defer func() {
   117  		// Check if there is a thread running to process the queue.
   118  		if !ucdq.processThreadRunning {
   119  			ucdq.processThreadRunning = true
   120  			ucdq.staticRenter.tg.Launch(ucdq.threadedProcessQueue)
   121  		}
   122  	}()
   123  
   124  	// If the chunk is not a priority chunk, put it in the low priority lane.
   125  	if !uc.staticPriority {
   126  		ucdq.lowPriorityLane.PushBack(uc)
   127  		return
   128  	}
   129  
   130  	// If the chunk is a priority chunk, add it in the priority lane and then
   131  	// determine whether a low priority chunk needs to be bumped to the priority
   132  	// lane.
   133  	//
   134  	// The bumping happens when a priority chunk is added to ensure that low
   135  	// priority chunks are evenly distributed throughout the high priority
   136  	// queue, and that a sudden influx of high priority chunks doesn't mean that
   137  	// low priority chunks will have to wait a long time even if they get
   138  	// bumped.
   139  	ucdq.priorityLane.PushBack(uc)
   140  	if ucdq.lowPriorityLane.Len() == 0 {
   141  		// No need to worry about priority buildup if there is nothing waiting
   142  		// in the low priority lane.
   143  		return
   144  	}
   145  	// Tally up the new buildup caused by this new priority chunk.
   146  	ucdq.priorityBuildup += uc.staticMemoryNeeded
   147  
   148  	// Add items from the low priority lane as long as there is enough buildup
   149  	// to justify bumping them.
   150  	for x := ucdq.lowPriorityLane.Pop(); x != nil; x = ucdq.lowPriorityLane.Pop() {
   151  		// If there is enough buildup, bump the item into the priority lane.
   152  		needed := x.staticMemoryNeeded * lowPriorityMinThroughputMultiplier
   153  		if ucdq.priorityBuildup >= needed {
   154  			ucdq.priorityBuildup -= needed
   155  			ucdq.priorityLane.PushBack(x)
   156  			continue
   157  		}
   158  		// Otherwise return the element to the front of the lane. We are done.
   159  		ucdq.lowPriorityLane.PushFront(x)
   160  		break
   161  	}
   162  	// If all low priority items were bumped into the high priority lane, the
   163  	// buildup can be cleared out.
   164  	if ucdq.lowPriorityLane.Len() == 0 {
   165  		ucdq.priorityBuildup = 0
   166  	}
   167  }
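
// A standalone sketch of the launch-once pattern used by callAddUploadChunk:
// the running flag is flipped under the same lock that guards the queue, and
// the processor clears it under that lock before exiting, so exactly one
// processor runs while work exists. Names are illustrative and a plain
// goroutine stands in for the renter's thread group.
package main

import (
	"fmt"
	"sync"
	"time"
)

type workQueue struct {
	mu      sync.Mutex
	items   []int
	running bool
}

func (q *workQueue) add(i int) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.items = append(q.items, i)
	// Launch exactly one processor; because the flag is flipped while holding
	// the lock, two concurrent adders cannot both launch one.
	if !q.running {
		q.running = true
		go q.process()
	}
}

func (q *workQueue) process() {
	for {
		q.mu.Lock()
		if len(q.items) == 0 {
			// Clear the flag under the lock before exiting, so a concurrent
			// add either has its item seen here or relaunches the processor.
			q.running = false
			q.mu.Unlock()
			return
		}
		next := q.items[0]
		q.items = q.items[1:]
		q.mu.Unlock()
		fmt.Println("processing", next)
	}
}

func main() {
	q := &workQueue{}
	for i := 0; i < 5; i++ {
		q.add(i)
	}
	time.Sleep(100 * time.Millisecond) // crude wait, good enough for a sketch
}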
   168  
   169  // threadedProcessQueue serializes the processing of chunks in the distribution
   170  // queue. If there are priority chunks, it'll handle those first, and then if
   171  // there are no chunks in the priority lane it'll handle things in the low
   172  // priority lane. Each lane is treated like a FIFO.
   173  //
   174  // When things are being pulled out of the low priority lane, the priority
   175  // buildup can be reduced because the low priority lane is not starving.
   176  //
   177  // The general structure of this function is to pull a chunk out of a queue,
   178  // then try to distribute the chunk. The distributor function may determine that
   179  // the workers are not ready to process the chunk yet. If the distributor
   180  // function indicates that a chunk was not distributed, the chunk should go back
   181  // into the queue it came out of. Then on the next iteration, we will grab the
   182  // highest priority chunk.
   183  func (ucdq *uploadChunkDistributionQueue) threadedProcessQueue() {
   184  	for {
   185  		// Check whether the renter has shut down, return immediately if so.
   186  		select {
   187  		case <-ucdq.staticRenter.tg.StopChan():
   188  			return
   189  		default:
   190  		}
   191  
   192  		// Extract the next item in the queue.
   193  		ucdq.mu.Lock()
   194  		// First check for the exit condition - the queue is empty. While
   195  		// holding the lock, clear the processThreadRunning flag and then exit.
   196  		if ucdq.priorityLane.Len() == 0 && ucdq.lowPriorityLane.Len() == 0 {
   197  			ucdq.processThreadRunning = false
   198  			ucdq.mu.Unlock()
   199  			return
   200  		}
   201  		// At least one uc exists in the queue. Prefer to grab the priority one,
   202  		// if there is no priority one grab the low priority one. We need to
   203  		// remember which lane the uc came from because we may need to put it
   204  		// back into that lane later.
   205  		var nextUC *unfinishedUploadChunk
   206  		var priority bool
   207  		if ucdq.priorityLane.Len() > 0 {
   208  			nextUC = ucdq.priorityLane.Pop()
   209  			priority = true
   210  		} else {
   211  			nextUC = ucdq.lowPriorityLane.Pop()
   212  			priority = false
   213  		}
   214  		ucdq.mu.Unlock()
   215  
   216  		// Distribute the chunk, or merely simulate distribution if the disrupt is set.
   217  		var distributed bool
   218  		if ucdq.staticRenter.staticDeps.Disrupt("DelayChunkDistribution") {
   219  			time.Sleep(time.Second)
   220  			distributed = true
   221  		} else {
   222  			// While not holding the lock but still blocking, pass the chunk off to
   223  			// the thread that will distribute the chunk to workers. This call can
   224  			// fail. If the call fails, the chunk should be re-inserted at the front
   225  			// of whichever lane it was pulled from.
   226  			distributed = ucdq.staticRenter.managedDistributeChunkToWorkers(nextUC)
   227  		}
   228  
   229  		// If the chunk was not distributed, we want to block briefly to give
   230  		// the workers time to process the items in their queue. The only reason
   231  		// that a chunk will not be distributed is that the workers already have
   232  		// too much work in their queues.
   233  		if !distributed {
   234  			// NOTE: This could potentially be improved by switching it to a channel
   235  			// that waits for new chunks to appear or waits for busy/overloaded workers
   236  			// to report a better state. We opted not to do that here because 25ms is
   237  			// not a huge penalty to pay and there's a fair amount of complexity
   238  			// involved in switching to a better solution.
   239  			ucdq.staticRenter.tg.Sleep(uploadChunkDistributionBackoff)
   240  		}
   241  		if distributed && priority {
   242  			// If the chunk was distributed successfully and we pulled the chunk
   243  			// from the priority lane, there is nothing more to do.
   244  			continue
   245  		}
   246  		if distributed && !priority {
   247  			// If the chunk was distributed successfully and we pulled the chunk
   248  			// from the low priority lane, we need to subtract from the priority
   249  			// buildup as the low priority lane has made progress.
   250  			ucdq.mu.Lock()
   251  			needed := nextUC.staticMemoryNeeded * lowPriorityMinThroughputMultiplier
   252  			if ucdq.priorityBuildup < needed {
   253  				ucdq.priorityBuildup = 0
   254  			} else {
   255  				ucdq.priorityBuildup -= needed
   256  			}
   257  			ucdq.mu.Unlock()
   258  			continue
   259  		}
   260  		if !distributed && priority {
   261  			// If the chunk was not distributed, we need to push it back to the
   262  			// front of the priority lane and then cycle again.
   263  			ucdq.mu.Lock()
   264  			ucdq.priorityLane.PushFront(nextUC)
   265  			ucdq.mu.Unlock()
   266  			continue
   267  		}
   268  		if !distributed && !priority {
   269  			// If the chunk was not distributed, push it back into the front of
   270  			// the low priority lane. The next iteration may grab a high
   271  			// priority chunk if a new high prio chunk has appeared while we
   272  			// were checking on this chunk.
   273  			ucdq.mu.Lock()
   274  			ucdq.lowPriorityLane.PushFront(nextUC)
   275  			ucdq.mu.Unlock()
   276  			continue
   277  		}
   278  		panic("missing case, this code should not be reachable")
   279  	}
   280  }
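
// A standalone sketch of the requeue-and-backoff behaviour in the loop above,
// with hypothetical item names and a stubbed-out distributor: a failed item
// goes back to the front of the lane it came from, so a priority item that
// arrives during the backoff is still handled first.
package main

import (
	"container/list"
	"fmt"
	"time"
)

const backoff = 25 * time.Millisecond // mirrors uploadChunkDistributionBackoff

// tryDistribute stands in for managedDistributeChunkToWorkers; it reports
// whether the (pretend) workers accepted the item.
func tryDistribute(item string, rejections *int) bool {
	if *rejections > 0 {
		*rejections--
		return false
	}
	return true
}

func main() {
	priority, low := list.New(), list.New()
	priority.PushBack("user-chunk")
	low.PushBack("repair-chunk")
	rejections := 2 // pretend the workers are overloaded for two attempts

	for priority.Len() > 0 || low.Len() > 0 {
		// Prefer the priority lane and remember which lane the item came from.
		lane := priority
		if lane.Len() == 0 {
			lane = low
		}
		item := lane.Remove(lane.Front()).(string)

		if !tryDistribute(item, &rejections) {
			// Not distributed: requeue at the front of the same lane and back
			// off briefly so the workers can drain their queues.
			lane.PushFront(item)
			time.Sleep(backoff)
			continue
		}
		fmt.Println("distributed", item)
	}
}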
   281  
   282  // managedDistributeChunkToWorkers is a function which will attempt to
   283  // distribute the chunk to workers for upload. If the distribution is
   284  // successful, it will return true. If the distribution is not successful, it
   285  // will return false, indicating that distribution needs to be retried.
   286  func (r *Renter) managedDistributeChunkToWorkers(uc *unfinishedUploadChunk) bool {
   287  	// Grab the best set of workers to receive this chunk. If the workers are
   288  	// not ready, distribution is retried by the caller after a short backoff,
   289  	// so a chunk may effectively wait here until enough workers can accept it.
   290  	// This waiting pressure keeps throughput high because most workers will
   291  	// continue to be busy all the time, but it also significantly improves
   292  	// latency for high priority chunks because the distribution queue can
   293  	// ensure that priority chunks always get to the front of the worker line.
   294  	workers, finalized := r.managedFindBestUploadWorkerSet(uc)
   295  	if !finalized {
   296  		return false
   297  	}
   298  
   299  	// Give the chunk to each worker, marking the number of workers that have
   300  	// received the chunk. Only count the worker if the worker's upload queue
   301  	// accepts the job.
   302  	uc.managedIncreaseRemainingWorkers(len(workers))
   303  	jobsDistributed := 0
   304  	for _, w := range workers {
   305  		if w.callQueueUploadChunk(uc) {
   306  			jobsDistributed++
   307  		}
   308  	}
   309  
   310  	uc.managedUpdateDistributionTime()
   311  	r.staticRepairLog.Printf("Distributed chunk %v of %s to %v workers.", uc.staticIndex, uc.staticSiaPath, jobsDistributed)
   312  	// Cleanup is required after distribution to ensure that memory is released
   313  	// for any pieces which don't have a worker.
   314  	r.managedCleanUpUploadChunk(uc)
   315  	return true
   316  }
   317  
   318  // managedFindBestUploadWorkerSet will look through the set of available workers
   319  // and hand-pick the workers that should be used for the upload chunk. It may
   320  // report that the chunk cannot be distributed yet, in which case the caller is
   321  // expected to wait until more workers are available and try again.
   322  func (r *Renter) managedFindBestUploadWorkerSet(uc *unfinishedUploadChunk) ([]*worker, bool) {
   323  	// Grab the set of workers to upload. If 'finalized' is false, it means
   324  	// that all of the good workers are already busy, and we need to wait
   325  	// before distributing the chunk.
   326  	workers, finalized := managedSelectWorkersForUploading(uc, r.staticWorkerPool.callWorkers())
   327  	if finalized {
   328  		return workers, true
   329  	}
   330  	return nil, false
   331  }
   332  
   333  // managedSelectWorkersForUploading is a function that will select workers to be
   334  // used in uploading a chunk to the network. This function can fail if there are
   335  // not enough workers that are ready to take on more work, in which case the
   336  // caller needs to wait before trying again.
   337  //
   338  // This function is meant to only be called by 'managedFindBestUploadWorkerSet';
   339  // the retry mechanism is handled by the distribution queue's processing loop.
   340  // The functions are split up this way to make the retry logic easier to understand.
   341  func managedSelectWorkersForUploading(uc *unfinishedUploadChunk, workers []*worker) ([]*worker, bool) {
   342  	r := uc.staticRenter
   343  
   344  	// Scan through the workers and determine how many workers have available
   345  	// slots to upload. Available workers and busy workers are both counted as
   346  	// viable candidates for receiving work.
   347  	var availableWorkers, busyWorkers, overloadedWorkers uint64
   348  	var nOnCooldown, nGFU int
   349  	totalWorkers := len(workers)
   350  	for _, w := range workers {
   351  		// Skip any worker that is on cooldown or is !GFU.
   352  		cache := w.staticCache()
   353  		w.mu.Lock()
   354  		onCooldown, _ := w.onUploadCooldown()
   355  		numUnprocessedChunks := w.unprocessedChunks.Len()
   356  		w.mu.Unlock()
   357  		gfu := cache.staticContractUtility.GoodForUpload
   358  		if onCooldown {
   359  			nOnCooldown++
   360  			continue
   361  		}
   362  		if !gfu {
   363  			nGFU++
   364  			continue
   365  		}
   366  
   367  		// Count the worker by status. A worker is 'available', 'busy', or
   368  		// 'overloaded' depending on how many jobs it has in its upload queue.
   369  		// Only available and busy workers are candidates to receive the
   370  		// unfinished chunk.
   371  		if numUnprocessedChunks < workerUploadBusyThreshold {
   372  			availableWorkers++
   373  		} else if numUnprocessedChunks < workerUploadOverloadedThreshold {
   374  			busyWorkers++
   375  		} else {
   376  			overloadedWorkers++
   377  			continue
   378  		}
   379  		workers[availableWorkers+busyWorkers-1] = w
   380  	}
   381  	// Truncate the set of workers to only include the available and busy
   382  	// workers that were compacted to the front of the slice while counting.
   383  	workers = workers[:availableWorkers+busyWorkers]
   384  
   385  	// Decide how to distribute the upload based on the number of pieces and the
   386  	// number of workers. We also want to handle every edge case where there are
   387  	// more pieces than workers in total, since in those cases waiting is not
   388  	// going to improve the situation.
   389  	if availableWorkers >= uint64(uc.staticMinimumPieces) && availableWorkers+busyWorkers >= uint64(uc.staticPiecesNeeded) {
   390  		// This is the base success case. We have enough available workers to
   391  		// get the chunk 'available' on the Sia network ASAP, and we have enough
   392  		// busy workers to complete the chunk all the way.
   393  		return workers, true
   394  	}
   395  	if availableWorkers >= uint64(uc.staticMinimumPieces) && overloadedWorkers == 0 {
   396  		// This is an edge case where there are no overloaded workers, and there
   397  		// are enough available workers to make the chunk available on the Sia
   398  		// network. Because there are no overloaded workers, waiting longer is
   399  		// not going to allow us to make more progress, so we need to accept
   400  		// this chunk as-is.
   401  		r.staticLog.Printf("WARN: uploading chunk %v even though we don't have enough workers to get the chunk to full health: total: %v available: %v busy: %v overloaded: %v !gfu: %v onCooldown: %v", uc.id, totalWorkers, availableWorkers, busyWorkers, overloadedWorkers, nGFU, nOnCooldown)
   402  		return workers, true
   403  	}
   404  	if len(workers) > 0 && overloadedWorkers == 0 && busyWorkers == 0 {
   405  		// This is the worst of the success cases. It means we don't even have
   406  		// enough workers to make the chunk available on the network, but all
   407  		// the workers that we do have are available. Even though this is a bad
   408  		// place to be, the right thing to do is move forward with the upload.
   409  		r.staticLog.Printf("WARN: uploading chunk %v even though we don't have enough workers to make chunk available: total: %v available: %v busy: %v overloaded: %v !gfu: %v onCooldown: %v", uc.id, totalWorkers, availableWorkers, busyWorkers, overloadedWorkers, nGFU, nOnCooldown)
   410  		return workers, true
   411  	}
   412  
   413  	// In all other cases, we should wait until either some busy workers have
   414  	// processed enough chunks to become available workers, or until some
   415  	// overloaded workers have processed enough chunks to become busy workers,
   416  	// or both.
   417  	return nil, false
   418  }
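
// A standalone sketch of the classification and in-place compaction above,
// using plain ints as upload queue lengths. The local thresholds mirror
// workerUploadBusyThreshold and workerUploadOverloadedThreshold.
package main

import "fmt"

const (
	busyThreshold       = 1
	overloadedThreshold = 3
)

func main() {
	// Hypothetical upload queue lengths for six workers.
	queueLens := []int{0, 2, 5, 1, 0, 4}

	var available, busy, overloaded int
	candidates := queueLens
	for _, n := range queueLens {
		switch {
		case n < busyThreshold:
			available++
		case n < overloadedThreshold:
			busy++
		default:
			overloaded++
			continue
		}
		// Keep candidates packed at the front of the slice, exactly as the
		// renter does with its []*worker.
		candidates[available+busy-1] = n
	}
	candidates = candidates[:available+busy]

	fmt.Println("available:", available, "busy:", busy, "overloaded:", overloaded)
	fmt.Println("candidate queue lengths:", candidates)
}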