github.com/fozzysec/SiaPrime@v0.0.0-20190612043147-66c8e8d11fe3/modules/renter/downloadheap.go

package renter

// The download heap is a heap that contains all the chunks that we are trying
// to download, sorted by download priority. Each time there are resources
// available to kick off another download, a chunk is popped off the heap,
// prepared for downloading, and then sent off to the workers.
//
// Download jobs are added to the heap via a function call.

import (
	"container/heap"
	"errors"
	"time"
)

var (
	errDownloadRenterClosed = errors.New("download could not be scheduled because renter is shutting down")
	errInsufficientHosts    = errors.New("insufficient hosts to recover file")
	errInsufficientPieces   = errors.New("couldn't fetch enough pieces to recover data")
	errPrevErr              = errors.New("download could not be completed due to a previous error")
)

// downloadChunkHeap is a heap that is sorted first by file priority, then by
// the start time of the download, and finally by the index of the chunk. As
// downloads are queued, they are added to the downloadChunkHeap. As resources
// become available to execute downloads, chunks are pulled off of the heap and
// distributed to workers.
type downloadChunkHeap []*unfinishedDownloadChunk

// Implementation of heap.Interface for downloadChunkHeap.
func (dch downloadChunkHeap) Len() int { return len(dch) }
func (dch downloadChunkHeap) Less(i, j int) bool {
	// First sort by priority.
	if dch[i].staticPriority != dch[j].staticPriority {
		return dch[i].staticPriority > dch[j].staticPriority
	}
	// For equal priority, sort by start time.
	if dch[i].download.staticStartTime != dch[j].download.staticStartTime {
		return dch[i].download.staticStartTime.Before(dch[j].download.staticStartTime)
	}
	// For equal start time (typically meaning it's the same file), sort by
	// chunkIndex.
	//
	// NOTE: To prevent deadlocks when acquiring memory and using writers that
	// will streamline / order different chunks, we must make sure that we sort
	// by chunkIndex such that the earlier chunks are selected first from the
	// heap.
	return dch[i].staticChunkIndex < dch[j].staticChunkIndex
}
func (dch downloadChunkHeap) Swap(i, j int)       { dch[i], dch[j] = dch[j], dch[i] }
func (dch *downloadChunkHeap) Push(x interface{}) { *dch = append(*dch, x.(*unfinishedDownloadChunk)) }
func (dch *downloadChunkHeap) Pop() interface{} {
	old := *dch
	n := len(old)
	x := old[n-1]
	*dch = old[0 : n-1]
	return x
}
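
// pushPopSketch is an illustrative sketch, not part of the original file and
// not called anywhere in the renter. It shows the idiom the heap above relies
// on: insertions and removals must go through the container/heap package
// functions (heap.Push / heap.Pop) rather than the raw Push and Pop methods,
// otherwise the priority ordering defined by Less is not maintained.
func pushPopSketch(dch *downloadChunkHeap, udc *unfinishedDownloadChunk) *unfinishedDownloadChunk {
	// heap.Push appends via dch.Push and then sifts the new element toward
	// the root so the highest-priority chunk stays at index 0.
	heap.Push(dch, udc)
	// heap.Pop moves the root to the end, restores the invariant, and then
	// removes it via dch.Pop, returning the highest-priority chunk.
	return heap.Pop(dch).(*unfinishedDownloadChunk)
}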

// managedAcquireMemoryForDownloadChunk will block until memory is available
// for the chunk to be downloaded. 'false' will be returned if the renter
// shuts down before memory can be acquired.
func (r *Renter) managedAcquireMemoryForDownloadChunk(udc *unfinishedDownloadChunk) bool {
	// The amount of memory required is equal to the minimum number of pieces
	// plus the overdrive amount, multiplied by the piece size.
	//
	// TODO: This allocation assumes that the erasure coding does not need extra
	// memory to decode a bunch of pieces. Optimized erasure coding will not
	// need extra memory to decode a bunch of pieces, though I do not believe
	// our erasure coding has been optimized around this yet, so we may actually
	// go over the memory limits when we decode pieces.
	memoryRequired := uint64(udc.staticOverdrive+udc.erasureCode.MinPieces()) * udc.staticPieceSize
	udc.memoryAllocated = memoryRequired
	return r.memoryManager.Request(memoryRequired, memoryPriorityHigh)
}
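
// downloadMemorySketch is a hypothetical, stand-alone illustration (not part
// of the original file) of the calculation performed above: every piece that
// may be fetched concurrently, meaning the minimum number of pieces needed
// for recovery plus the overdrive pieces, is assumed to need a full
// piece-sized buffer. For example, with 10 minimum pieces, an overdrive of 2,
// and 4 MiB pieces, the requested amount would be 48 MiB.
func downloadMemorySketch(minPieces, overdrive, pieceSize uint64) uint64 {
	return (minPieces + overdrive) * pieceSize
}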

// managedAddChunkToDownloadHeap will add a chunk to the download heap in a
// thread-safe way.
func (r *Renter) managedAddChunkToDownloadHeap(udc *unfinishedDownloadChunk) {
	// The purpose of the chunk heap is to block work from happening until there
	// is enough memory available to send off the work. If the chunk does not
	// need any memory to be allocated, it should be given to the workers
	// directly and immediately. This is actually a requirement in our memory
	// model. If a download chunk does not need memory, that means the memory
	// has already been allocated and is blocking new memory from being
	// allocated until the download is complete. If such a job is put in the
	// heap and ends up behind a job that gets stuck allocating memory, the
	// result is a deadlock.
	//
	// This is functionally equivalent to putting the chunk in the heap with
	// maximum priority, such that the chunk is immediately removed from the
	// heap and distributed to workers - the sole purpose of the heap is to
	// block workers from receiving a chunk until memory has been allocated.
	if !udc.staticNeedsMemory {
		r.managedDistributeDownloadChunkToWorkers(udc)
		return
	}

	// Put the chunk into the chunk heap, going through heap.Push so that the
	// priority ordering defined by Less is preserved for heap.Pop.
	r.downloadHeapMu.Lock()
	heap.Push(r.downloadHeap, udc)
	r.downloadHeapMu.Unlock()
}

// managedBlockUntilOnline will block until the renter is online. The renter
// will appropriately handle incoming download requests and stop signals while
// waiting.
func (r *Renter) managedBlockUntilOnline() bool {
	for !r.g.Online() {
		select {
		case <-r.tg.StopChan():
			return false
		case <-time.After(offlineCheckFrequency):
		}
	}
	return true
}

// managedDistributeDownloadChunkToWorkers will take a chunk and pass it out to
// all of the workers.
func (r *Renter) managedDistributeDownloadChunkToWorkers(udc *unfinishedDownloadChunk) {
	// Distribute the chunk to workers, marking the number of workers
	// that have received the work.
	id := r.mu.Lock()
	udc.mu.Lock()
	udc.workersRemaining = len(r.workerPool)
	udc.mu.Unlock()
	for _, worker := range r.workerPool {
		worker.managedQueueDownloadChunk(udc)
	}
	r.mu.Unlock(id)

	// If there are no workers, nothing will attempt to clean up the chunk, so
	// we must make sure that managedCleanUp is called at least once on the
	// chunk.
	udc.managedCleanUp()
}

// managedNextDownloadChunk will fetch the next chunk from the download heap. If
// the download heap is empty, 'nil' will be returned.
func (r *Renter) managedNextDownloadChunk() *unfinishedDownloadChunk {
	r.downloadHeapMu.Lock()
	defer r.downloadHeapMu.Unlock()

	for {
		if r.downloadHeap.Len() <= 0 {
			return nil
		}
		nextChunk := heap.Pop(r.downloadHeap).(*unfinishedDownloadChunk)
		if !nextChunk.download.staticComplete() {
			return nextChunk
		}
	}
}
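
// drainDownloadHeapSketch is an illustrative, hypothetical helper that is not
// part of the original file and is not used by the renter. It shows the
// consumption pattern managedNextDownloadChunk enables: nil acts as the
// "heap is empty" sentinel, and chunks belonging to already-completed
// downloads have been filtered out before they reach the handler.
func (r *Renter) drainDownloadHeapSketch(handle func(*unfinishedDownloadChunk)) {
	for {
		nextChunk := r.managedNextDownloadChunk()
		if nextChunk == nil {
			return
		}
		handle(nextChunk)
	}
}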

// threadedDownloadLoop utilizes the worker pool to make progress on any queued
// downloads.
func (r *Renter) threadedDownloadLoop() {
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	// Infinite loop to process downloads. Will return if r.tg.Stop() is called.
LOOP:
	for {
		// Wait until the renter is online.
		if !r.managedBlockUntilOnline() {
			// The renter shut down before the internet connection was restored.
			return
		}

		// Update the worker pool and fetch the current time. The loop will
		// reset after a certain amount of time has passed.
		r.managedUpdateWorkerPool()
		workerUpdateTime := time.Now()

		// Pull downloads out of the heap. Will break if the heap is empty, and
		// will reset to the top of the outer loop if a reset condition is met.
		for {
			// Check that we still have an internet connection, and also that we
			// do not need to update the worker pool yet.
			if !r.g.Online() || time.Now().After(workerUpdateTime.Add(workerPoolUpdateTimeout)) {
				// Reset to the top of the outer loop. Either we need to wait
				// until we are online, or we need to refresh the worker pool.
				// The outer loop will handle both situations.
				continue LOOP
			}

			// Get the next chunk.
			nextChunk := r.managedNextDownloadChunk()
			if nextChunk == nil {
				// Break out of the inner loop and wait for more work.
				break
			}

			// Check whether the chunk is already in the stream cache; if it
			// is, there is no need to download it again.
			if r.staticStreamCache.Retrieve(nextChunk) {
				continue
			}

			// Get the required memory to download this chunk.
			if !r.managedAcquireMemoryForDownloadChunk(nextChunk) {
				// The renter shut down before memory could be acquired.
				return
			}
			// Distribute the chunk to workers.
			r.managedDistributeDownloadChunkToWorkers(nextChunk)
		}

		// Wait for more work.
		select {
		case <-r.tg.StopChan():
			return
		case <-r.newDownloads:
		}
	}
}
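
// queueDownloadSketch is a hypothetical sketch, not part of the original file,
// of the producer side of the loop above. It assumes that r.newDownloads is a
// struct{} channel used purely as a wake-up signal: the chunk is placed in the
// heap (or handed straight to the workers if it needs no memory) and the
// download loop is then nudged without blocking if a signal is already pending.
func (r *Renter) queueDownloadSketch(udc *unfinishedDownloadChunk) {
	r.managedAddChunkToDownloadHeap(udc)
	select {
	case r.newDownloads <- struct{}{}:
	default:
	}
}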