gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/uploadheap.go

     1  package renter
     2  
     3  // TODO: replace managedRefreshHostsAndWorkers with structural updates to the
     4  // worker pool. The worker pool should maintain the map of hosts that
     5  // managedRefreshHostsAndWorkers builds every call, and the contractor should
     6  // work with the worker pool to instantly notify the worker pool of any changes
     7  // to the set of contracts.
     8  
     9  import (
    10  	"container/heap"
    11  	"io/ioutil"
    12  	"math"
    13  	"os"
    14  	"path/filepath"
    15  	"sort"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"gitlab.com/NebulousLabs/errors"
    21  	"gitlab.com/NebulousLabs/fastrand"
    22  
    23  	"gitlab.com/SiaPrime/SiaPrime/build"
    24  	"gitlab.com/SiaPrime/SiaPrime/modules"
    25  	"gitlab.com/SiaPrime/SiaPrime/modules/renter/siafile"
    26  	"gitlab.com/SiaPrime/SiaPrime/types"
    27  )
    28  
    29  // repairTarget is a helper type for telling the repair heap what type of
    30  // files/chunks to target for repair
    31  type repairTarget int
    32  
     33  // targetStuckChunks tells the repair loop to target stuck chunks for repair,
     34  // targetUnstuckChunks targets unstuck chunks, and targetBackupChunks targets backup chunks.
    35  const (
    36  	targetError repairTarget = iota
    37  	targetStuckChunks
    38  	targetUnstuckChunks
    39  	targetBackupChunks
    40  )
    41  
    42  // uploadChunkHeap is a bunch of priority-sorted chunks that need to be either
    43  // uploaded or repaired.
    44  type uploadChunkHeap []*unfinishedUploadChunk
    45  
    46  // Implementation of heap.Interface for uploadChunkHeap.
    47  func (uch uploadChunkHeap) Len() int { return len(uch) }
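        // Less reports whether chunk i should be repaired before chunk j:
        // high-priority chunks come first; among chunks with the same stuck status,
        // worse (higher) health comes first; otherwise stuck chunks come before
        // unstuck chunks.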
    48  func (uch uploadChunkHeap) Less(i, j int) bool {
    49  	// If only chunk i is high priority, return true to prioritize it.
    50  	if uch[i].priority && !uch[j].priority {
    51  		return true
    52  	}
    53  	// If only chunk j is high priority, return false to prioritize it.
    54  	if !uch[i].priority && uch[j].priority {
    55  		return false
    56  	}
     57  	// If the chunks have the same stuck status, check which chunk has the worse
     58  	// health. A higher health value indicates worse health.
    59  	if uch[i].stuck == uch[j].stuck {
    60  		return uch[i].health > uch[j].health
    61  	}
    62  	// If chunk i is stuck, return true to prioritize it.
    63  	if uch[i].stuck {
    64  		return true
    65  	}
    66  	// Chunk j is stuck, return false to prioritize it.
    67  	return false
    68  }
    69  func (uch uploadChunkHeap) Swap(i, j int)       { uch[i], uch[j] = uch[j], uch[i] }
    70  func (uch *uploadChunkHeap) Push(x interface{}) { *uch = append(*uch, x.(*unfinishedUploadChunk)) }
    71  func (uch *uploadChunkHeap) Pop() interface{} {
    72  	old := *uch
    73  	n := len(old)
    74  	x := old[n-1]
    75  	*uch = old[:n-1]
    76  	return x
    77  }
    78  
    79  // reset clears the uploadChunkHeap and makes sure all the files belonging to
    80  // the chunks are closed
    81  func (uch *uploadChunkHeap) reset() (err error) {
    82  	for _, c := range *uch {
    83  		err = errors.Compose(err, c.fileEntry.Close())
    84  	}
    85  	*uch = uploadChunkHeap{}
    86  	return err
    87  }
    88  
    89  // uploadHeap contains a priority-sorted heap of all the chunks being uploaded
    90  // to the renter, along with some metadata.
    91  type uploadHeap struct {
    92  	heap uploadChunkHeap
    93  
     94  	// stuckHeapChunks and unstuckHeapChunks are maps containing all the chunks
     95  	// that are currently in the heap. Chunks are added to and removed from the
     96  	// maps as chunks are pushed onto and popped off of the heap.
     97  	//
     98  	// repairingChunks is a map containing all the chunks that are currently
     99  	// assigned to workers and are being repaired/worked on.
   100  	repairingChunks   map[uploadChunkID]*unfinishedUploadChunk
   101  	stuckHeapChunks   map[uploadChunkID]*unfinishedUploadChunk
   102  	unstuckHeapChunks map[uploadChunkID]*unfinishedUploadChunk
   103  
   104  	// Control channels
   105  	newUploads        chan struct{}
   106  	repairNeeded      chan struct{}
   107  	stuckChunkFound   chan struct{}
   108  	stuckChunkSuccess chan struct{}
   109  
   110  	mu sync.Mutex
   111  }
   112  
   113  // managedExists checks if a chunk currently exists in the upload heap. A chunk
   114  // exists in the upload heap if it exists in any of the heap's tracking maps
   115  func (uh *uploadHeap) managedExists(id uploadChunkID) bool {
   116  	uh.mu.Lock()
   117  	defer uh.mu.Unlock()
   118  	_, existsUnstuckHeap := uh.unstuckHeapChunks[id]
   119  	_, existsRepairing := uh.repairingChunks[id]
   120  	_, existsStuckHeap := uh.stuckHeapChunks[id]
   121  	return existsUnstuckHeap || existsRepairing || existsStuckHeap
   122  }
   123  
   124  // managedLen will return the length of the heap
   125  func (uh *uploadHeap) managedLen() int {
   126  	uh.mu.Lock()
   127  	uhLen := uh.heap.Len()
   128  	uh.mu.Unlock()
   129  	return uhLen
   130  }
   131  
   132  // managedMarkRepairDone removes the chunk from the repairingChunks map of the
    133  // uploadHeap. It also performs a sanity check that the chunk was in the map;
    134  // this ensures that chunks are being added and removed as expected.
   135  func (uh *uploadHeap) managedMarkRepairDone(id uploadChunkID) {
   136  	uh.mu.Lock()
   137  	defer uh.mu.Unlock()
   138  	_, ok := uh.repairingChunks[id]
   139  	if !ok {
   140  		build.Critical("Chunk is not in the repair map, this means it was removed prematurely or was never added")
   141  	}
   142  	delete(uh.repairingChunks, id)
   143  }
   144  
   145  // managedNumStuckChunks returns the number of stuck chunks in the heap
   146  func (uh *uploadHeap) managedNumStuckChunks() int {
   147  	uh.mu.Lock()
   148  	defer uh.mu.Unlock()
   149  	return len(uh.stuckHeapChunks)
   150  }
   151  
    152  // managedPush will try to add a chunk to the upload heap. It returns true if
    153  // the chunk was added and false otherwise.
   154  func (uh *uploadHeap) managedPush(uuc *unfinishedUploadChunk) bool {
   155  	// Grab chunk stuck status
   156  	uuc.mu.Lock()
   157  	chunkStuck := uuc.stuck
   158  	uuc.mu.Unlock()
   159  
   160  	// Check if chunk is in any of the heap maps
   161  	uh.mu.Lock()
   162  	defer uh.mu.Unlock()
   163  	unstuckUUC, existsUnstuckHeap := uh.unstuckHeapChunks[uuc.id]
   164  	repairingUUC, existsRepairing := uh.repairingChunks[uuc.id]
   165  	stuckUUC, existsStuckHeap := uh.stuckHeapChunks[uuc.id]
   166  
    167  	// If the chunk being added has a sourceReader, cancel the existing chunk
    168  	// and replace it with the new one.
   169  	if uuc.sourceReader != nil && (existsUnstuckHeap || existsRepairing || existsStuckHeap) {
   170  		// Get the existing chunk.
   171  		var existingUUC *unfinishedUploadChunk
   172  		if existsStuckHeap {
   173  			existingUUC = stuckUUC
   174  		} else if existsRepairing {
   175  			existingUUC = repairingUUC
   176  		} else if existsUnstuckHeap {
   177  			existingUUC = unstuckUUC
   178  		}
   179  		// Cancel the chunk.
   180  		existingUUC.cancelMU.Lock()
   181  		existingUUC.canceled = true
   182  		existingUUC.cancelMU.Unlock()
   183  		// Wait for all workers to finish ongoing work on that chunk and try to push
   184  		// the new chunk again. This happens in a separate thread to avoid holding the
   185  		// uploadHeap lock while waiting.
   186  		go func() {
   187  			existingUUC.cancelWG.Wait()
   188  			uh.managedPush(uuc)
   189  		}()
    190  		return true // It's not pushed yet but it is guaranteed to be pushed eventually.
   191  	}
   192  
   193  	// Check if the chunk can be added to the heap
   194  	canAddStuckChunk := chunkStuck && !existsStuckHeap && !existsRepairing && len(uh.stuckHeapChunks) < maxStuckChunksInHeap
   195  	canAddUnstuckChunk := !chunkStuck && !existsUnstuckHeap && !existsRepairing
   196  	if canAddStuckChunk {
   197  		uh.stuckHeapChunks[uuc.id] = uuc
   198  		heap.Push(&uh.heap, uuc)
   199  		return true
   200  	} else if canAddUnstuckChunk {
   201  		uh.unstuckHeapChunks[uuc.id] = uuc
   202  		heap.Push(&uh.heap, uuc)
   203  		return true
   204  	}
   205  	return false
   206  }
   207  
   208  // managedPop will pull a chunk off of the upload heap and return it.
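        // The popped chunk is removed from the stuck/unstuck tracking maps and placed
        // in the repairingChunks map, where it stays until managedMarkRepairDone is
        // called for it.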
   209  func (uh *uploadHeap) managedPop() (uc *unfinishedUploadChunk) {
   210  	uh.mu.Lock()
   211  	if len(uh.heap) > 0 {
   212  		uc = heap.Pop(&uh.heap).(*unfinishedUploadChunk)
   213  		delete(uh.unstuckHeapChunks, uc.id)
   214  		delete(uh.stuckHeapChunks, uc.id)
   215  		if _, exists := uh.repairingChunks[uc.id]; exists {
   216  			build.Critical("There should not be a chunk in the heap that can be popped that is currently being repaired")
   217  		}
   218  		uh.repairingChunks[uc.id] = uc
   219  	}
   220  	uh.mu.Unlock()
   221  	return uc
   222  }
   223  
   224  // managedReset will reset the slice and maps within the heap to free up memory.
   225  func (uh *uploadHeap) managedReset() error {
   226  	uh.mu.Lock()
   227  	defer uh.mu.Unlock()
   228  	uh.unstuckHeapChunks = make(map[uploadChunkID]*unfinishedUploadChunk)
   229  	uh.stuckHeapChunks = make(map[uploadChunkID]*unfinishedUploadChunk)
   230  	return uh.heap.reset()
   231  }
   232  
   233  // managedBuildUnfinishedChunk will pull out a single unfinished chunk of a file.
   234  func (r *Renter) managedBuildUnfinishedChunk(entry *siafile.SiaFileSetEntry, chunkIndex uint64, hosts map[string]struct{}, hostPublicKeys map[string]types.SiaPublicKey, priority bool, offline, goodForRenew map[string]bool) (*unfinishedUploadChunk, error) {
   235  	// Copy entry
   236  	entryCopy, err := entry.CopyEntry()
   237  	if err != nil {
   238  		r.log.Println("WARN: unable to copy siafile entry:", err)
   239  		return nil, errors.AddContext(err, "unable to copy file entry when trying to build the unfinished chunk")
   240  	}
   241  	if entryCopy == nil {
    242  		build.Critical("nil file entry returned from CopyEntry even though no error was returned")
   243  		return nil, errors.New("CopyEntry returned a nil copy")
   244  	}
   245  	stuck, err := entry.StuckChunkByIndex(chunkIndex)
   246  	if err != nil {
   247  		r.log.Println("WARN: unable to get 'stuck' status:", err)
   248  		return nil, errors.AddContext(err, "unable to get 'stuck' status")
   249  	}
   250  	uuc := &unfinishedUploadChunk{
   251  		fileEntry: entryCopy,
   252  
   253  		id: uploadChunkID{
   254  			fileUID: entry.UID(),
   255  			index:   chunkIndex,
   256  		},
   257  
   258  		index:    chunkIndex,
   259  		length:   entry.ChunkSize(),
   260  		offset:   int64(chunkIndex * entry.ChunkSize()),
   261  		priority: priority,
   262  
    263  		// memoryNeeded has to include the logical data in addition to the
    264  		// physical data, as well as the overhead for encryption.
   265  		//
   266  		// TODO / NOTE: If we adjust the file to have a flexible encryption
   267  		// scheme, we'll need to adjust the overhead stuff too.
   268  		//
   269  		// TODO: Currently we request memory for all of the pieces as well
   270  		// as the minimum pieces, but we perhaps don't need to request all
   271  		// of that.
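        		// As an illustration (the numbers are hypothetical): with 4 MiB pieces
        		// and a 10-of-30 erasure code, this requests roughly
        		// 4 MiB * (30 + 10) = 160 MiB plus 30 times the per-piece encryption
        		// overhead.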
   272  		memoryNeeded:  entry.PieceSize()*uint64(entry.ErasureCode().NumPieces()+entry.ErasureCode().MinPieces()) + uint64(entry.ErasureCode().NumPieces())*entry.MasterKey().Type().Overhead(),
   273  		minimumPieces: entry.ErasureCode().MinPieces(),
   274  		piecesNeeded:  entry.ErasureCode().NumPieces(),
   275  		stuck:         stuck,
   276  
   277  		physicalChunkData: make([][]byte, entry.ErasureCode().NumPieces()),
   278  
   279  		pieceUsage:  make([]bool, entry.ErasureCode().NumPieces()),
   280  		unusedHosts: make(map[string]struct{}, len(hosts)),
   281  	}
   282  
   283  	// Every chunk can have a different set of unused hosts.
   284  	for host := range hosts {
   285  		uuc.unusedHosts[host] = struct{}{}
   286  	}
   287  
    288  	// Iterate through the pieces of this chunk and mark which hosts are already
    289  	// in use for it. As hosts are deleted from the 'unusedHosts' map, the
    290  	// 'piecesCompleted' value is incremented.
   291  	pieces, err := entry.Pieces(chunkIndex)
   292  	if err != nil {
   293  		r.log.Println("failed to get pieces for building incomplete chunks", err)
   294  		if err := entry.SetStuck(chunkIndex, true); err != nil {
   295  			r.log.Printf("failed to set chunk %v stuck: %v", chunkIndex, err)
   296  		}
   297  		return nil, errors.AddContext(err, "error trying to get the pieces for the chunk")
   298  	}
   299  	for pieceIndex, pieceSet := range pieces {
   300  		for _, piece := range pieceSet {
   301  			hpk := piece.HostPubKey.String()
   302  			goodForRenew, exists2 := goodForRenew[hpk]
   303  			offline, exists := offline[hpk]
   304  			if !exists || !exists2 || !goodForRenew || offline {
    305  			// This piece cannot be counted towards redundancy if the host is
   306  				// offline, is marked no good for renew, or is not available in
   307  				// the lookup maps.
   308  				continue
   309  			}
   310  
   311  			// Mark the chunk set based on the pieces in this contract.
   312  			_, exists = uuc.unusedHosts[piece.HostPubKey.String()]
   313  			redundantPiece := uuc.pieceUsage[pieceIndex]
   314  			if exists && !redundantPiece {
   315  				uuc.pieceUsage[pieceIndex] = true
   316  				uuc.piecesCompleted++
   317  				delete(uuc.unusedHosts, piece.HostPubKey.String())
   318  			} else if exists {
   319  				// This host has a piece, but it is the same piece another
   320  				// host has. We should still remove the host from the
   321  				// unusedHosts since one host having multiple pieces of a
   322  				// chunk might lead to unexpected issues. e.g. if a host
   323  				// has multiple pieces and another host with redundant
   324  				// pieces goes offline, we end up with false redundancy
   325  				// reporting.
   326  				delete(uuc.unusedHosts, piece.HostPubKey.String())
   327  			}
   328  		}
   329  	}
   330  	// Now that we have calculated the completed pieces for the chunk we can
   331  	// calculate the health of the chunk to avoid a call to ChunkHealth
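        	// A health of 0 means the chunk has full redundancy, and values above 1
        	// mean the chunk is below minimum redundancy. For example, a chunk using a
        	// 10-of-30 erasure code with 20 completed pieces has health
        	// 1 - (20-10)/(30-10) = 0.5.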
   332  	uuc.health = 1 - (float64(uuc.piecesCompleted-uuc.minimumPieces) / float64(uuc.piecesNeeded-uuc.minimumPieces))
   333  	return uuc, nil
   334  }
   335  
   336  // managedBuildUnfinishedChunks will pull all of the unfinished chunks out of a
   337  // file.
   338  //
    339  // NOTE: each unfinishedUploadChunk needs its own SiaFileSetEntry. This is
    340  // because the renter does not keep SiaFiles in memory, so the
    341  // unfinishedUploadChunks need to close the SiaFile when they are done. They
    342  // therefore cannot share a SiaFileSetEntry, as the first chunk to finish
    343  // would close the Entry and consequently impact the remaining
    344  // chunks.
   345  func (r *Renter) managedBuildUnfinishedChunks(entry *siafile.SiaFileSetEntry, hosts map[string]struct{}, target repairTarget, offline, goodForRenew map[string]bool) []*unfinishedUploadChunk {
   346  	// If we don't have enough workers for the file, don't repair it right now.
   347  	minPieces := entry.ErasureCode().MinPieces()
   348  	r.staticWorkerPool.mu.RLock()
   349  	workerPoolLen := len(r.staticWorkerPool.workers)
   350  	r.staticWorkerPool.mu.RUnlock()
   351  	if workerPoolLen < minPieces {
   352  		// There are not enough workers for the chunk to reach minimum
   353  		// redundancy. Check if the allowance has enough hosts for the chunk to
   354  		// reach minimum redundancy
   355  		r.log.Debugln("Not building any chunks from file as there are not enough workers")
   356  		allowance := r.hostContractor.Allowance()
   357  		// Only perform this check when we are looking for unstuck chunks. This
   358  		// will prevent log spam from repeatedly logging to the user the issue
   359  		// with the file after marking the chunks as stuck
   360  		if allowance.Hosts < uint64(minPieces) && target == targetUnstuckChunks {
   361  			// There are not enough hosts in the allowance for the file to reach
   362  			// minimum redundancy. Mark all chunks as stuck
    363  			r.log.Printf("WARN: allowance has insufficient hosts for chunk to reach minimum redundancy, have %v need %v for file %v", allowance.Hosts, minPieces, entry.SiaFilePath())
   364  			if err := entry.SetAllStuck(true); err != nil {
   365  				r.log.Println("WARN: unable to mark all chunks as stuck:", err)
   366  			}
   367  		}
   368  		return nil
   369  	}
   370  
    371  	// Assemble the chunk indexes. The stuck loop should only be adding stuck
    372  	// chunks and the repair loop should only be adding unstuck chunks.
   373  	var chunkIndexes []uint64
   374  	for i := uint64(0); i < entry.NumChunks(); i++ {
   375  		stuck, err := entry.StuckChunkByIndex(i)
   376  		if err != nil {
   377  			r.log.Debugln("failed to get 'stuck' status of entry:", err)
   378  			continue
   379  		}
   380  		if (target == targetStuckChunks) == stuck {
   381  			chunkIndexes = append(chunkIndexes, i)
   382  		}
   383  	}
   384  
   385  	// Sanity check that we have chunk indices to go through
   386  	if len(chunkIndexes) == 0 {
   387  		r.log.Println("WARN: no chunk indices gathered, can't add chunks to heap")
   388  		return nil
   389  	}
   390  
    391  	// Build a map of host public keys. We assume that all entries are the same.
   392  	pks := make(map[string]types.SiaPublicKey)
   393  	for _, pk := range entry.HostPublicKeys() {
   394  		pks[string(pk.Key)] = pk
   395  	}
   396  
   397  	// Assemble the set of chunks.
   398  	newUnfinishedChunks := make([]*unfinishedUploadChunk, 0, len(chunkIndexes))
   399  	for _, index := range chunkIndexes {
   400  		// Sanity check: fileUID should not be the empty value.
   401  		if entry.UID() == "" {
   402  			build.Critical("empty string for file UID")
   403  		}
   404  
   405  		// Create unfinishedUploadChunk
   406  		chunk, err := r.managedBuildUnfinishedChunk(entry, uint64(index), hosts, pks, false, offline, goodForRenew)
   407  		if err != nil {
   408  			r.log.Debugln("Error when building an unfinished chunk:", err)
   409  			continue
   410  		}
   411  		newUnfinishedChunks = append(newUnfinishedChunks, chunk)
   412  	}
   413  
   414  	// Iterate through the set of newUnfinishedChunks and remove any that are
   415  	// completed or are not downloadable.
   416  	incompleteChunks := newUnfinishedChunks[:0]
   417  	for _, chunk := range newUnfinishedChunks {
   418  		// Check the chunk status. A chunk is repairable if it can be fully
   419  		// downloaded, or if the source file is available on disk. We also check
   420  		// if the chunk needs repair, which is only true if more than a certain
   421  		// amount of redundancy is missing. We only repair above a certain
   422  		// threshold of missing redundancy to minimize the amount of repair work
   423  		// that gets triggered by host churn.
   424  		//
    425  		// While a file could be on disk as long as !os.IsNotExist(err), for the
    426  		// purposes of repairing, a file is only considered on disk if it can be
    427  		// accessed without error. If there is an error accessing the file, it is
    428  		// likely that we cannot read the file, in which case it cannot be used
    429  		// for repair.
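        		// For example, assuming RepairThreshold is 0.25, a chunk with health
        		// 0.5 whose source file is missing from disk but which can still be
        		// downloaded (health <= 1) is both repairable and in need of repair.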
   430  		_, err := os.Stat(chunk.fileEntry.LocalPath())
   431  		onDisk := err == nil
   432  		repairable := chunk.health <= 1 || onDisk
   433  		needsRepair := chunk.health >= RepairThreshold
   434  
   435  		// Add chunk to list of incompleteChunks if it is incomplete and
   436  		// repairable or if we are targeting stuck chunks
   437  		if needsRepair && (repairable || target == targetStuckChunks) {
   438  			incompleteChunks = append(incompleteChunks, chunk)
   439  			continue
   440  		}
   441  
   442  		// If a chunk is not able to be repaired, mark it as stuck.
   443  		if !repairable {
   444  			r.log.Println("Marking chunk", chunk.id, "as stuck due to not being repairable")
   445  			err = r.managedSetStuckAndClose(chunk, true)
   446  			if err != nil {
   447  				r.log.Debugln("WARN: unable to set chunk stuck status and close:", err)
   448  			}
   449  			continue
   450  		}
   451  
   452  		// Close entry of completed chunk
   453  		err = r.managedSetStuckAndClose(chunk, false)
   454  		if err != nil {
   455  			r.log.Debugln("WARN: unable to set chunk stuck status and close:", err)
   456  		}
   457  	}
   458  	return incompleteChunks
   459  }
   460  
   461  // managedAddChunksToHeap will add chunks to the upload heap one directory at a
    462  // time until the directory heap is empty or the upload heap is full. It does
   463  // this by popping directories off the directory heap and adding the chunks from
   464  // that directory to the upload heap. If the worst health directory found is
   465  // sufficiently healthy then we return.
   466  func (r *Renter) managedAddChunksToHeap(hosts map[string]struct{}) (map[modules.SiaPath]struct{}, error) {
   467  	siaPaths := make(map[modules.SiaPath]struct{})
   468  	prevHeapLen := r.uploadHeap.managedLen()
   469  	// Loop until the upload heap has maxUploadHeapChunks in it or the directory
   470  	// heap is empty
   471  	for r.uploadHeap.managedLen() < maxUploadHeapChunks && r.directoryHeap.managedLen() > 0 {
   472  		select {
   473  		case <-r.tg.StopChan():
   474  			return siaPaths, errors.New("renter shutdown before we could finish adding chunks to heap")
   475  		default:
   476  		}
   477  
   478  		// Pop an explored directory off of the directory heap
   479  		dir, err := r.managedNextExploredDirectory()
   480  		if err != nil {
   481  			r.log.Println("WARN: error getting explored directory:", err)
   482  			// Reset the directory heap to try and help address the error
   483  			r.directoryHeap.managedReset()
   484  			return siaPaths, err
   485  		}
   486  
   487  		// Sanity Check if directory was returned
   488  		if dir == nil {
   489  			r.log.Debugln("no more chunks added to the upload heap because there are no more directories")
   490  			return siaPaths, nil
   491  		}
   492  
   493  		// Grab health and siaPath of the directory
   494  		dir.mu.Lock()
   495  		dirHealth := dir.health
   496  		dirSiaPath := dir.siaPath
   497  		dir.mu.Unlock()
   498  
   499  		// If the directory that was just popped is healthy then return
   500  		if dirHealth < RepairThreshold {
   501  			r.log.Debugln("no more chunks added to the upload heap because directory popped is healthy")
   502  			return siaPaths, nil
   503  		}
   504  
   505  		// Add chunks from the directory to the uploadHeap.
   506  		r.managedBuildChunkHeap(dirSiaPath, hosts, targetUnstuckChunks)
   507  
   508  		// Check to see if we are still adding chunks
   509  		heapLen := r.uploadHeap.managedLen()
   510  		if heapLen == prevHeapLen {
   511  			// No more chunks added to the uploadHeap from the worst health
    512  			// directory. This means that the worst health chunks are already in
    513  			// the heap or are currently being repaired, so return. This can be
    514  			// the case for new uploads or repair loop iterations triggered by
    515  			// bubble.
   516  			r.log.Debugln("no more chunks added to the upload heap")
   517  			return siaPaths, nil
   518  		}
   519  		chunksAdded := heapLen - prevHeapLen
   520  		prevHeapLen = heapLen
   521  
   522  		// Since we added chunks from this directory, track the siaPath
   523  		//
   524  		// NOTE: we only want to remember each siaPath once which is why we use
    525  		// a map. We don't check if the siaPath is already in the map because
   526  		// another thread could have added the directory back to the heap after
   527  		// we just popped it off. This is the case for new uploads.
   528  		siaPaths[dirSiaPath] = struct{}{}
   529  		r.log.Println("Added", chunksAdded, "chunks from", dirSiaPath, "to the upload heap")
   530  	}
   531  
   532  	return siaPaths, nil
   533  }
   534  
   535  // managedBuildAndPushRandomChunk randomly selects a file and builds the
   536  // unfinished chunks, then randomly adds chunksToAdd chunks to the upload heap
   537  func (r *Renter) managedBuildAndPushRandomChunk(files []*siafile.SiaFileSetEntry, chunksToAdd int, hosts map[string]struct{}, target repairTarget, offline, goodForRenew map[string]bool) {
   538  	// Sanity check that there are files
   539  	if len(files) == 0 {
   540  		return
   541  	}
   542  
   543  	// Create random indices for files
   544  	p := fastrand.Perm(len(files))
   545  	for i := 0; i < chunksToAdd && i < len(files); i++ {
   546  		// Grab random file
   547  		file := files[p[i]]
   548  
   549  		// Build the unfinished stuck chunks from the file
   550  		unfinishedUploadChunks := r.managedBuildUnfinishedChunks(file, hosts, target, offline, goodForRenew)
   551  
   552  		// Sanity check that there are stuck chunks
   553  		if len(unfinishedUploadChunks) == 0 {
   554  			continue
   555  		}
   556  
    557  		// Add a random stuck chunk to the upload heap and set its stuckRepair
    558  		// field to true.
   559  		randChunkIndex := fastrand.Intn(len(unfinishedUploadChunks))
   560  		randChunk := unfinishedUploadChunks[randChunkIndex]
   561  		randChunk.stuckRepair = true
   562  		if !r.uploadHeap.managedPush(randChunk) {
   563  			// Chunk wasn't added to the heap. Close the file
   564  			r.log.Debugln("WARN: stuck chunk", randChunk.id, "wasn't added to heap")
   565  			err := randChunk.fileEntry.Close()
   566  			if err != nil {
   567  				r.log.Println("WARN: unable to close file:", err)
   568  			}
   569  		}
   570  		unfinishedUploadChunks = append(unfinishedUploadChunks[:randChunkIndex], unfinishedUploadChunks[randChunkIndex+1:]...)
   571  		// Close the unused unfinishedUploadChunks
   572  		for _, chunk := range unfinishedUploadChunks {
   573  			err := chunk.fileEntry.Close()
   574  			if err != nil {
   575  				r.log.Println("WARN: unable to close file:", err)
   576  			}
   577  		}
   578  	}
   579  	return
   580  }
   581  
   582  // managedBuildAndPushChunks builds the unfinished upload chunks and adds them
   583  // to the upload heap
   584  //
   585  // NOTE: the files submitted to this function should all be from the same
   586  // directory
   587  func (r *Renter) managedBuildAndPushChunks(files []*siafile.SiaFileSetEntry, hosts map[string]struct{}, target repairTarget, offline, goodForRenew map[string]bool) {
   588  	// Sanity check that at least one file was provided
   589  	if len(files) == 0 {
   590  		build.Critical("managedBuildAndPushChunks called without providing any files")
   591  		return
   592  	}
   593  
   594  	// Loop through the whole set of files and get a list of chunks and build a
   595  	// temporary heap
   596  	var unfinishedChunkHeap uploadChunkHeap
   597  	var worstIgnoredHealth float64
   598  	dirHeapHealth := r.directoryHeap.managedPeekHealth()
   599  	for _, file := range files {
    600  		// For normal repairs, skip files that are in better health than the top
    601  		// of the directory heap.
   602  		fileHealth := file.Metadata().CachedHealth
   603  		if fileHealth < dirHeapHealth && target == targetUnstuckChunks {
   604  			worstIgnoredHealth = math.Max(worstIgnoredHealth, fileHealth)
   605  			continue
   606  		}
   607  
    608  		// Build unfinished chunks from the file and add them to the temp heap if
    609  		// they are in worse health than the top of the directory heap.
   610  		unfinishedUploadChunks := r.managedBuildUnfinishedChunks(file, hosts, target, offline, goodForRenew)
   611  		for i := 0; i < len(unfinishedUploadChunks); i++ {
   612  			chunk := unfinishedUploadChunks[i]
    613  			// Check to see if the chunk is already in the upload heap.
   614  			if r.uploadHeap.managedExists(chunk.id) {
   615  				// Close the file entry
   616  				err := chunk.fileEntry.Close()
   617  				if err != nil {
   618  					r.log.Println("WARN: unable to close file:", err)
   619  				}
   620  				// Since the chunk is already in the heap we do not need to
   621  				// track the health of the chunk
   622  				continue
   623  			}
   624  
    625  			// For normal repairs, skip chunks that are in better health than the
    626  			// top of the directory heap.
   627  			if chunk.health < dirHeapHealth && target == targetUnstuckChunks {
   628  				// Track the health
   629  				worstIgnoredHealth = math.Max(worstIgnoredHealth, chunk.health)
   630  				// Close the file entry
   631  				err := chunk.fileEntry.Close()
   632  				if err != nil {
   633  					r.log.Println("WARN: unable to close file:", err)
   634  				}
   635  				continue
   636  			}
   637  
   638  			// Add chunk to temp heap
   639  			heap.Push(&unfinishedChunkHeap, chunk)
   640  
   641  			// Check if temp heap is growing too large. We want to restrict it
   642  			// to twice the size of the max upload heap size. This restriction
   643  			// should be applied to all repairs to prevent excessive memory
   644  			// usage.
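        			// For illustration (the value is hypothetical): if maxUploadHeapChunks
        			// were 500, the temp heap would be allowed to grow to 1,000 chunks; at
        			// that point the 500 worst-health chunks are kept, the worst health
        			// among the dropped chunks is recorded, and the rest are released.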
   645  			if len(unfinishedChunkHeap) < maxUploadHeapChunks*2 {
   646  				continue
   647  			}
   648  
    649  			// Pop off the worst half of the heap so it can be kept.
   650  			var chunksToKeep []*unfinishedUploadChunk
   651  			for len(unfinishedChunkHeap) > maxUploadHeapChunks {
   652  				chunksToKeep = append(chunksToKeep, heap.Pop(&unfinishedChunkHeap).(*unfinishedUploadChunk))
   653  			}
   654  
   655  			// Check health of next chunk
   656  			chunk = heap.Pop(&unfinishedChunkHeap).(*unfinishedUploadChunk)
   657  			worstIgnoredHealth = math.Max(worstIgnoredHealth, chunk.health)
   658  			// Close the file entry
   659  			err := chunk.fileEntry.Close()
   660  			if err != nil {
   661  				r.log.Println("WARN: unable to close file:", err)
   662  			}
   663  
   664  			// Reset temp heap to release memory
   665  			err = unfinishedChunkHeap.reset()
   666  			if err != nil {
   667  				r.log.Println("WARN: error resetting the temporary upload heap:", err)
   668  			}
   669  
   670  			// Add worst chunks back to heap
   671  			for _, chunk := range chunksToKeep {
   672  				heap.Push(&unfinishedChunkHeap, chunk)
   673  			}
   674  
   675  			// Make sure chunksToKeep is zeroed out in memory
   676  			chunksToKeep = []*unfinishedUploadChunk{}
   677  		}
   678  	}
   679  
   680  	// We now have a temporary heap of the worst chunks in the directory that
    681  	// are also in worse health than anything else in the directory heap. Now we
    682  	// try to add as many chunks as we can to the uploadHeap.
   683  	for len(unfinishedChunkHeap) > 0 && (r.uploadHeap.managedLen() < maxUploadHeapChunks || target == targetBackupChunks) {
   684  		// Add chunk to the uploadHeap
   685  		chunk := heap.Pop(&unfinishedChunkHeap).(*unfinishedUploadChunk)
   686  		if !r.uploadHeap.managedPush(chunk) {
   687  			// We don't track the health of this chunk since the only reason it
   688  			// wouldn't be added to the heap is if it is already in the heap or
   689  			// is currently being repaired. Close the file.
   690  			err := chunk.fileEntry.Close()
   691  			if err != nil {
   692  				r.log.Println("WARN: unable to close file:", err)
   693  			}
   694  		}
   695  	}
   696  
   697  	// Check if there are still chunks left in the temp heap. If so check the
   698  	// health of the next chunk
   699  	if len(unfinishedChunkHeap) > 0 {
   700  		chunk := heap.Pop(&unfinishedChunkHeap).(*unfinishedUploadChunk)
   701  		worstIgnoredHealth = math.Max(worstIgnoredHealth, chunk.health)
   702  		// Close the chunk's file
   703  		err := chunk.fileEntry.Close()
   704  		if err != nil {
   705  			r.log.Println("WARN: unable to close file:", err)
   706  		}
   707  	}
   708  
   709  	// We are done with the temporary heap so reset it to help release the
   710  	// memory
   711  	err := unfinishedChunkHeap.reset()
   712  	if err != nil {
   713  		r.log.Println("WARN: error resetting the temporary upload heap:", err)
   714  	}
   715  
    716  	// Check if we were adding backup chunks; if so, return here as backups are
    717  	// not added to the directory heap.
   718  	if target == targetBackupChunks {
   719  		return
   720  	}
   721  
   722  	// Check if we should add the directory back to the directory heap
   723  	if worstIgnoredHealth < RepairThreshold {
   724  		return
   725  	}
   726  
   727  	// All files submitted are from the same directory so use the first one to
   728  	// get the directory siapath
   729  	dirSiaPath, err := r.staticFileSet.SiaPath(files[0]).Dir()
   730  	if err != nil {
   731  		r.log.Println("WARN: unable to get directory SiaPath and add directory back to directory heap:", err)
   732  		return
   733  	}
   734  
   735  	// Since directory is being added back as explored we only need to set the
   736  	// health as that is what will be used for sorting in the directory heap.
   737  	//
   738  	// The aggregate health is set to 'worstIgnoredHealth' as well. In the event
   739  	// that the directory gets added as unexplored because another copy of the
   740  	// unexplored directory exists on the directory heap, we need to make sure
   741  	// that the worst known health is represented in the aggregate value.
   742  	d := &directory{
   743  		aggregateHealth: worstIgnoredHealth,
   744  		health:          worstIgnoredHealth,
   745  		explored:        true,
   746  		siaPath:         dirSiaPath,
   747  	}
   748  	// Add the directory to the heap. If there is a conflict because the
   749  	// directory is already in the heap (for example, added by another thread or
    750  	// process), then the worst of the values between this dir and the one
    751  	// that's already in the heap will be used, to ensure that the repair loop
    752  	// prioritizes the files in the worst health.
   753  	r.directoryHeap.managedPush(d)
   754  }
   755  
   756  // managedBuildChunkHeap will iterate through all of the files in the renter and
   757  // construct a chunk heap.
   758  //
   759  // TODO: accept an input to indicate how much room is in the heap
   760  //
   761  // TODO: Explore whether there is a way to perform the task below without
   762  // opening a full file entry for each file in the directory.
   763  func (r *Renter) managedBuildChunkHeap(dirSiaPath modules.SiaPath, hosts map[string]struct{}, target repairTarget) {
   764  	// Get Directory files
   765  	var fileinfos []os.FileInfo
   766  	var err error
   767  	if target == targetBackupChunks {
   768  		fileinfos, err = ioutil.ReadDir(dirSiaPath.SiaDirSysPath(r.staticBackupsDir))
   769  	} else {
   770  		fileinfos, err = ioutil.ReadDir(dirSiaPath.SiaDirSysPath(r.staticFilesDir))
   771  	}
   772  	if err != nil {
   773  		r.log.Println("WARN: could not read directory:", err)
   774  		return
   775  	}
   776  	// Build files from fileinfos
   777  	var files []*siafile.SiaFileSetEntry
   778  	for _, fi := range fileinfos {
    779  		// Skip subdirectories and non-siafiles.
   780  		ext := filepath.Ext(fi.Name())
   781  		if fi.IsDir() || ext != modules.SiaFileExtension {
   782  			continue
   783  		}
   784  
   785  		// Open SiaFile
   786  		siaPath, err := dirSiaPath.Join(strings.TrimSuffix(fi.Name(), ext))
   787  		if err != nil {
   788  			r.log.Println("WARN: could not create siaPath:", err)
   789  			continue
   790  		}
   791  		var file *siafile.SiaFileSetEntry
   792  		if target == targetBackupChunks {
   793  			file, err = r.staticBackupFileSet.Open(siaPath)
   794  		} else {
   795  			file, err = r.staticFileSet.Open(siaPath)
   796  		}
   797  		if err != nil {
   798  			r.log.Println("WARN: could not open siafile:", err)
   799  			continue
   800  		}
   801  
   802  		// For stuck chunk repairs, check to see if file has stuck chunks
   803  		if target == targetStuckChunks && file.NumStuckChunks() == 0 {
   804  			// Close unneeded files
   805  			err := file.Close()
   806  			if err != nil {
   807  				r.log.Println("WARN: Could not close file:", err)
   808  			}
   809  			continue
   810  		}
   811  		// For normal repairs, ignore files that don't have any unstuck chunks
   812  		// or are healthy.
   813  		//
    814  		// We can use the cached value of health because it is updated during
    815  		// bubble. Since the repair loop operates off of the metadata
    816  		// information updated by bubble, this cached health is accurate enough
    817  		// to use in order to determine if a file has any chunks that need
    818  		// repair.
   819  		ignore := file.NumChunks() == file.NumStuckChunks() || file.Metadata().CachedHealth < RepairThreshold
   820  		if target == targetUnstuckChunks && ignore {
   821  			err := file.Close()
   822  			if err != nil {
   823  				r.log.Println("WARN: Could not close file:", err)
   824  			}
   825  			continue
   826  		}
   827  
   828  		files = append(files, file)
   829  	}
   830  
   831  	// Check if any files were selected from directory
   832  	if len(files) == 0 {
   833  		r.log.Debugln("No files pulled from `", dirSiaPath, "` to build the repair heap")
   834  		return
   835  	}
   836  
   837  	// If there are more files than there is room in the heap, sort the files by
   838  	// health and only use the required number of files to build the heap. In
    839  	// the absolute worst case, each file will only contribute one chunk to
    840  	// the heap, so this shortcut will not be missing any important chunks. This
    841  	// shortcut will also not be used for directories that have fewer than
    842  	// 'maxUploadHeapChunks' files in them, minimizing the impact of this code in
    843  	// the typical case.
   844  	//
   845  	// This check only applies to normal repairs. Stuck repairs have their own
   846  	// way of managing the number of chunks added to the heap and backup chunks
   847  	// should always be added.
   848  	//
   849  	// v1.4.1 Benchmark: on a computer with an SSD, the time to sort 6,000 files
   850  	// is less than 50 milliseconds, while the time to process 250 files with 40
   851  	// chunks each using 'managedBuildAndPushChunks' is several seconds. Even in
   852  	// the worst case, where we are sorting 251 files with 1 chunk each, there
   853  	// is not much slowdown compared to skipping the sort, because the sort is
   854  	// so fast.
   855  	if len(files) > maxUploadHeapChunks && target == targetUnstuckChunks {
   856  		// Sort so that the highest health chunks will be first in the array.
   857  		// Higher health values equal worse health for the file, and we want to
   858  		// focus on the worst files.
   859  		sort.Slice(files, func(i, j int) bool {
   860  			return files[i].Metadata().CachedHealth > files[j].Metadata().CachedHealth
   861  		})
   862  		for i := maxUploadHeapChunks; i < len(files); i++ {
   863  			err := files[i].Close()
   864  			if err != nil {
   865  				r.log.Println("WARN: Could not close file:", err)
   866  			}
   867  		}
   868  		files = files[:maxUploadHeapChunks]
   869  	}
   870  
   871  	// Build the unfinished upload chunks and add them to the upload heap
   872  	offline, goodForRenew, _ := r.managedContractUtilityMaps()
   873  	switch target {
   874  	case targetBackupChunks:
   875  		r.log.Debugln("Attempting to add backup chunks to heap")
   876  		r.managedBuildAndPushChunks(files, hosts, target, offline, goodForRenew)
   877  	case targetStuckChunks:
   878  		r.log.Debugln("Attempting to add stuck chunk to heap")
   879  		r.managedBuildAndPushRandomChunk(files, maxStuckChunksInHeap, hosts, target, offline, goodForRenew)
   880  	case targetUnstuckChunks:
   881  		r.log.Debugln("Attempting to add chunks to heap")
   882  		r.managedBuildAndPushChunks(files, hosts, target, offline, goodForRenew)
   883  	default:
   884  		r.log.Println("WARN: repair target not recognized", target)
   885  	}
   886  
   887  	// Close all files
   888  	for _, file := range files {
   889  		err := file.Close()
   890  		if err != nil {
   891  			r.log.Println("WARN: Could not close file:", err)
   892  		}
   893  	}
   894  }
   895  
   896  // managedPrepareNextChunk takes the next chunk from the chunk heap and prepares
   897  // it for upload. Preparation includes blocking until enough memory is
   898  // available, fetching the logical data for the chunk (either from the disk or
   899  // from the network), erasure coding the logical data into the physical data,
   900  // and then finally passing the work onto the workers.
   901  func (r *Renter) managedPrepareNextChunk(uuc *unfinishedUploadChunk, hosts map[string]struct{}) error {
    902  	// Request memory for the chunk; this blocks until enough memory is
    903  	// available. Then spin up a thread to asynchronously handle the rest of
    904  	// the chunk tasks.
   905  	if !r.memoryManager.Request(uuc.memoryNeeded, memoryPriorityLow) {
   906  		return errors.New("couldn't request memory")
   907  	}
   908  	// Fetch the chunk in a separate goroutine, as it can take a long time and
   909  	// does not need to bottleneck the repair loop.
   910  	go r.threadedFetchAndRepairChunk(uuc)
   911  	return nil
   912  }
   913  
   914  // managedRefreshHostsAndWorkers will reset the set of hosts and the set of
   915  // workers for the renter.
   916  //
   917  // TODO: This function can be removed entirely if the worker pool is made to
   918  // keep a list of hosts. Then instead of passing around the hosts as a parameter
   919  // the cached value in the worker pool can be used instead. Using the cached
   920  // value in the worker pool is more accurate anyway because the hosts field will
   921  // match the set of workers that we have. Doing it the current way means there
   922  // can be drift between the set of workers and the set of hosts we are using to
   923  // build out the chunk heap.
   924  func (r *Renter) managedRefreshHostsAndWorkers() map[string]struct{} {
   925  	// Grab the current set of contracts and use them to build a list of hosts
   926  	// that are currently active. The hosts are assembled into a map where the
   927  	// key is the String() representation of the host's SiaPublicKey.
   928  	//
   929  	// TODO / NOTE: This code can be removed once files store the HostPubKey
   930  	// of the hosts they are using, instead of just the FileContractID.
   931  	currentContracts := r.hostContractor.Contracts()
   932  	hosts := make(map[string]struct{})
   933  	for _, contract := range currentContracts {
   934  		hosts[contract.HostPublicKey.String()] = struct{}{}
   935  	}
   936  	// Refresh the worker pool as well.
   937  	r.staticWorkerPool.callUpdate()
   938  	return hosts
   939  }
   940  
    941  // managedRepairLoop works through the upload heap repairing chunks. The repair
    942  // loop will continue until the renter stops, there are no more chunks, or the
    943  // number of chunks in the upload heap has dropped below minUploadHeapSize.
   944  func (r *Renter) managedRepairLoop(hosts map[string]struct{}) error {
   945  	// smallRepair indicates whether or not the repair loop should process all
   946  	// of the chunks in the heap instead of just processing down to the minimum
   947  	// heap size. We want to process all of the chunks if the rest of the
   948  	// directory heap is in good health and there are no more chunks that could
   949  	// be added to the heap.
   950  	smallRepair := r.directoryHeap.managedPeekHealth() < RepairThreshold
   951  
   952  	// Limit the amount of time spent in each iteration of the repair loop so
   953  	// that changes to the directory heap take effect sooner rather than later.
   954  	repairBreakTime := time.Now().Add(maxRepairLoopTime)
   955  
    956  	// Work through the heap repairing chunks until the heap is empty (for small
    957  	// repairs), until the heap drops below minUploadHeapSize (for larger
    958  	// repairs), or until the maximum time for one repair iteration has elapsed.
    959  	for (r.uploadHeap.managedLen() >= minUploadHeapSize || smallRepair) && time.Now().Before(repairBreakTime) {
   960  		select {
   961  		case <-r.tg.StopChan():
   962  			// Return if the renter has shut down.
   963  			return errors.New("Repair loop interrupted because renter is shutting down")
   964  		default:
   965  		}
   966  
   967  		// Return if the renter is not online.
   968  		if !r.g.Online() {
    969  			return errors.New("repair loop returned early due to the renter being offline")
   970  		}
   971  
   972  		// Check if there is work by trying to pop off the next chunk from the
   973  		// heap.
   974  		nextChunk := r.uploadHeap.managedPop()
   975  		if nextChunk == nil {
   976  			// The heap is empty so reset it to free memory and return.
   977  			r.uploadHeap.managedReset()
   978  			return nil
   979  		}
   980  
   981  		// Make sure we have enough workers for this chunk to reach minimum
   982  		// redundancy.
   983  		r.staticWorkerPool.mu.RLock()
   984  		availableWorkers := len(r.staticWorkerPool.workers)
   985  		r.staticWorkerPool.mu.RUnlock()
   986  		if availableWorkers < nextChunk.minimumPieces {
   987  			// If the chunk is not stuck, check whether there are enough hosts
   988  			// in the allowance to support the chunk.
   989  			if !nextChunk.stuck {
   990  				// There are not enough available workers for the chunk to reach
   991  				// minimum redundancy. Check if the allowance has enough hosts
   992  				// for the chunk to reach minimum redundancy
   993  				allowance := r.hostContractor.Allowance()
   994  				if allowance.Hosts < uint64(nextChunk.minimumPieces) {
   995  					// There are not enough hosts in the allowance for this
   996  					// chunk to reach minimum redundancy. Log an error, set the
   997  					// chunk as stuck, and close the file
    998  					r.log.Printf("WARN: allowance has insufficient hosts for chunk to reach minimum redundancy, have %v need %v for chunk %v", allowance.Hosts, nextChunk.minimumPieces, nextChunk.id)
   999  					err := nextChunk.fileEntry.SetStuck(nextChunk.index, true)
  1000  					if err != nil {
  1001  						r.log.Debugln("WARN: unable to mark chunk as stuck:", err, nextChunk.id)
  1002  					}
  1003  				}
  1004  			}
  1005  
   1006  			// Whether or not the allowance has enough hosts, the chunk cannot
   1007  			// be repaired right now, so ignore the chunk for now and close the
   1008  			// file.
  1009  			err := nextChunk.fileEntry.Close()
  1010  			if err != nil {
  1011  				r.log.Debugln("WARN: unable to close file:", err, nextChunk.fileEntry.SiaFilePath())
  1012  			}
  1013  			// Remove the chunk from the repairingChunks map
  1014  			r.uploadHeap.managedMarkRepairDone(nextChunk.id)
  1015  			continue
  1016  		}
  1017  
  1018  		// Perform the work. managedPrepareNextChunk will block until
  1019  		// enough memory is available to perform the work, slowing this
  1020  		// thread down to using only the resources that are available.
  1021  		err := r.managedPrepareNextChunk(nextChunk, hosts)
  1022  		if err != nil {
   1023  			// An error was returned which means the renter was unable to allocate
  1024  			// memory for the repair. Since that is not an issue with the file
  1025  			// we will just close the chunk file entry instead of marking it as
  1026  			// stuck
  1027  			r.log.Debugln("WARN: unable to prepare next chunk without issues", err, nextChunk.id)
  1028  			err = nextChunk.fileEntry.Close()
  1029  			if err != nil {
  1030  				r.log.Debugln("WARN: unable to close file:", err, nextChunk.fileEntry.SiaFilePath())
  1031  			}
  1032  			// Remove the chunk from the repairingChunks map
  1033  			r.uploadHeap.managedMarkRepairDone(nextChunk.id)
  1034  			continue
  1035  		}
  1036  	}
  1037  	return nil
  1038  }
  1039  
  1040  // threadedUploadAndRepair is a background thread that maintains a queue of
  1041  // chunks to repair. This thread attempts to prioritize repairing files and
  1042  // chunks with the lowest health, and attempts to keep heavy throughput
  1043  // sustained for data upload as long as there is at least one chunk in need of
  1044  // upload or repair.
  1045  func (r *Renter) threadedUploadAndRepair() {
  1046  	err := r.tg.Add()
  1047  	if err != nil {
  1048  		return
  1049  	}
  1050  	defer r.tg.Done()
  1051  
  1052  	// Perpetual loop to scan for more files and add chunks to the uploadheap.
  1053  	// The loop assumes that the heap has already been initialized (either at
  1054  	// startup, or after sleeping) and does checks to see whether there is any
  1055  	// work required. If there is not any work required, the loop will sleep
  1056  	// until woken up. If there is work required, the loop will begin to process
  1057  	// the chunks and directories in the repair heaps.
  1058  	//
  1059  	// After 'repairLoopResetFrequency', the repair loop will be reset. This
  1060  	// adds a layer of robustness in case the repair loop gets stuck or can't
  1061  	// work through the full heap quickly because the user keeps uploading new
  1062  	// files and keeping a minimum number of chunks in the repair heap.
  1063  	resetTime := time.Now().Add(repairLoopResetFrequency)
  1064  	for {
  1065  		// Return if the renter has shut down.
  1066  		select {
  1067  		case <-r.tg.StopChan():
  1068  			return
  1069  		default:
  1070  		}
  1071  
  1072  		// Wait until the renter is online to proceed. This function will return
  1073  		// 'false' if the renter has shut down before being online.
  1074  		if !r.managedBlockUntilOnline() {
  1075  			return
  1076  		}
  1077  		// Refresh the worker set.
  1078  		hosts := r.managedRefreshHostsAndWorkers()
  1079  
  1080  		// If enough time has elapsed to trigger a directory reset, reset the
  1081  		// directory.
  1082  		if time.Now().After(resetTime) {
  1083  			resetTime = time.Now().Add(repairLoopResetFrequency)
  1084  			r.directoryHeap.managedReset()
  1085  			err = r.managedPushUnexploredDirectory(modules.RootSiaPath())
  1086  			if err != nil {
  1087  				r.log.Println("WARN: error re-initializing the directory heap:", err)
  1088  			}
  1089  		}
  1090  
  1091  		// Add any chunks from the backup heap that need to be repaired. This
  1092  		// needs to be handled separately because currently the filesystem for
  1093  		// storing system files and chunks such as those related to snapshot
  1094  		// backups is different from the siafileset that stores non-system files
  1095  		// and chunks.
  1096  		heapLen := r.uploadHeap.managedLen()
  1097  		r.managedBuildChunkHeap(modules.RootSiaPath(), hosts, targetBackupChunks)
  1098  		numBackupchunks := r.uploadHeap.managedLen() - heapLen
  1099  		if numBackupchunks > 0 {
  1100  			r.log.Println("Added", numBackupchunks, "backup chunks to the upload heap")
  1101  		}
  1102  
  1103  		// Check if there is work to do. If the filesystem is healthy and the
  1104  		// heap is empty, there is no work to do and the thread should block
  1105  		// until there is work to do.
  1106  		if r.uploadHeap.managedLen() == 0 && r.directoryHeap.managedPeekHealth() < RepairThreshold {
  1107  			// TODO: This has a tiny window where it might be dumping out chunks
   1108  			// that need repair, if the upload call is appending to the
  1109  			// directory heap because there is a new upload.
  1110  			//
  1111  			// I believe that a good fix for this would be to change the upload
  1112  			// heap so that it performs a rapid bubble before trying to insert
  1113  			// the chunks into the heap. Then, even if a reset is triggered,
  1114  			// because a rapid bubble has already completed updating the health
  1115  			// of the root dir, it will be considered fairly.
  1116  			r.directoryHeap.managedReset()
  1117  
  1118  			// If the file system is healthy then block until there is a new
  1119  			// upload or there is a repair that is needed.
  1120  			select {
  1121  			case <-r.uploadHeap.newUploads:
  1122  				r.log.Debugln("repair loop triggered by new upload channel")
  1123  			case <-r.uploadHeap.repairNeeded:
  1124  				r.log.Debugln("repair loop triggered by repair needed channel")
  1125  			case <-r.tg.StopChan():
  1126  				return
  1127  			}
  1128  
  1129  			err = r.managedPushUnexploredDirectory(modules.RootSiaPath())
  1130  			if err != nil {
   1131  				// If there is an error initializing the directory heap, log
   1132  				// the error. We don't want to sleep here as we were triggered
   1133  				// to repair chunks, so we don't want to delay the repair if
   1134  				// there are chunks in the upload heap already.
  1135  				r.log.Println("WARN: error re-initializing the directory heap:", err)
  1136  			}
  1137  
  1138  			// Continue here to force the code to re-check for backups, to
  1139  			// re-block until it's online, and to refresh the worker pool.
  1140  			continue
  1141  		}
  1142  
  1143  		// Add chunks to heap.
  1144  		dirSiaPaths := make(map[modules.SiaPath]struct{})
  1145  		dirSiaPaths, err = r.managedAddChunksToHeap(hosts)
  1146  		if err != nil {
  1147  			// Log the error but don't sleep as there are potentially chunks in
  1148  			// the heap from new uploads. If the heap is empty the next check
  1149  			// will catch that and handle it as an error
  1150  			r.log.Debugln("WARN: error adding chunks to the heap:", err)
  1151  		}
  1152  
  1153  		// There are benign edge cases where the heap will be empty after chunks
  1154  		// have been added. For example, if a chunk has gotten more healthy
  1155  		// since the last health check due to one of its hosts coming back
  1156  		// online. In these cases, the best course of action is to proceed with
  1157  		// the repair and move on to the next directories in the directory heap.
  1158  		// The repair loop will return immediately if it is given little or no
  1159  		// work but it can see that there is more work that it could be given.
  1160  
  1161  		r.log.Debugln("Executing an upload and repair cycle, uploadHeap has", r.uploadHeap.managedLen(), "chunks in it")
  1162  		err = r.managedRepairLoop(hosts)
  1163  		if err != nil {
  1164  			// If there was an error with the repair loop sleep for a little bit
  1165  			// and then try again. Here we do not skip to the next iteration as
  1166  			// we want to call bubble on the impacted directories
  1167  			r.log.Println("WARN: there was an error in the repair loop:", err)
  1168  			select {
  1169  			case <-time.After(uploadAndRepairErrorSleepDuration):
  1170  			case <-r.tg.StopChan():
  1171  				return
  1172  			}
  1173  		}
  1174  
  1175  		// Call threadedBubbleMetadata to update the filesystem.
  1176  		for dirSiaPath := range dirSiaPaths {
   1177  			// We call bubble in a goroutine so that it is not a bottleneck
   1178  			// for the repair loop iterations. This, however, can lead to some
   1179  			// additional unneeded cycles of the repair loop when these bubbles
   1180  			// reach the root. These cycles, however, will be handled and
   1181  			// can be seen in the logs.
  1182  			go r.threadedBubbleMetadata(dirSiaPath)
  1183  		}
  1184  	}
  1185  }