github.com/nebulouslabs/sia@v1.3.7/modules/renter/uploadheap.go

     1  package renter
     2  
     3  // TODO / NOTE: Once the filesystem is tree-based, instead of continually
     4  // looping through the whole filesystem we can add values to the file metadata
     5  // for each folder + file, where the folder scan time is the least recent time
     6  // of any file in the folder, and the folder health is the lowest health of any
     7  // file in the folder. This will allow us to go one folder at a time and focus
     8  // on problem areas instead of doing everything all at once every iteration.
     9  // This should boost scalability.
    10  
     11  // TODO / NOTE: We need to upgrade the contractor before we can do this, but we
     12  // should be checking every piece within a contract and verifying that the piece
     13  // is still available in the contract that we have, i.e. that the host did not
     14  // lose or nullify the piece.
    15  
     16  // TODO: The renter will try to download in order to repair a piece even if
     17  // there are not enough workers to make any progress on the repair. This should be fixed.
    18  
    19  import (
    20  	"container/heap"
    21  	"os"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/NebulousLabs/Sia/build"
    26  	"github.com/NebulousLabs/Sia/crypto"
    27  	"github.com/NebulousLabs/Sia/types"
    28  )
    29  
     30  // uploadHeap contains a priority-sorted heap of all the chunks being uploaded
     31  // by the renter, along with some metadata.
    32  type uploadHeap struct {
    33  	// activeChunks contains a list of all the chunks actively being worked on.
    34  	// These chunks will either be in the heap, or will be in the queues of some
    35  	// of the workers. A chunk is added to the activeChunks map as soon as it is
    36  	// added to the uploadHeap, and it is removed from the map as soon as the
    37  	// last worker completes work on the chunk.
    38  	activeChunks map[uploadChunkID]struct{}
    39  	heap         uploadChunkHeap
    40  	newUploads   chan struct{}
    41  	mu           sync.Mutex
    42  }
    43  
    44  // uploadChunkHeap is a bunch of priority-sorted chunks that need to be either
    45  // uploaded or repaired.
    46  //
    47  // TODO: When the file system is adjusted to have a tree structure, the
    48  // filesystem itself will serve as the uploadChunkHeap, making this structure
    49  // unnecessary. The repair loop might be moved to repair.go.
    50  type uploadChunkHeap []*unfinishedUploadChunk
    51  
    52  // Implementation of heap.Interface for uploadChunkHeap.
    53  func (uch uploadChunkHeap) Len() int { return len(uch) }
    54  func (uch uploadChunkHeap) Less(i, j int) bool {
    55  	return float64(uch[i].piecesCompleted)/float64(uch[i].piecesNeeded) < float64(uch[j].piecesCompleted)/float64(uch[j].piecesNeeded)
    56  }
    57  func (uch uploadChunkHeap) Swap(i, j int)       { uch[i], uch[j] = uch[j], uch[i] }
    58  func (uch *uploadChunkHeap) Push(x interface{}) { *uch = append(*uch, x.(*unfinishedUploadChunk)) }
    59  func (uch *uploadChunkHeap) Pop() interface{} {
    60  	old := *uch
    61  	n := len(old)
    62  	x := old[n-1]
    63  	*uch = old[0 : n-1]
    64  	return x
    65  }
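
// The Less function above makes this a min-heap on upload progress: the chunk
// with the lowest piecesCompleted/piecesNeeded ratio is popped first, so the
// least healthy chunks get repaired before healthier ones. A minimal sketch of
// that ordering (illustrative only, not part of the original file; it assumes
// only the chunk fields used by Less):
func exampleLeastCompleteFirst() *unfinishedUploadChunk {
	uch := uploadChunkHeap{
		&unfinishedUploadChunk{piecesCompleted: 9, piecesNeeded: 10}, // 90% complete
		&unfinishedUploadChunk{piecesCompleted: 1, piecesNeeded: 10}, // 10% complete
		&unfinishedUploadChunk{piecesCompleted: 5, piecesNeeded: 10}, // 50% complete
	}
	// Establish the heap invariant, then pop the least complete chunk.
	heap.Init(&uch)
	return heap.Pop(&uch).(*unfinishedUploadChunk) // returns the 10% complete chunk
}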
    66  
    67  // managedPush will add a chunk to the upload heap.
    68  func (uh *uploadHeap) managedPush(uuc *unfinishedUploadChunk) {
    69  	// Create the unique chunk id.
    70  	ucid := uploadChunkID{
    71  		fileUID: uuc.renterFile.staticUID,
    72  		index:   uuc.index,
    73  	}
    74  	// Sanity check: fileUID should not be the empty value.
    75  	if uuc.renterFile.staticUID == "" {
    76  		panic("empty string for file UID")
    77  	}
    78  
     79  	// Check whether this chunk is already being repaired. If not, add it to the
     80  	// upload chunk heap with heap.Push so that the heap ordering is maintained.
     81  	uh.mu.Lock()
     82  	_, exists := uh.activeChunks[ucid]
     83  	if !exists {
     84  		uh.activeChunks[ucid] = struct{}{}
     85  		heap.Push(&uh.heap, uuc)
     86  	}
     87  	uh.mu.Unlock()
    88  }
    89  
    90  // managedPop will pull a chunk off of the upload heap and return it.
    91  func (uh *uploadHeap) managedPop() (uc *unfinishedUploadChunk) {
    92  	uh.mu.Lock()
    93  	if len(uh.heap) > 0 {
    94  		uc = heap.Pop(&uh.heap).(*unfinishedUploadChunk)
    95  	}
    96  	uh.mu.Unlock()
    97  	return uc
    98  }
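
// Together, managedPush and managedPop deduplicate repair work: a chunk that
// is already active (in the heap or queued with a worker) is not added a
// second time. A minimal sketch of that behavior (illustrative only, not part
// of the original file; it assumes the chunk fields referenced by managedPush
// above, and that the file's staticUID is a directly settable field):
func exampleHeapDeduplication() int {
	uh := &uploadHeap{
		activeChunks: make(map[uploadChunkID]struct{}),
	}
	uuc := &unfinishedUploadChunk{
		renterFile: &file{staticUID: "example-file-uid"},
		index:      0,
	}
	uh.managedPush(uuc)
	uh.managedPush(uuc) // no-op, the chunk is already active
	return uh.heap.Len() // 1
}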
    99  
   100  // buildUnfinishedChunks will pull all of the unfinished chunks out of a file.
   101  //
   102  // TODO / NOTE: This code can be substantially simplified once the files store
   103  // the HostPubKey instead of the FileContractID, and can be simplified even
   104  // further once the layout is per-chunk instead of per-filecontract.
   105  func (r *Renter) buildUnfinishedChunks(f *file, hosts map[string]struct{}) []*unfinishedUploadChunk {
   106  	// Files are not threadsafe.
   107  	f.mu.Lock()
   108  	defer f.mu.Unlock()
   109  
   110  	// If the file is not being tracked, don't repair it.
   111  	trackedFile, exists := r.persist.Tracking[f.name]
   112  	if !exists {
   113  		return nil
   114  	}
   115  
   116  	// If we don't have enough workers for the file, don't repair it right now.
   117  	if len(r.workerPool) < f.erasureCode.MinPieces() {
   118  		return nil
   119  	}
   120  
   121  	// Assemble the set of chunks.
   122  	//
   123  	// TODO / NOTE: Future files may have a different method for determining the
   124  	// number of chunks. Changes will be made due to things like sparse files,
   125  	// and the fact that chunks are going to be different sizes.
   126  	chunkCount := f.numChunks()
   127  	newUnfinishedChunks := make([]*unfinishedUploadChunk, chunkCount)
   128  	for i := uint64(0); i < chunkCount; i++ {
   129  		newUnfinishedChunks[i] = &unfinishedUploadChunk{
   130  			renterFile: f,
   131  			localPath:  trackedFile.RepairPath,
   132  
   133  			id: uploadChunkID{
   134  				fileUID: f.staticUID,
   135  				index:   i,
   136  			},
   137  
   138  			index:  i,
   139  			length: f.staticChunkSize(),
   140  			offset: int64(i * f.staticChunkSize()),
   141  
    142  			// memoryNeeded has to include the logical data as well as the
    143  			// overhead for encryption.
    144  			//
    145  			// TODO / NOTE: If we adjust the file to have a flexible encryption
    146  			// scheme, we'll need to adjust the overhead calculation as well.
    147  			//
    148  			// TODO: Currently we request memory for all of the pieces as well
    149  			// as the minimum pieces, but we may not need to request all of
    150  			// that.
   151  			memoryNeeded:  f.pieceSize*uint64(f.erasureCode.NumPieces()+f.erasureCode.MinPieces()) + uint64(f.erasureCode.NumPieces()*crypto.TwofishOverhead),
   152  			minimumPieces: f.erasureCode.MinPieces(),
   153  			piecesNeeded:  f.erasureCode.NumPieces(),
   154  
   155  			physicalChunkData: make([][]byte, f.erasureCode.NumPieces()),
   156  
   157  			pieceUsage:  make([]bool, f.erasureCode.NumPieces()),
   158  			unusedHosts: make(map[string]struct{}),
   159  		}
   160  		// Every chunk can have a different set of unused hosts.
   161  		for host := range hosts {
   162  			newUnfinishedChunks[i].unusedHosts[host] = struct{}{}
   163  		}
   164  	}
   165  
    166  	// Iterate through the contracts of the file and mark which hosts are
    167  	// already in use for each chunk. As hosts are removed from the 'unusedHosts'
    168  	// map, the 'piecesCompleted' value is incremented accordingly.
   169  	saveFile := false
   170  	for fcid, fileContract := range f.contracts {
   171  		pk := r.hostContractor.ResolveIDToPubKey(fcid)
   172  		recentContract, exists := r.hostContractor.ContractByPublicKey(pk)
   173  		contractUtility, exists2 := r.hostContractor.ContractUtility(pk)
   174  		if exists != exists2 {
   175  			build.Critical("got a contract without utility or vice versa which shouldn't happen",
   176  				exists, exists2)
   177  		}
   178  		if !exists || !exists2 {
   179  			// File contract does not seem to be part of the host anymore.
   180  			// Delete this contract and mark the file to be saved.
   181  			delete(f.contracts, fcid)
   182  			saveFile = true
   183  			continue
   184  		}
   185  		if !contractUtility.GoodForRenew {
   186  			// We are no longer renewing with this contract, so it does not
   187  			// count for redundancy.
   188  			continue
   189  		}
   190  		hpk := recentContract.HostPublicKey
   191  
   192  		// Mark the chunk set based on the pieces in this contract.
   193  		for _, piece := range fileContract.Pieces {
   194  			_, exists := newUnfinishedChunks[piece.Chunk].unusedHosts[hpk.String()]
   195  			redundantPiece := newUnfinishedChunks[piece.Chunk].pieceUsage[piece.Piece]
   196  			if exists && !redundantPiece {
   197  				newUnfinishedChunks[piece.Chunk].pieceUsage[piece.Piece] = true
   198  				newUnfinishedChunks[piece.Chunk].piecesCompleted++
   199  				delete(newUnfinishedChunks[piece.Chunk].unusedHosts, hpk.String())
   200  			} else if exists {
   201  				// This host has a piece, but it is the same piece another host
   202  				// has. We should still remove the host from the unusedHosts
   203  				// since one host having multiple pieces of a chunk might lead
   204  				// to unexpected issues.
   205  				delete(newUnfinishedChunks[piece.Chunk].unusedHosts, hpk.String())
   206  			}
   207  		}
   208  	}
   209  	// If 'saveFile' is marked, it means we deleted some dead contracts and
   210  	// cleaned up the file a bit. Save the file to clean up some space on disk
   211  	// and prevent the same work from being repeated after the next restart.
   212  	//
   213  	// TODO / NOTE: This process isn't going to make sense anymore once we
   214  	// switch to chunk-based saving.
   215  	if saveFile {
   216  		err := r.saveFile(f)
   217  		if err != nil {
   218  			r.log.Println("error while saving a file after pruning some contracts from it:", err)
   219  		}
   220  	}
   221  
   222  	// Iterate through the set of newUnfinishedChunks and remove any that are
   223  	// completed.
   224  	incompleteChunks := newUnfinishedChunks[:0]
   225  	for i := 0; i < len(newUnfinishedChunks); i++ {
   226  		if newUnfinishedChunks[i].piecesCompleted < newUnfinishedChunks[i].piecesNeeded {
   227  			incompleteChunks = append(incompleteChunks, newUnfinishedChunks[i])
   228  		}
   229  	}
   230  	// TODO: Don't return chunks that can't be downloaded, uploaded or otherwise
   231  	// helped by the upload process.
   232  	return incompleteChunks
   233  }
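
// The memoryNeeded calculation in buildUnfinishedChunks reserves room for all
// of the physical pieces plus the minimum (logical) pieces, and adds the
// Twofish encryption overhead for each physical piece. A worked sketch with
// hypothetical parameters (illustrative only; the helper name is not part of
// the original file): a 10-of-30 code with 4 MiB pieces would reserve
// 4 MiB * (30 + 10) = 160 MiB plus 30 * crypto.TwofishOverhead bytes.
func exampleMemoryNeeded(pieceSize uint64, minPieces, numPieces int) uint64 {
	// Physical pieces plus logical data, plus per-piece encryption overhead.
	return pieceSize*uint64(numPieces+minPieces) + uint64(numPieces*crypto.TwofishOverhead)
}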
   234  
   235  // managedBuildChunkHeap will iterate through all of the files in the renter and
   236  // construct a chunk heap.
   237  func (r *Renter) managedBuildChunkHeap(hosts map[string]struct{}) {
   238  	// Loop through the whole set of files and get a list of chunks to add to
   239  	// the heap.
   240  	id := r.mu.RLock()
   241  	goodForRenew := make(map[types.FileContractID]bool)
   242  	offline := make(map[types.FileContractID]bool)
   243  	for _, file := range r.files {
   244  		file.mu.RLock()
   245  		for cid := range file.contracts {
   246  			resolvedID := r.hostContractor.ResolveIDToPubKey(cid)
   247  			cu, ok := r.hostContractor.ContractUtility(resolvedID)
   248  			goodForRenew[cid] = ok && cu.GoodForRenew
   249  			offline[cid] = r.hostContractor.IsOffline(resolvedID)
   250  		}
   251  		file.mu.RUnlock()
   252  
   253  		unfinishedUploadChunks := r.buildUnfinishedChunks(file, hosts)
   254  		for i := 0; i < len(unfinishedUploadChunks); i++ {
   255  			r.uploadHeap.managedPush(unfinishedUploadChunks[i])
   256  		}
   257  	}
   258  	for _, file := range r.files {
   259  		file.mu.RLock()
    260  		// Check for the local file.
    261  		tf, exists := r.persist.Tracking[file.name]
    262  		if exists {
    263  			// If the local file is missing and the redundancy is less than
    264  			// 1, log a warning to the renter log.
   265  			if _, err := os.Stat(tf.RepairPath); os.IsNotExist(err) && file.redundancy(offline, goodForRenew) < 1 {
   266  				r.log.Println("File not found on disk and possibly unrecoverable:", tf.RepairPath)
   267  			}
   268  		}
   269  		file.mu.RUnlock()
   270  	}
   271  	r.mu.RUnlock(id)
   272  }
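
// The second pass in managedBuildChunkHeap warns only when a file is both
// missing locally and below 1x redundancy, because such a file can no longer
// be repaired from either the local disk or the network. A condensed sketch of
// the local-file check (illustrative only; fileMissingLocally is a
// hypothetical helper, not part of the original file):
func fileMissingLocally(repairPath string) bool {
	_, err := os.Stat(repairPath)
	return os.IsNotExist(err)
}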
   273  
   274  // managedPrepareNextChunk takes the next chunk from the chunk heap and prepares
   275  // it for upload. Preparation includes blocking until enough memory is
   276  // available, fetching the logical data for the chunk (either from the disk or
   277  // from the network), erasure coding the logical data into the physical data,
   278  // and then finally passing the work onto the workers.
   279  func (r *Renter) managedPrepareNextChunk(uuc *unfinishedUploadChunk, hosts map[string]struct{}) {
    280  	// Request memory for the chunk, blocking until enough memory is available,
    281  	// and then spin up a goroutine to asynchronously handle the rest of the
    282  	// chunk's tasks.
   283  	if !r.memoryManager.Request(uuc.memoryNeeded, memoryPriorityLow) {
   284  		return
   285  	}
   286  	// Fetch the chunk in a separate goroutine, as it can take a long time and
   287  	// does not need to bottleneck the repair loop.
   288  	go r.managedFetchAndRepairChunk(uuc)
   289  }
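
// managedPrepareNextChunk relies on the memory manager to throttle the repair
// loop: Request blocks until the requested memory is available and returns
// false only if the renter is shutting down. A minimal sketch of the
// request/return pattern (illustrative only, not part of the original file; it
// assumes the memory manager exposes a Return counterpart that releases the
// memory once the work is done):
func (r *Renter) exampleThrottledWork(amount uint64) {
	if !r.memoryManager.Request(amount, memoryPriorityLow) {
		return // the renter is shutting down
	}
	go func() {
		// Release the memory once the memory-bounded work has completed.
		defer r.memoryManager.Return(amount)
		// ... perform the memory-bounded work here ...
	}()
}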
   290  
   291  // managedRefreshHostsAndWorkers will reset the set of hosts and the set of
   292  // workers for the renter.
   293  func (r *Renter) managedRefreshHostsAndWorkers() map[string]struct{} {
   294  	// Grab the current set of contracts and use them to build a list of hosts
   295  	// that are currently active. The hosts are assembled into a map where the
   296  	// key is the String() representation of the host's SiaPublicKey.
   297  	//
   298  	// TODO / NOTE: This code can be removed once files store the HostPubKey
   299  	// of the hosts they are using, instead of just the FileContractID.
   300  	currentContracts := r.hostContractor.Contracts()
   301  	hosts := make(map[string]struct{})
   302  	for _, contract := range currentContracts {
   303  		hosts[contract.HostPublicKey.String()] = struct{}{}
   304  	}
   305  	// Refresh the worker pool as well.
   306  	r.managedUpdateWorkerPool()
   307  	return hosts
   308  }
   309  
   310  // threadedUploadLoop is a background thread that checks on the health of files,
   311  // tracking the least healthy files and queuing the worst ones for repair.
   312  func (r *Renter) threadedUploadLoop() {
   313  	err := r.tg.Add()
   314  	if err != nil {
   315  		return
   316  	}
   317  	defer r.tg.Done()
   318  
   319  	for {
   320  		// Wait until the renter is online to proceed.
   321  		if !r.managedBlockUntilOnline() {
   322  			// The renter shut down before the internet connection was restored.
   323  			return
   324  		}
   325  
   326  		// Refresh the worker pool and get the set of hosts that are currently
   327  		// useful for uploading.
   328  		hosts := r.managedRefreshHostsAndWorkers()
   329  
   330  		// Build a min-heap of chunks organized by upload progress.
   331  		//
   332  		// TODO: After replacing the filesystem to resemble a tree, we'll be
   333  		// able to go through the filesystem piecewise instead of doing
   334  		// everything all at once.
   335  		r.managedBuildChunkHeap(hosts)
   336  		r.uploadHeap.mu.Lock()
   337  		heapLen := r.uploadHeap.heap.Len()
   338  		r.uploadHeap.mu.Unlock()
   339  		r.log.Println("Repairing", heapLen, "chunks")
   340  
   341  		// Work through the heap. Chunks will be processed one at a time until
   342  		// the heap is whittled down. When the heap is empty, we wait for new
   343  		// files in a loop and then process those. When the rebuild signal is
   344  		// received, we start over with the outer loop that rebuilds the heap
   345  		// and re-checks the health of all the files.
   346  		rebuildHeapSignal := time.After(rebuildChunkHeapInterval)
   347  		for {
   348  			// Return if the renter has shut down.
   349  			select {
   350  			case <-r.tg.StopChan():
   351  				return
   352  			default:
   353  			}
   354  
   355  			// Break to the outer loop if not online.
   356  			if !r.g.Online() {
   357  				break
   358  			}
   359  
    360  			// Check if there is work by trying to pop off the next chunk from
    361  			// the heap.
   362  			nextChunk := r.uploadHeap.managedPop()
   363  			if nextChunk == nil {
   364  				break
   365  			}
   366  
   367  			// Make sure we have enough workers for this chunk to reach minimum
   368  			// redundancy. Otherwise we ignore this chunk for now and try again
   369  			// the next time we rebuild the heap and refresh the workers.
   370  			id := r.mu.RLock()
   371  			availableWorkers := len(r.workerPool)
   372  			r.mu.RUnlock(id)
   373  			if availableWorkers < nextChunk.minimumPieces {
   374  				continue
   375  			}
   376  
    377  			// Perform the work. managedPrepareNextChunk will block until
    378  			// enough memory is available to perform the work, throttling this
    379  			// thread to only the resources that are available.
   380  			r.managedPrepareNextChunk(nextChunk, hosts)
   381  			continue
   382  		}
   383  
   384  		// Block until new work is required.
   385  		select {
   386  		case <-r.uploadHeap.newUploads:
   387  			// User has uploaded a new file.
   388  		case <-rebuildHeapSignal:
   389  			// Time to check the filesystem health again.
   390  		case <-r.tg.StopChan():
   391  			// The renter has shut down.
   392  			return
   393  		}
   394  	}
   395  }