gitlab.com/jokerrs1/Sia@v1.3.2/modules/renter/uploadheap.go

package renter

// TODO / NOTE: Once the filesystem is tree-based, instead of continually
// looping through the whole filesystem we can add values to the file metadata
// for each folder + file, where the folder scan time is the least recent time
// of any file in the folder, and the folder health is the lowest health of any
// file in the folder. This will allow us to go one folder at a time and focus
// on problem areas instead of doing everything all at once every iteration.
// This should boost scalability.

// TODO / NOTE: We need to upgrade the contractor before we can do this, but we
// should be checking every piece within a contract, verifying that the piece
// is still available in the contract that we have and that the host did not
// lose or nullify the piece.

// TODO: The renter will try to download to repair a piece even if there are
// not enough workers to make any progress on the repair. This should be fixed.

import (
	"container/heap"
	"sync"
	"time"

	"github.com/NebulousLabs/Sia/crypto"
)

// uploadHeap contains a priority-sorted heap of all the chunks being uploaded
// or repaired by the renter, along with some metadata.
type uploadHeap struct {
	// activeChunks contains the set of all the chunks actively being worked
	// on. These chunks will either be in the heap, or will be in the queues of
	// some of the workers. A chunk is added to the activeChunks map as soon as
	// it is added to the uploadHeap, and it is removed from the map as soon as
	// the last worker completes work on the chunk.
	activeChunks map[uploadChunkID]struct{}
	heap         uploadChunkHeap
	newUploads   chan struct{}
	mu           sync.Mutex
}
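
// The heap field's zero value already works as an empty heap, but the map and
// channel fields are initialized elsewhere in the renter. A minimal
// construction sketch (hypothetical, shown only for illustration; the real
// renter may size the channel differently):
//
//	uh := uploadHeap{
//		activeChunks: make(map[uploadChunkID]struct{}),
//		newUploads:   make(chan struct{}, 1),
//	}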

// uploadChunkHeap is a bunch of priority-sorted chunks that need to be either
// uploaded or repaired.
//
// TODO: When the file system is adjusted to have a tree structure, the
// filesystem itself will serve as the uploadChunkHeap, making this structure
// unnecessary. The repair loop might be moved to repair.go.
type uploadChunkHeap []*unfinishedUploadChunk

// Implementation of heap.Interface for uploadChunkHeap.
func (uch uploadChunkHeap) Len() int { return len(uch) }
func (uch uploadChunkHeap) Less(i, j int) bool {
	return float64(uch[i].piecesCompleted)/float64(uch[i].piecesNeeded) < float64(uch[j].piecesCompleted)/float64(uch[j].piecesNeeded)
}
func (uch uploadChunkHeap) Swap(i, j int)       { uch[i], uch[j] = uch[j], uch[i] }
func (uch *uploadChunkHeap) Push(x interface{}) { *uch = append(*uch, x.(*unfinishedUploadChunk)) }
func (uch *uploadChunkHeap) Pop() interface{} {
	old := *uch
	n := len(old)
	x := old[n-1]
	*uch = old[0 : n-1]
	return x
}
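
// Illustration of the ordering (a hypothetical sketch, not part of the
// original file): a chunk with 1 of 10 pieces completed sorts ahead of a
// chunk with 5 of 10 because its completion ratio is lower, so popping
// always yields the least-complete chunk. Typical container/heap usage:
//
//	heap.Init(&uh.heap)
//	heap.Push(&uh.heap, chunkA) // 1/10 pieces completed
//	heap.Push(&uh.heap, chunkB) // 5/10 pieces completed
//	least := heap.Pop(&uh.heap).(*unfinishedUploadChunk) // returns chunkA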

// managedPush will add a chunk to the upload heap.
func (uh *uploadHeap) managedPush(uuc *unfinishedUploadChunk) {
	// Create the unique chunk id.
	ucid := uploadChunkID{
		fileUID: uuc.renterFile.staticUID,
		index:   uuc.index,
	}
	// Sanity check: fileUID should not be the empty value.
	if uuc.renterFile.staticUID == "" {
		panic("empty string for file UID")
	}

	// Check whether this chunk is already being repaired. If not, add it to
	// the upload chunk heap. heap.Push is used rather than calling the Push
	// method directly so that the heap ordering is maintained.
	uh.mu.Lock()
	_, exists := uh.activeChunks[ucid]
	if !exists {
		uh.activeChunks[ucid] = struct{}{}
		heap.Push(&uh.heap, uuc)
	}
	uh.mu.Unlock()
}

// managedPop will pull a chunk off of the upload heap and return it.
func (uh *uploadHeap) managedPop() (uc *unfinishedUploadChunk) {
	uh.mu.Lock()
	if len(uh.heap) > 0 {
		uc = heap.Pop(&uh.heap).(*unfinishedUploadChunk)
	}
	uh.mu.Unlock()
	return uc
}
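
// A minimal usage sketch of the push/pop pair (hypothetical, for illustration
// only):
//
//	uh.managedPush(chunk)               // no-op if the chunk is already active
//	if next := uh.managedPop(); next != nil {
//		// hand next off to the repair pipeline
//	}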

// buildUnfinishedChunks will pull all of the unfinished chunks out of a file.
//
// TODO / NOTE: This code can be substantially simplified once the files store
// the HostPubKey instead of the FileContractID, and can be simplified even
// further once the layout is per-chunk instead of per-filecontract.
func (r *Renter) buildUnfinishedChunks(f *file, hosts map[string]struct{}) []*unfinishedUploadChunk {
	// Files are not threadsafe.
	f.mu.Lock()
	defer f.mu.Unlock()

	// If the file is not being tracked, don't repair it.
	trackedFile, exists := r.tracking[f.name]
	if !exists {
		return nil
	}

	// Assemble the set of chunks.
	//
	// TODO / NOTE: Future files may have a different method for determining the
	// number of chunks. Changes will be made due to things like sparse files,
	// and the fact that chunks are going to be different sizes.
	chunkCount := f.numChunks()
	newUnfinishedChunks := make([]*unfinishedUploadChunk, chunkCount)
	for i := uint64(0); i < chunkCount; i++ {
		newUnfinishedChunks[i] = &unfinishedUploadChunk{
			renterFile: f,
			localPath:  trackedFile.RepairPath,

			id: uploadChunkID{
				fileUID: f.staticUID,
				index:   i,
			},

			index:  i,
			length: f.staticChunkSize(),
			offset: int64(i * f.staticChunkSize()),

			// memoryNeeded has to account for the logical data as well as the
			// physical pieces, and also include the overhead for encryption.
			//
			// TODO / NOTE: If we adjust the file to have a flexible encryption
			// scheme, we'll need to adjust the overhead stuff too.
			//
			// TODO: Currently we request memory for all of the pieces as well
			// as the minimum pieces, but we perhaps don't need to request all
			// of that.
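			//
			// Rough worked example (hypothetical numbers): with 10-of-30
			// erasure coding and 4 MiB pieces, this reserves
			// 4 MiB * (30 + 10) = 160 MiB for piece data plus
			// 30 * crypto.TwofishOverhead bytes of encryption overhead.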
			memoryNeeded:  f.pieceSize*uint64(f.erasureCode.NumPieces()+f.erasureCode.MinPieces()) + uint64(f.erasureCode.NumPieces()*crypto.TwofishOverhead),
			minimumPieces: f.erasureCode.MinPieces(),
			piecesNeeded:  f.erasureCode.NumPieces(),

			physicalChunkData: make([][]byte, f.erasureCode.NumPieces()),

			pieceUsage:  make([]bool, f.erasureCode.NumPieces()),
			unusedHosts: make(map[string]struct{}),
		}
		// Every chunk can have a different set of unused hosts.
		for host := range hosts {
			newUnfinishedChunks[i].unusedHosts[host] = struct{}{}
		}
	}

	// Iterate through the contracts of the file and mark which hosts are
	// already in use for the chunk. As you delete hosts from the 'unusedHosts'
	// map, also increment the 'piecesCompleted' value.
	saveFile := false
	for fcid, fileContract := range f.contracts {
		recentContract, exists := r.hostContractor.ContractByID(fcid)
		contractUtility, exists2 := r.hostContractor.ContractUtility(fcid)
		if !exists || !exists2 {
			// The file contract is no longer tracked by the contractor.
			// Delete this contract and mark the file to be saved.
			delete(f.contracts, fcid)
			saveFile = true
			continue
		}
		if !contractUtility.GoodForRenew {
			// We are no longer renewing with this contract, so it does not
			// count for redundancy.
			continue
		}
		hpk := recentContract.HostPublicKey

		// Mark the chunk set based on the pieces in this contract.
		for _, piece := range fileContract.Pieces {
			_, exists := newUnfinishedChunks[piece.Chunk].unusedHosts[hpk.String()]
			redundantPiece := newUnfinishedChunks[piece.Chunk].pieceUsage[piece.Piece]
			if exists && !redundantPiece {
				newUnfinishedChunks[piece.Chunk].pieceUsage[piece.Piece] = true
				newUnfinishedChunks[piece.Chunk].piecesCompleted++
				delete(newUnfinishedChunks[piece.Chunk].unusedHosts, hpk.String())
			} else if exists {
				// This host has a piece, but it is the same piece another host
				// has. We should still remove the host from the unusedHosts
				// map, since one host holding multiple pieces of a chunk might
				// lead to unexpected issues.
				delete(newUnfinishedChunks[piece.Chunk].unusedHosts, hpk.String())
			}
		}
	}
	// If 'saveFile' is marked, it means we deleted some dead contracts and
	// cleaned up the file a bit. Save the file to clean up some space on disk
	// and prevent the same work from being repeated after the next restart.
	//
	// TODO / NOTE: This process isn't going to make sense anymore once we
	// switch to chunk-based saving.
	if saveFile {
		err := r.saveFile(f)
		if err != nil {
			r.log.Println("error while saving a file after pruning some contracts from it:", err)
		}
	}

	// Iterate through the set of newUnfinishedChunks and remove any that are
	// completed.
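	// Note: reslicing to newUnfinishedChunks[:0] reuses the original backing
	// array, so the filtering below is done in place without allocating a
	// second slice.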
	incompleteChunks := newUnfinishedChunks[:0]
	for i := 0; i < len(newUnfinishedChunks); i++ {
		if newUnfinishedChunks[i].piecesCompleted < newUnfinishedChunks[i].piecesNeeded {
			incompleteChunks = append(incompleteChunks, newUnfinishedChunks[i])
		}
	}
	// TODO: Don't return chunks that can't be downloaded, uploaded or otherwise
	// helped by the upload process.
	return incompleteChunks
}

// managedBuildChunkHeap will iterate through all of the files in the renter and
// construct a chunk heap.
func (r *Renter) managedBuildChunkHeap(hosts map[string]struct{}) {
	// Loop through the whole set of files and get a list of chunks to add to
	// the heap.
	id := r.mu.Lock()
	for _, file := range r.files {
		unfinishedUploadChunks := r.buildUnfinishedChunks(file, hosts)
		for i := 0; i < len(unfinishedUploadChunks); i++ {
			r.uploadHeap.managedPush(unfinishedUploadChunks[i])
		}
	}
	r.mu.Unlock(id)
}

// managedPrepareNextChunk takes the next chunk from the chunk heap and prepares
// it for upload. Preparation includes blocking until enough memory is
// available, fetching the logical data for the chunk (either from the disk or
// from the network), erasure coding the logical data into the physical data,
// and then finally passing the work onto the workers.
func (r *Renter) managedPrepareNextChunk(uuc *unfinishedUploadChunk, hosts map[string]struct{}) {
	// Request memory for the chunk, blocking until enough is available, and
	// then spin up a goroutine to asynchronously handle the rest of the chunk
	// tasks. If the memory request fails, skip the chunk.
	if !r.memoryManager.Request(uuc.memoryNeeded, memoryPriorityLow) {
		return
	}
	// Fetch the chunk in a separate goroutine, as it can take a long time and
	// does not need to bottleneck the repair loop.
	go r.managedFetchAndRepairChunk(uuc)
}
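
// The repair loop below drives this function; a simplified sketch of that
// flow (for illustration only) is:
//
//	for {
//		next := r.uploadHeap.managedPop()
//		if next == nil {
//			break
//		}
//		r.managedPrepareNextChunk(next, hosts)
//	}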

// managedRefreshHostsAndWorkers will reset the set of hosts and the set of
// workers for the renter.
func (r *Renter) managedRefreshHostsAndWorkers() map[string]struct{} {
	// Grab the current set of contracts and use them to build a list of hosts
	// that are currently active. The hosts are assembled into a map where the
	// key is the String() representation of the host's SiaPublicKey.
	//
	// TODO / NOTE: This code can be removed once files store the HostPubKey
	// of the hosts they are using, instead of just the FileContractID.
	currentContracts := r.hostContractor.Contracts()
	hosts := make(map[string]struct{})
	for _, contract := range currentContracts {
		hosts[contract.HostPublicKey.String()] = struct{}{}
	}
	// Refresh the worker pool as well.
	r.managedUpdateWorkerPool()
	return hosts
}

// threadedUploadLoop is a background thread that checks on the health of files,
// tracking the least healthy files and queuing the worst ones for repair.
func (r *Renter) threadedUploadLoop() {
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	for {
		// Wait until the renter is online to proceed.
		if !r.managedBlockUntilOnline() {
			// The renter shut down before the internet connection was restored.
			return
		}

		// Refresh the worker pool and get the set of hosts that are currently
		// useful for uploading.
		hosts := r.managedRefreshHostsAndWorkers()

		// Build a min-heap of chunks organized by upload progress.
		//
		// TODO: After replacing the filesystem to resemble a tree, we'll be
		// able to go through the filesystem piecewise instead of doing
		// everything all at once.
		r.managedBuildChunkHeap(hosts)
		r.uploadHeap.mu.Lock()
		heapLen := r.uploadHeap.heap.Len()
		r.uploadHeap.mu.Unlock()
		r.log.Println("Repairing", heapLen, "chunks")

		// Work through the heap. Chunks will be processed one at a time until
		// the heap is whittled down. When the heap is empty, we wait for new
		// files in a loop and then process those. When the rebuild signal is
		// received, we start over with the outer loop that rebuilds the heap
		// and re-checks the health of all the files.
		rebuildHeapSignal := time.After(rebuildChunkHeapInterval)
		for {
			// Return if the renter has shut down.
			select {
			case <-r.tg.StopChan():
				return
			default:
			}

			// Break to the outer loop if not online.
			if !r.g.Online() {
				break
			}

			// If there is work to do, perform the work. managedPrepareNextChunk
			// will block until enough memory is available to perform the work,
			// slowing this thread down to using only the resources that are
			// available.
			nextChunk := r.uploadHeap.managedPop()
			if nextChunk != nil {
				r.managedPrepareNextChunk(nextChunk, hosts)
				continue
			}
			break
		}

		// Block until new work is required.
		select {
		case <-r.uploadHeap.newUploads:
			// User has uploaded a new file.
		case <-rebuildHeapSignal:
			// Time to check the filesystem health again.
		case <-r.tg.StopChan():
			// The renter has shut down.
			return
		}
	}
}