github.com/NebulousLabs/Sia@v1.3.7/modules/renter/uploadchunk.go (about)

     1  package renter
     2  
     3  import (
     4  	"io"
     5  	"os"
     6  	"sync"
     7  
     8  	"github.com/NebulousLabs/Sia/crypto"
     9  
    10  	"github.com/NebulousLabs/errors"
    11  )
    12  
// uploadChunkID is a unique identifier for each chunk in the renter. It pairs
// the unique ID of the owning file with the index of the chunk inside that
// file. The upload heap uses it as the key of its set of active chunks.
type uploadChunkID struct {
	fileUID string // Unique to each file.
	index   uint64 // Unique to each chunk within a file.
}
    18  
// unfinishedUploadChunk contains a chunk from the filesystem that has not
// finished uploading, including knowledge of the progress.
type unfinishedUploadChunk struct {
	// Information about the file. localPath may be the empty string if the file
	// is known not to exist locally.
	id         uploadChunkID
	localPath  string
	renterFile *file

	// Information about the chunk, namely where it exists within the file.
	//
	// TODO / NOTE: As we change the file mapper, we're probably going to have
	// to update these fields. Compatibility shouldn't be an issue because this
	// struct is not persisted anywhere, it's always built from other
	// structures.
	index          uint64
	length         uint64
	memoryNeeded   uint64 // Total memory needed for the chunk, in bytes.
	memoryReleased uint64 // Portion of memoryNeeded that has already been returned, in bytes.
	minimumPieces  int    // Number of pieces required to recover the file.
	offset         int64  // Offset of the chunk within the file.
	piecesNeeded   int    // Number of pieces needed to achieve a 100% complete upload.

	// The logical data is the data that is presented to the user when the user
	// requests the chunk. The physical data is all of the pieces that get
	// stored across the network. Entries of physicalChunkData are set to nil
	// once the corresponding piece is no longer needed.
	logicalChunkData  [][]byte
	physicalChunkData [][]byte

	// Worker synchronization fields. The mutex only protects these fields.
	//
	// When a worker passes over a piece for upload to go on standby:
	//	+ the worker should add itself to the list of standby workers
	//	+ the worker should call for memory to be released
	//
	// When a worker passes over a piece because it's not useful:
	//	+ the worker should decrement the number of workers remaining
	//	+ the worker should call for memory to be released
	//
	// When a worker accepts a piece for upload:
	//	+ the worker should increment the number of pieces registered
	//	+ the worker should mark the piece usage for the piece it is uploading
	//	+ the worker should decrement the number of workers remaining
	//
	// When a worker completes an upload (success or failure):
	//	+ the worker should decrement the number of pieces registered
	//	+ the worker should call for memory to be released
	//
	// When a worker completes an upload (failure), additionally:
	//	+ the worker should unmark the piece usage for the piece it registered
	//	+ the worker should notify the standby workers of a new available piece
	//
	// When a worker completes an upload successfully, additionally:
	//	+ the worker should increment the number of pieces completed
	//	+ the worker should release the memory for the completed piece
	mu               sync.Mutex
	pieceUsage       []bool              // 'true' if a piece is either uploaded, a worker is attempting to upload it, or its memory has already been released.
	piecesCompleted  int                 // number of pieces that have been fully uploaded.
	piecesRegistered int                 // number of pieces that are being uploaded, but aren't finished yet (may fail).
	released         bool                // whether this chunk has been released from the active chunks set.
	unusedHosts      map[string]struct{} // hosts that aren't yet storing any pieces or performing any work.
	workersRemaining int                 // number of inactive workers still able to upload a piece.
	workersStandby   []*worker           // workers that can be used if other workers fail.
}
    84  
    85  // managedNotifyStandbyWorkers is called when a worker fails to upload a piece, meaning
    86  // that the standby workers may now be needed to help the piece finish
    87  // uploading.
    88  func (uc *unfinishedUploadChunk) managedNotifyStandbyWorkers() {
    89  	// Copy the standby workers into a new slice and reset it since we can't
    90  	// hold the lock while calling the managed function.
    91  	uc.mu.Lock()
    92  	standbyWorkers := make([]*worker, len(uc.workersStandby))
    93  	copy(standbyWorkers, uc.workersStandby)
    94  	uc.workersStandby = uc.workersStandby[:0]
    95  	uc.mu.Unlock()
    96  
    97  	for i := 0; i < len(standbyWorkers); i++ {
    98  		standbyWorkers[i].managedQueueUploadChunk(uc)
    99  	}
   100  }
   101  
   102  // managedDistributeChunkToWorkers will take a chunk with fully prepared
   103  // physical data and distribute it to the worker pool.
   104  func (r *Renter) managedDistributeChunkToWorkers(uc *unfinishedUploadChunk) {
   105  	// Give the chunk to each worker, marking the number of workers that have
   106  	// received the chunk. The workers cannot be interacted with while the
   107  	// renter is holding a lock, so we need to build a list of workers while
   108  	// under lock and then launch work jobs after that.
   109  	id := r.mu.RLock()
   110  	uc.workersRemaining += len(r.workerPool)
   111  	workers := make([]*worker, 0, len(r.workerPool))
   112  	for _, worker := range r.workerPool {
   113  		workers = append(workers, worker)
   114  	}
   115  	r.mu.RUnlock(id)
   116  	for _, worker := range workers {
   117  		worker.managedQueueUploadChunk(uc)
   118  	}
   119  }
   120  
   121  // managedDownloadLogicalChunkData will fetch the logical chunk data by sending a
   122  // download to the renter's downloader, and then using the data that gets
   123  // returned.
   124  func (r *Renter) managedDownloadLogicalChunkData(chunk *unfinishedUploadChunk) error {
   125  	//  Determine what the download length should be. Normally it is just the
   126  	//  chunk size, but if this is the last chunk we need to download less
   127  	//  because the file is not that large.
   128  	//
   129  	// TODO: There is a disparity in the way that the upload and download code
   130  	// handle the last chunk, which may not be full sized.
   131  	downloadLength := chunk.length
   132  	if chunk.index == chunk.renterFile.numChunks()-1 && chunk.renterFile.size%chunk.length != 0 {
   133  		downloadLength = chunk.renterFile.size % chunk.length
   134  	}
   135  
   136  	// Create the download.
   137  	buf := NewDownloadDestinationBuffer(chunk.length)
   138  	d, err := r.managedNewDownload(downloadParams{
   139  		destination:     buf,
   140  		destinationType: "buffer",
   141  		file:            chunk.renterFile,
   142  
   143  		latencyTarget: 200e3, // No need to rush latency on repair downloads.
   144  		length:        downloadLength,
   145  		needsMemory:   false, // We already requested memory, the download memory fits inside of that.
   146  		offset:        uint64(chunk.offset),
   147  		overdrive:     0, // No need to rush the latency on repair downloads.
   148  		priority:      0, // Repair downloads are completely de-prioritized.
   149  	})
   150  	if err != nil {
   151  		return err
   152  	}
   153  
   154  	// Set the in-memory buffer to nil just to be safe in case of a memory
   155  	// leak.
   156  	defer func() {
   157  		d.destination = nil
   158  	}()
   159  
   160  	// Wait for the download to complete.
   161  	select {
   162  	case <-d.completeChan:
   163  	case <-r.tg.StopChan():
   164  		return errors.New("repair download interrupted by stop call")
   165  	}
   166  	if d.Err() != nil {
   167  		buf = nil
   168  		return d.Err()
   169  	}
   170  	chunk.logicalChunkData = [][]byte(buf)
   171  	return nil
   172  }
   173  
   174  // managedFetchAndRepairChunk will fetch the logical data for a chunk, create
   175  // the physical pieces for the chunk, and then distribute them.
   176  func (r *Renter) managedFetchAndRepairChunk(chunk *unfinishedUploadChunk) {
   177  	// Calculate the amount of memory needed for erasure coding. This will need
   178  	// to be released if there's an error before erasure coding is complete.
   179  	erasureCodingMemory := chunk.renterFile.pieceSize * uint64(chunk.renterFile.erasureCode.MinPieces())
   180  
   181  	// Calculate the amount of memory to release due to already completed
   182  	// pieces. This memory gets released during encryption, but needs to be
   183  	// released if there's a failure before encryption happens.
   184  	var pieceCompletedMemory uint64
   185  	for i := 0; i < len(chunk.pieceUsage); i++ {
   186  		if chunk.pieceUsage[i] {
   187  			pieceCompletedMemory += chunk.renterFile.pieceSize + crypto.TwofishOverhead
   188  		}
   189  	}
   190  
   191  	// Ensure that memory is released and that the chunk is cleaned up properly
   192  	// after the chunk is distributed.
   193  	//
   194  	// Need to ensure the erasure coding memory is released as well as the
   195  	// physical chunk memory. Physical chunk memory is released by setting
   196  	// 'workersRemaining' to zero if the repair fails before being distributed
   197  	// to workers. Erasure coding memory is released manually if the repair
   198  	// fails before the erasure coding occurs.
   199  	defer r.managedCleanUpUploadChunk(chunk)
   200  
   201  	// Fetch the logical data for the chunk.
   202  	err := r.managedFetchLogicalChunkData(chunk)
   203  	if err != nil {
   204  		// Logical data is not available, cannot upload. Chunk will not be
   205  		// distributed to workers, therefore set workersRemaining equal to zero.
   206  		// The erasure coding memory has not been released yet, be sure to
   207  		// release that as well.
   208  		chunk.logicalChunkData = nil
   209  		chunk.workersRemaining = 0
   210  		r.memoryManager.Return(erasureCodingMemory + pieceCompletedMemory)
   211  		chunk.memoryReleased += erasureCodingMemory + pieceCompletedMemory
   212  		r.log.Debugln("Fetching logical data of a chunk failed:", err)
   213  		return
   214  	}
   215  
   216  	// Create the physical pieces for the data. Immediately release the logical
   217  	// data.
   218  	//
   219  	// TODO: The logical data is the first few chunks of the physical data. If
   220  	// the memory is not being handled cleanly here, we should leverage that
   221  	// fact to reduce the total memory required to create the physical data.
   222  	// That will also change the amount of memory we need to allocate, and the
   223  	// number of times we need to return memory.
   224  	chunk.physicalChunkData, err = chunk.renterFile.erasureCode.EncodeShards(chunk.logicalChunkData)
   225  	chunk.logicalChunkData = nil
   226  	r.memoryManager.Return(erasureCodingMemory)
   227  	chunk.memoryReleased += erasureCodingMemory
   228  	if err != nil {
   229  		// Physical data is not available, cannot upload. Chunk will not be
   230  		// distributed to workers, therefore set workersRemaining equal to zero.
   231  		chunk.workersRemaining = 0
   232  		r.memoryManager.Return(pieceCompletedMemory)
   233  		chunk.memoryReleased += pieceCompletedMemory
   234  		for i := 0; i < len(chunk.physicalChunkData); i++ {
   235  			chunk.physicalChunkData[i] = nil
   236  		}
   237  		r.log.Debugln("Fetching physical data of a chunk failed:", err)
   238  		return
   239  	}
   240  
   241  	// Sanity check - we should have at least as many physical data pieces as we
   242  	// do elements in our piece usage.
   243  	if len(chunk.physicalChunkData) < len(chunk.pieceUsage) {
   244  		r.log.Critical("not enough physical pieces to match the upload settings of the file")
   245  		return
   246  	}
   247  	// Loop through the pieces and encrypt any that are needed, while dropping
   248  	// any pieces that are not needed.
   249  	for i := 0; i < len(chunk.pieceUsage); i++ {
   250  		if chunk.pieceUsage[i] {
   251  			chunk.physicalChunkData[i] = nil
   252  		} else {
   253  			// Encrypt the piece.
   254  			key := deriveKey(chunk.renterFile.masterKey, chunk.index, uint64(i))
   255  			chunk.physicalChunkData[i] = key.EncryptBytes(chunk.physicalChunkData[i])
   256  		}
   257  	}
   258  	// Return the released memory.
   259  	if pieceCompletedMemory > 0 {
   260  		r.memoryManager.Return(pieceCompletedMemory)
   261  		chunk.memoryReleased += pieceCompletedMemory
   262  	}
   263  
   264  	// Distribute the chunk to the workers.
   265  	r.managedDistributeChunkToWorkers(chunk)
   266  }
   267  
   268  // managedFetchLogicalChunkData will get the raw data for a chunk, pulling it from disk if
   269  // possible but otherwise queueing a download.
   270  //
   271  // chunk.data should be passed as 'nil' to the download, to keep memory usage as
   272  // light as possible.
   273  func (r *Renter) managedFetchLogicalChunkData(chunk *unfinishedUploadChunk) error {
   274  	// Only download this file if more than 25% of the redundancy is missing.
   275  	numParityPieces := float64(chunk.piecesNeeded - chunk.minimumPieces)
   276  	minMissingPiecesToDownload := int(numParityPieces * RemoteRepairDownloadThreshold)
   277  	download := chunk.piecesCompleted+minMissingPiecesToDownload < chunk.piecesNeeded
   278  
   279  	// Download the chunk if it's not on disk.
   280  	if chunk.localPath == "" && download {
   281  		return r.managedDownloadLogicalChunkData(chunk)
   282  	} else if chunk.localPath == "" {
   283  		return errors.New("file not available locally")
   284  	}
   285  
   286  	// Try to read the data from disk. If that fails at any point, prefer to
   287  	// download the chunk.
   288  	//
   289  	// TODO: Might want to remove the file from the renter tracking if the disk
   290  	// loading fails. Should do this after we swap the file format, the tracking
   291  	// data for the file should reside in the file metadata and not in a
   292  	// separate struct.
   293  	osFile, err := os.Open(chunk.localPath)
   294  	if err != nil && download {
   295  		return r.managedDownloadLogicalChunkData(chunk)
   296  	} else if err != nil {
   297  		return errors.Extend(err, errors.New("failed to open file locally"))
   298  	}
   299  	defer osFile.Close()
   300  	// TODO: Once we have enabled support for small chunks, we should stop
   301  	// needing to ignore the EOF errors, because the chunk size should always
   302  	// match the tail end of the file. Until then, we ignore io.EOF.
   303  	buf := NewDownloadDestinationBuffer(chunk.length)
   304  	sr := io.NewSectionReader(osFile, chunk.offset, int64(chunk.length))
   305  	_, err = buf.ReadFrom(sr)
   306  	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF && download {
   307  		r.log.Debugln("failed to read file, downloading instead:", err)
   308  		return r.managedDownloadLogicalChunkData(chunk)
   309  	} else if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
   310  		r.log.Debugln("failed to read file locally:", err)
   311  		return errors.Extend(err, errors.New("failed to read file locally"))
   312  	}
   313  	chunk.logicalChunkData = buf
   314  
   315  	// Data successfully read from disk.
   316  	return nil
   317  }
   318  
   319  // managedCleanUpUploadChunk will check the state of the chunk and perform any
   320  // cleanup required. This can include returning rememory and releasing the chunk
   321  // from the map of active chunks in the chunk heap.
   322  func (r *Renter) managedCleanUpUploadChunk(uc *unfinishedUploadChunk) {
   323  	uc.mu.Lock()
   324  	piecesAvailable := 0
   325  	var memoryReleased uint64
   326  	// Release any unnecessary pieces, counting any pieces that are
   327  	// currently available.
   328  	for i := 0; i < len(uc.pieceUsage); i++ {
   329  		// Skip the piece if it's not available.
   330  		if uc.pieceUsage[i] {
   331  			continue
   332  		}
   333  
   334  		// If we have all the available pieces we need, release this piece.
   335  		// Otherwise, mark that there's another piece available. This algorithm
   336  		// will prefer releasing later pieces, which improves computational
   337  		// complexity for erasure coding.
   338  		if piecesAvailable >= uc.workersRemaining {
   339  			memoryReleased += uc.renterFile.pieceSize + crypto.TwofishOverhead
   340  			uc.physicalChunkData[i] = nil
   341  			// Mark this piece as taken so that we don't double release memory.
   342  			uc.pieceUsage[i] = true
   343  		} else {
   344  			piecesAvailable++
   345  		}
   346  	}
   347  
   348  	// Check if the chunk needs to be removed from the list of active
   349  	// chunks. It needs to be removed if the chunk is complete, but hasn't
   350  	// yet been released.
   351  	chunkComplete := uc.workersRemaining == 0 && uc.piecesRegistered == 0
   352  	released := uc.released
   353  	if chunkComplete && !released {
   354  		uc.released = true
   355  	}
   356  	uc.memoryReleased += uint64(memoryReleased)
   357  	totalMemoryReleased := uc.memoryReleased
   358  	uc.mu.Unlock()
   359  
   360  	// If there are pieces available, add the standby workers to collect them.
   361  	// Standby workers are only added to the chunk when piecesAvailable is equal
   362  	// to zero, meaning this code will only trigger if the number of pieces
   363  	// available increases from zero. That can only happen if a worker
   364  	// experiences an error during upload.
   365  	if piecesAvailable > 0 {
   366  		uc.managedNotifyStandbyWorkers()
   367  	}
   368  	// If required, return the memory to the renter.
   369  	if memoryReleased > 0 {
   370  		r.memoryManager.Return(memoryReleased)
   371  	}
   372  	// If required, remove the chunk from the set of active chunks.
   373  	if chunkComplete && !released {
   374  		r.uploadHeap.mu.Lock()
   375  		delete(r.uploadHeap.activeChunks, uc.id)
   376  		r.uploadHeap.mu.Unlock()
   377  	}
   378  	// Sanity check - all memory should be released if the chunk is complete.
   379  	if chunkComplete && totalMemoryReleased != uc.memoryNeeded {
   380  		r.log.Critical("No workers remaining, but not all memory released:", uc.workersRemaining, uc.piecesRegistered, uc.memoryReleased, uc.memoryNeeded)
   381  	}
   382  }