github.com/Synthesix/Sia@v1.3.3-0.20180413141344-f863baeed3ca/modules/renter/uploadchunk.go

package renter

import (
	"io"
	"os"
	"sync"

	"github.com/Synthesix/Sia/crypto"

	"github.com/NebulousLabs/errors"
)

// uploadChunkID is a unique identifier for each chunk in the renter.
type uploadChunkID struct {
	fileUID string // Unique to each file.
	index   uint64 // Unique to each chunk within a file.
}

// unfinishedUploadChunk contains a chunk from the filesystem that has not
// finished uploading, including knowledge of the progress.
type unfinishedUploadChunk struct {
	// Information about the file. localPath may be the empty string if the
	// file is known not to exist locally.
	id         uploadChunkID
	localPath  string
	renterFile *file

	// Information about the chunk, namely where it exists within the file.
	//
	// TODO / NOTE: As we change the file mapper, we're probably going to have
	// to update these fields. Compatibility shouldn't be an issue because this
	// struct is not persisted anywhere, it's always built from other
	// structures.
	index          uint64
	length         uint64
	memoryNeeded   uint64 // memory needed in bytes
	memoryReleased uint64 // memory that has been returned of memoryNeeded
	minimumPieces  int    // number of pieces required to recover the file.
	offset         int64  // Offset of the chunk within the file.
	piecesNeeded   int    // number of pieces to achieve a 100% complete upload

	// The logical data is the data that is presented to the user when the user
	// requests the chunk. The physical data is all of the pieces that get
	// stored across the network.
	logicalChunkData  []byte
	physicalChunkData [][]byte

	// Worker synchronization fields. The mutex only protects these fields.
	//
	// When a worker passes over a piece for upload to go on standby:
	//	+ the worker should add itself to the list of standby chunks
	//	+ the worker should call for memory to be released
	//
	// When a worker passes over a piece because it's not useful:
	//	+ the worker should decrement the number of workers remaining
	//	+ the worker should call for memory to be released
	//
	// When a worker accepts a piece for upload:
	//	+ the worker should increment the number of pieces registered
	//	+ the worker should mark the piece usage for the piece it is uploading
	//	+ the worker should decrement the number of workers remaining
	//
	// When a worker completes an upload (success or failure):
	//	+ the worker should decrement the number of pieces registered
	//	+ the worker should call for memory to be released
	//
	// When a worker completes an upload (failure):
	//	+ the worker should unmark the piece usage for the piece it registered
	//	+ the worker should notify the standby workers of a new available piece
	//
	// When a worker completes an upload successfully:
	//	+ the worker should increment the number of pieces completed
	//	+ the worker should decrement the number of pieces registered
	//	+ the worker should release the memory for the completed piece
	mu               sync.Mutex
	pieceUsage       []bool              // 'true' if a piece is either uploaded, or a worker is attempting to upload that piece.
	piecesCompleted  int                 // number of pieces that have been fully uploaded.
	piecesRegistered int                 // number of pieces that are being uploaded, but aren't finished yet (may fail).
	released         bool                // whether this chunk has been released from the active chunks set.
	unusedHosts      map[string]struct{} // hosts that aren't yet storing any pieces or performing any work.
	workersRemaining int                 // number of inactive workers still able to upload a piece.
	workersStandby   []*worker           // workers that can be used if other workers fail.
}
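
// As a minimal sketch of the "worker accepts a piece for upload" transition
// described above (illustrative only, not a helper that exists in this
// package), a worker accepting piece i would do:
//
//	uc.mu.Lock()
//	uc.piecesRegistered++
//	uc.pieceUsage[i] = true
//	uc.workersRemaining--
//	uc.mu.Unlock()
//
// Once workersRemaining and piecesRegistered have both dropped to zero the
// chunk is complete, and managedCleanUpUploadChunk checks that memoryReleased
// matches memoryNeeded.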

// managedNotifyStandbyWorkers is called when a worker fails to upload a piece,
// meaning that the standby workers may now be needed to help the chunk finish
// uploading.
func (uc *unfinishedUploadChunk) managedNotifyStandbyWorkers() {
	// Copy the standby workers into a new slice and reset it since we can't
	// hold the lock while calling the managed function.
	uc.mu.Lock()
	standbyWorkers := make([]*worker, len(uc.workersStandby))
	copy(standbyWorkers, uc.workersStandby)
	uc.workersStandby = uc.workersStandby[:0]
	uc.mu.Unlock()

	for i := 0; i < len(standbyWorkers); i++ {
		standbyWorkers[i].managedQueueUploadChunk(uc)
	}
}

// managedDistributeChunkToWorkers will take a chunk with fully prepared
// physical data and distribute it to the worker pool.
func (r *Renter) managedDistributeChunkToWorkers(uc *unfinishedUploadChunk) {
	// Give the chunk to each worker, marking the number of workers that have
	// received the chunk. The workers cannot be interacted with while the
	// renter is holding a lock, so we need to build a list of workers while
	// under lock and then launch work jobs after that.
	id := r.mu.RLock()
	uc.workersRemaining += len(r.workerPool)
	workers := make([]*worker, 0, len(r.workerPool))
	for _, worker := range r.workerPool {
		workers = append(workers, worker)
	}
	r.mu.RUnlock(id)
	for _, worker := range workers {
		worker.managedQueueUploadChunk(uc)
	}
}

// managedDownloadLogicalChunkData will fetch the logical chunk data by
// submitting a download to the renter's downloader, and then using the data
// that gets returned.
func (r *Renter) managedDownloadLogicalChunkData(chunk *unfinishedUploadChunk) error {
	// Determine what the download length should be. Normally it is just the
	// chunk size, but if this is the last chunk we need to download less
	// because the file is not that large.
	//
	// TODO: There is a disparity in the way that the upload and download code
	// handle the last chunk, which may not be full sized.
	downloadLength := chunk.length
	if chunk.index == chunk.renterFile.numChunks()-1 && chunk.renterFile.size%chunk.length != 0 {
		downloadLength = chunk.renterFile.size % chunk.length
	}

	// Create the download.
	buf := downloadDestinationBuffer(make([]byte, chunk.length))
	d, err := r.newDownload(downloadParams{
		destination:     buf,
		destinationType: "buffer",
		file:            chunk.renterFile,

		latencyTarget: 200e3, // No need to rush latency on repair downloads.
		length:        downloadLength,
		needsMemory:   false, // We already requested memory, the download memory fits inside of that.
		offset:        uint64(chunk.offset),
		overdrive:     0, // No need to fetch extra pieces to rush repair downloads.
		priority:      0, // Repair downloads are completely de-prioritized.
	})
	if err != nil {
		return err
	}

	// Wait for the download to complete.
	select {
	case <-d.completeChan:
	case <-r.tg.StopChan():
		return errors.New("repair download interrupted by stop call")
	}
	if d.Err() != nil {
		buf = nil
		return d.Err()
	}
	chunk.logicalChunkData = []byte(buf)
	return nil
}
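
// A worked example for the downloadLength calculation above, assuming a
// hypothetical 100 MiB file with a chunk.length of 40 MiB: chunks 0 and 1
// download the full 40 MiB, while the final chunk (index numChunks()-1)
// downloads only size % length = 20 MiB, because the file does not fill it.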

// managedFetchAndRepairChunk will fetch the logical data for a chunk, create
// the physical pieces for the chunk, and then distribute them.
func (r *Renter) managedFetchAndRepairChunk(chunk *unfinishedUploadChunk) {
	// Calculate the amount of memory needed for erasure coding. This will need
	// to be released if there's an error before erasure coding is complete.
	erasureCodingMemory := chunk.renterFile.pieceSize * uint64(chunk.renterFile.erasureCode.MinPieces())

	// Calculate the amount of memory to release due to already completed
	// pieces. This memory gets released during encryption, but needs to be
	// released if there's a failure before encryption happens.
	var pieceCompletedMemory uint64
	for i := 0; i < len(chunk.pieceUsage); i++ {
		if chunk.pieceUsage[i] {
			pieceCompletedMemory += chunk.renterFile.pieceSize + crypto.TwofishOverhead
		}
	}

	// Ensure that memory is released and that the chunk is cleaned up properly
	// after the chunk is distributed.
	//
	// Need to ensure the erasure coding memory is released as well as the
	// physical chunk memory. Physical chunk memory is released by setting
	// 'workersRemaining' to zero if the repair fails before being distributed
	// to workers. Erasure coding memory is released manually if the repair
	// fails before the erasure coding occurs.
	defer r.managedCleanUpUploadChunk(chunk)

	// Fetch the logical data for the chunk.
	err := r.managedFetchLogicalChunkData(chunk)
	if err != nil {
		// Logical data is not available, cannot upload. Chunk will not be
		// distributed to workers, therefore set workersRemaining equal to
		// zero. The erasure coding memory has not been released yet, be sure
		// to release that as well.
		chunk.logicalChunkData = nil
		chunk.workersRemaining = 0
		r.memoryManager.Return(erasureCodingMemory + pieceCompletedMemory)
		chunk.memoryReleased += erasureCodingMemory + pieceCompletedMemory
		r.log.Debugln("Fetching logical data of a chunk failed:", err)
		return
	}

	// Create the physical pieces for the data. Immediately release the logical
	// data.
	//
	// TODO: The logical data is the first few chunks of the physical data. If
	// the memory is not being handled cleanly here, we should leverage that
	// fact to reduce the total memory required to create the physical data.
	// That will also change the amount of memory we need to allocate, and the
	// number of times we need to return memory.
	chunk.physicalChunkData, err = chunk.renterFile.erasureCode.Encode(chunk.logicalChunkData)
	chunk.logicalChunkData = nil
	r.memoryManager.Return(erasureCodingMemory)
	chunk.memoryReleased += erasureCodingMemory
	if err != nil {
		// Physical data is not available, cannot upload. Chunk will not be
		// distributed to workers, therefore set workersRemaining equal to
		// zero.
		chunk.workersRemaining = 0
		r.memoryManager.Return(pieceCompletedMemory)
		chunk.memoryReleased += pieceCompletedMemory
		for i := 0; i < len(chunk.physicalChunkData); i++ {
			chunk.physicalChunkData[i] = nil
		}
		r.log.Debugln("Creating physical data of a chunk failed:", err)
		return
	}

	// Sanity check - we should have at least as many physical data pieces as
	// we do elements in our piece usage.
	if len(chunk.physicalChunkData) < len(chunk.pieceUsage) {
		r.log.Critical("not enough physical pieces to match the upload settings of the file")
		return
	}
	// Loop through the pieces and encrypt any that are needed, while dropping
	// any pieces that are not needed.
	for i := 0; i < len(chunk.pieceUsage); i++ {
		if chunk.pieceUsage[i] {
			chunk.physicalChunkData[i] = nil
		} else {
			// Encrypt the piece.
			key := deriveKey(chunk.renterFile.masterKey, chunk.index, uint64(i))
			chunk.physicalChunkData[i] = key.EncryptBytes(chunk.physicalChunkData[i])
		}
	}
	// Return the released memory.
	if pieceCompletedMemory > 0 {
		r.memoryManager.Return(pieceCompletedMemory)
		chunk.memoryReleased += pieceCompletedMemory
	}

	// Distribute the chunk to the workers.
	r.managedDistributeChunkToWorkers(chunk)
}
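
// To put numbers on the memory accounting above, assume a hypothetical file
// with a pieceSize of 4 MiB and a 10-of-30 erasure code: erasureCodingMemory
// is 4 MiB * 10 = 40 MiB, and each piece that was already complete when the
// chunk was popped contributes 4 MiB + crypto.TwofishOverhead to
// pieceCompletedMemory. On the success path the erasure coding memory is
// returned immediately after Encode, and the completed-piece memory is
// returned after encryption, when those pieces are dropped.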

// managedFetchLogicalChunkData will get the raw data for a chunk, pulling it
// from disk if possible but otherwise queueing a download.
//
// chunk.logicalChunkData should be passed as 'nil' to the download, to keep
// memory usage as light as possible.
func (r *Renter) managedFetchLogicalChunkData(chunk *unfinishedUploadChunk) error {
	// Only download this file if more than 25% of the redundancy is missing.
	numParityPieces := float64(chunk.piecesNeeded - chunk.minimumPieces)
	minMissingPiecesToDownload := int(numParityPieces * RemoteRepairDownloadThreshold)
	download := chunk.piecesCompleted+minMissingPiecesToDownload < chunk.piecesNeeded

	// Download the chunk if it's not on disk.
	if chunk.localPath == "" && download {
		return r.managedDownloadLogicalChunkData(chunk)
	} else if chunk.localPath == "" {
		return errors.New("file not available locally")
	}

	// Try to read the data from disk. If that fails at any point, prefer to
	// download the chunk.
	//
	// TODO: Might want to remove the file from the renter tracking if the disk
	// loading fails. Should do this after we swap the file format; the
	// tracking data for the file should reside in the file metadata and not in
	// a separate struct.
	osFile, err := os.Open(chunk.localPath)
	if err != nil && download {
		return r.managedDownloadLogicalChunkData(chunk)
	} else if err != nil {
		return errors.Extend(err, errors.New("failed to open file locally"))
	}
	defer osFile.Close()
	// TODO: Once we have enabled support for small chunks, we should stop
	// needing to ignore the EOF errors, because the chunk size should always
	// match the tail end of the file. Until then, we ignore io.EOF.
	chunk.logicalChunkData = make([]byte, chunk.length)
	_, err = osFile.ReadAt(chunk.logicalChunkData, chunk.offset)
	if err != nil && err != io.EOF && download {
		chunk.logicalChunkData = nil
		return r.managedDownloadLogicalChunkData(chunk)
	} else if err != nil && err != io.EOF {
		chunk.logicalChunkData = nil
		return errors.Extend(err, errors.New("failed to read file locally"))
	}

	// Data successfully read from disk.
	return nil
}
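
// A worked example for the download threshold above, assuming hypothetical
// settings of minimumPieces = 10, piecesNeeded = 30, and a
// RemoteRepairDownloadThreshold of 0.25: numParityPieces is 20, so
// minMissingPiecesToDownload is 5, and a remote download is preferred
// whenever piecesCompleted < 25, i.e. once more than 25% of the 20 pieces of
// redundancy are missing.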

// managedCleanUpUploadChunk will check the state of the chunk and perform any
// cleanup required. This can include returning memory and releasing the chunk
// from the map of active chunks in the chunk heap.
func (r *Renter) managedCleanUpUploadChunk(uc *unfinishedUploadChunk) {
	uc.mu.Lock()
	piecesAvailable := 0
	var memoryReleased uint64
	// Release any unnecessary pieces, counting any pieces that are
	// currently available.
	for i := 0; i < len(uc.pieceUsage); i++ {
		// Skip the piece if it's not available.
		if uc.pieceUsage[i] {
			continue
		}

		// If we have all the available pieces we need, release this piece.
		// Otherwise, mark that there's another piece available. This algorithm
		// will prefer releasing later pieces, which improves computational
		// complexity for erasure coding.
		if piecesAvailable >= uc.workersRemaining {
			memoryReleased += uc.renterFile.pieceSize + crypto.TwofishOverhead
			uc.physicalChunkData[i] = nil
			// Mark this piece as taken so that we don't double release memory.
			uc.pieceUsage[i] = true
		} else {
			piecesAvailable++
		}
	}

	// Check if the chunk needs to be removed from the list of active
	// chunks. It needs to be removed if the chunk is complete, but hasn't
	// yet been released.
	chunkComplete := uc.workersRemaining == 0 && uc.piecesRegistered == 0
	released := uc.released
	if chunkComplete && !released {
		uc.released = true
	}
	uc.memoryReleased += memoryReleased
	totalMemoryReleased := uc.memoryReleased
	uc.mu.Unlock()

	// If there are pieces available, add the standby workers to collect them.
	// Standby workers are only added to the chunk when piecesAvailable is
	// equal to zero, meaning this code will only trigger if the number of
	// pieces available increases from zero. That can only happen if a worker
	// experiences an error during upload.
	if piecesAvailable > 0 {
		uc.managedNotifyStandbyWorkers()
	}
	// If required, return the memory to the renter.
	if memoryReleased > 0 {
		r.memoryManager.Return(memoryReleased)
	}
	// If required, remove the chunk from the set of active chunks.
	if chunkComplete && !released {
		r.uploadHeap.mu.Lock()
		delete(r.uploadHeap.activeChunks, uc.id)
		r.uploadHeap.mu.Unlock()
	}
	// Sanity check - all memory should be released if the chunk is complete.
	if chunkComplete && totalMemoryReleased != uc.memoryNeeded {
		r.log.Critical("No workers remaining, but not all memory released:", uc.workersRemaining, uc.piecesRegistered, uc.memoryReleased, uc.memoryNeeded)
	}
}
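
// A worked example for the release loop in managedCleanUpUploadChunk: with a
// hypothetical pieceUsage of [false false false false] and workersRemaining
// of 2, the loop keeps the first two unused pieces available for the
// remaining workers and releases the memory of the last two, marking their
// pieceUsage true so the same memory cannot be released twice. Because
// workersRemaining is nonzero, the chunk is not yet complete and stays in
// uploadHeap.activeChunks.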