gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/downloadchunk.go

package renter

import (
	"fmt"
	"sync"
	"time"

	"gitlab.com/NebulousLabs/errors"

	"gitlab.com/SkynetLabs/skyd/build"
	"gitlab.com/SkynetLabs/skyd/skymodules"
	"gitlab.com/SkynetLabs/skyd/skymodules/renter/filesystem/siafile"
	"go.sia.tech/siad/crypto"
)

// downloadPieceInfo contains all the information required to download and
// recover a piece of a chunk from a host. It is a value in a map where the key
// is the host's public key.
type downloadPieceInfo struct {
	index uint64
	root  crypto.Hash
}

// unfinishedDownloadChunk contains a chunk for a download that is in progress.
//
// TODO: Currently, if a standby worker is needed, all of the standby workers
// are added and the first one that is available will pick up the slack. But,
// depending on the situation, we may only want to add a handful of workers to
// make sure that a fast / optimal worker is initially able to pick up the
// slack. This could potentially be streamlined by turning the standby array
// into a standby heap, and then having some general scoring system for figuring
// out how useful a worker is, and then having some threshold that a worker
// needs to be pulled from standby to work on the download. That threshold
// should go up every time that a worker fails, to make sure that if you have
// repeated failures, you keep pulling in the fresh workers instead of getting
// stuck and always rejecting all the standby workers.
type unfinishedDownloadChunk struct {
	// Fetch + Write instructions - read only or otherwise thread safe.
	destination downloadDestination // Where to write the recovered logical chunk.
	erasureCode skymodules.ErasureCoder
	masterKey   crypto.CipherKey

	// Fetch + Write instructions - read only or otherwise thread safe.
	staticChunkIndex  uint64                       // Required for deriving the encryption keys for each piece.
	staticCacheID     string                       // Used to uniquely identify a chunk in the chunk cache.
	staticChunkMap    map[string]downloadPieceInfo // Maps from host PubKey to the info for the piece associated with that host.
	staticChunkSize   uint64
	staticFetchLength uint64 // Length within the logical chunk to fetch.
	staticFetchOffset uint64 // Offset within the logical chunk that is being downloaded.
	staticPieceSize   uint64
	staticWriteOffset int64 // Offset within the writer to write the completed data.

	// Spending details.
	staticSpendingCategory spendingCategory

	// Fetch + Write instructions - read only or otherwise thread safe.
	staticDisableDiskFetch bool
	staticLatencyTarget    time.Duration
	staticNeedsMemory      bool // Set to true if memory was not pre-allocated for this chunk.
	staticMemoryManager    *memoryManager
	staticOverdrive        int
	staticPriority         uint64

	// Download chunk state - need mutex to access.
	completedPieces   []bool    // Which pieces were downloaded successfully.
	failed            bool      // Indicates if the chunk has been marked as failed.
	physicalChunkData [][]byte  // Used to recover the logical data.
	pieceUsage        []bool    // Which pieces are being actively fetched.
	piecesCompleted   int       // Number of pieces that have successfully completed.
	piecesRegistered  int       // Number of pieces that workers are actively fetching.
	recoveryComplete  bool      // Whether or not the recovery has completed and the chunk memory released.
	workersRemaining  int       // Number of workers still able to fetch the chunk.
	workersStandby    []*worker // Set of workers that are able to work on this download, but are not needed unless other workers fail.

	// Memory management variables.
	memoryAllocated uint64

	// The staticDownload object, used mostly to update the download's progress.
	staticDownload *download
	mu             sync.Mutex

	// The SiaFile from which data is being downloaded.
	renterFile *siafile.Snapshot
}

// fail will set the chunk status to failed. The physical chunk memory will be
// wiped and any memory allocation will be returned to the renter. The download
// as a whole will be failed as well.
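// Note: both call sites in this file hold the udc mutex when calling fail.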
func (udc *unfinishedDownloadChunk) fail(err error) {
	udc.failed = true
	udc.recoveryComplete = true
	for i := range udc.physicalChunkData {
		udc.physicalChunkData[i] = nil
	}
	udc.staticDownload.managedFail(fmt.Errorf("chunk %v failed: %v", udc.staticChunkIndex, err))
	udc.destination = nil
}

// managedCleanUp will check if the download has failed, and if not it will add
// any standby workers which need to be added. Calling managedCleanUp too many
// times is not harmful; however, missing a call to managedCleanUp can lead to
// deadlocks.
func (udc *unfinishedDownloadChunk) managedCleanUp() {
	// Check if the chunk is newly failed.
	udc.mu.Lock()
	if udc.workersRemaining+udc.piecesCompleted < udc.erasureCode.MinPieces() && !udc.failed {
		str := fmt.Sprintf("workers remaining %v, pieces completed %v, min pieces %v", udc.workersRemaining, udc.piecesCompleted, udc.erasureCode.MinPieces())
		udc.fail(errors.AddContext(errNotEnoughWorkers, str))
	}
	// Return any excess memory.
	udc.returnMemory()

	// Nothing to do if the chunk has failed.
	if udc.failed {
		udc.mu.Unlock()
		return
	}

	// Check whether standby workers are required.
	chunkComplete := udc.piecesCompleted >= udc.erasureCode.MinPieces()
	desiredPiecesRegistered := udc.erasureCode.MinPieces() + udc.staticOverdrive - udc.piecesCompleted
	standbyWorkersRequired := !chunkComplete && udc.piecesRegistered < desiredPiecesRegistered
	if !standbyWorkersRequired {
		udc.mu.Unlock()
		return
	}

	// Assemble a list of standby workers, release the udc lock, and then queue
	// the chunk into the workers. The lock needs to be released early because
	// holding the udc lock and the worker lock at the same time is a deadlock
	// risk (they interact with each other, calling functions on each other).
	var standbyWorkers []*worker
	for i := 0; i < len(udc.workersStandby); i++ {
		standbyWorkers = append(standbyWorkers, udc.workersStandby[i])
	}
	udc.workersStandby = udc.workersStandby[:0] // Workers have been taken off of standby.
	udc.mu.Unlock()
	for i := 0; i < len(standbyWorkers); i++ {
		go standbyWorkers[i].threadedPerformDownloadChunkJob(udc)
	}
}

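// Illustrative example for the standby check in managedCleanUp, using assumed
// numbers rather than values from the codebase: with MinPieces() == 10,
// staticOverdrive == 2 and 4 pieces already completed, desiredPiecesRegistered
// is 10 + 2 - 4 = 8. If fewer than 8 pieces are currently being fetched, every
// standby worker is handed the chunk via threadedPerformDownloadChunkJob;
// otherwise the chunk simply waits for the registered pieces to finish or fail.
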
// managedFinalizeRecovery sets recoveryComplete to 'true' and also marks
// the download as complete if there are no more chunks remaining.
func (udc *unfinishedDownloadChunk) managedFinalizeRecovery() {
	// Directly nil out the physical chunk data; it's not going to be used
	// anymore. Also signal that data recovery has completed.
	udc.mu.Lock()
	udc.physicalChunkData = nil
	udc.recoveryComplete = true
	udc.mu.Unlock()

	// Update the download and signal completion of this chunk.
	udc.staticDownload.mu.Lock()
	defer udc.staticDownload.mu.Unlock()
	udc.staticDownload.chunksRemaining--
	if udc.staticDownload.chunksRemaining == 0 {
		// Download is complete, send out a notification.
		udc.staticDownload.markComplete()
	}
}

// managedRemoveWorker decrements the number of remaining workers in the udc.
// After a worker has been removed, the udc needs to be cleaned up.
func (udc *unfinishedDownloadChunk) managedRemoveWorker() {
	udc.mu.Lock()
	udc.workersRemaining--
	udc.mu.Unlock()
	udc.managedCleanUp()
}

// markPieceCompleted marks the piece with pieceIndex as completed.
func (udc *unfinishedDownloadChunk) markPieceCompleted(pieceIndex uint64) {
	udc.completedPieces[pieceIndex] = true
	udc.piecesCompleted++

	// Sanity check to make sure the slice and counter are consistent.
	if !build.DEBUG {
		return
	}
	completed := 0
	for _, b := range udc.completedPieces {
		if b {
			completed++
		}
	}
	if completed != udc.piecesCompleted {
		build.Critical(fmt.Sprintf("pieces completed and completedPieces out of sync %v != %v",
			completed, udc.piecesCompleted))
	}
}

// returnMemory will check on the status of all the workers and pieces, and
// determine how much memory is safe to return to the renter. This should be
// called each time a worker returns, and also after the chunk is recovered.
func (udc *unfinishedDownloadChunk) returnMemory() {
	// The maximum amount of memory is the pieces completed plus the number of
	// workers remaining.
	maxMemory := uint64(udc.workersRemaining+udc.piecesCompleted) * udc.staticPieceSize
	// If enough pieces have completed, max memory is the number of registered
	// pieces plus the number of completed pieces.
	if udc.piecesCompleted >= udc.erasureCode.MinPieces() {
		// udc.piecesRegistered is guaranteed to be at most equal to the number
		// of overdrive pieces, meaning it will be equal to or less than
		// initialMemory.
		maxMemory = uint64(udc.piecesCompleted+udc.piecesRegistered) * udc.staticPieceSize
	}
	// If the chunk recovery has completed, the maximum number of pieces is the
	// number of registered pieces.
	if udc.recoveryComplete {
		maxMemory = uint64(udc.piecesRegistered) * udc.staticPieceSize
	}
	// Return any memory we don't need.
	if udc.memoryAllocated > maxMemory {
		udc.staticMemoryManager.Return(udc.memoryAllocated - maxMemory)
		udc.memoryAllocated = maxMemory
	}
}

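// Illustrative example for the memory accounting in returnMemory, using
// assumed numbers: with a piece size of 4 MiB, 3 workers remaining, 2 pieces
// completed and 1 piece registered, maxMemory starts at (3 + 2) * 4 MiB.
// Once piecesCompleted reaches MinPieces(), only completed plus registered
// pieces are counted, and once recoveryComplete is set only the registered
// pieces are counted. Anything allocated beyond maxMemory is handed back to
// staticMemoryManager right away.
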
// threadedRecoverLogicalData will take all of the pieces that have been
// downloaded and recover them into the logical data, which is then written to
// the underlying writer for the download.
func (udc *unfinishedDownloadChunk) threadedRecoverLogicalData() error {
	// Ensure cleanup occurs after the data is recovered, whether recovery
	// succeeds or fails.
	defer udc.managedCleanUp()

	// Write the pieces to the requested output.
	dataOffset := recoveredDataOffset(udc.staticFetchOffset, udc.erasureCode)
	err := udc.destination.WritePieces(udc.erasureCode, udc.physicalChunkData, dataOffset, udc.staticWriteOffset, udc.staticFetchLength)
	if err != nil {
		udc.mu.Lock()
		udc.fail(err)
		udc.mu.Unlock()
		return errors.AddContext(err, "unable to write to download destination")
	}
	// Finalize the chunk.
	udc.managedFinalizeRecovery()
	return nil
}

// bytesToRecover returns the number of bytes we need to recover from the
// erasure coded segments. The number of bytes we need to recover doesn't
// always match the chunkFetchLength. For example, a user might want to fetch
// 500 bytes from a segment that is 640 bytes large after recovery. Then the
// number of bytes to recover would be 640 instead of 500, and the 140 bytes
// we don't need would be discarded after recovery.
func bytesToRecover(chunkFetchOffset, chunkFetchLength, chunkSize uint64, rs skymodules.ErasureCoder) uint64 {
	// If partial decoding is not available, we downloaded the whole sector and
	// recovered the whole chunk.
	segmentSize, supportsPartial := rs.SupportsPartialEncoding()
	if !supportsPartial {
		return chunkSize
	}
	// Otherwise we need to calculate how much data we need to recover.
	recoveredSegmentSize := uint64(rs.MinPieces()) * segmentSize
	_, numSegments := segmentsForRecovery(chunkFetchOffset, chunkFetchLength, rs)
	return numSegments * recoveredSegmentSize
}

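// Illustrative example for bytesToRecover, using assumed numbers that match
// the 640 byte case in the comment above: with MinPieces() == 10 and a segment
// size of 64 bytes, recoveredSegmentSize is 10 * 64 = 640. A 500 byte fetch
// that falls within a single recovered segment therefore recovers
// 1 * 640 = 640 bytes, and the trailing 140 bytes are discarded by the caller.
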
// recoveredDataOffset translates the fetch offset of the chunk into the offset
// within the recovered data.
func recoveredDataOffset(chunkFetchOffset uint64, rs skymodules.ErasureCoder) uint64 {
	// If partial decoding is not available, we downloaded the whole sector and
	// recovered the whole chunk, which means the offset and length are actually
	// equal to the chunkFetchOffset and chunkFetchLength.
	segmentSize, supportsPartial := rs.SupportsPartialEncoding()
	if !supportsPartial {
		return chunkFetchOffset
	}
	// Otherwise we need to adjust the offset a bit.
	recoveredSegmentSize := uint64(rs.MinPieces()) * segmentSize
	return chunkFetchOffset % recoveredSegmentSize
}
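
// Illustrative example for recoveredDataOffset, using assumed numbers: with
// MinPieces() == 10 and a 64 byte segment size, recoveredSegmentSize is 640.
// A chunkFetchOffset of 1000 bytes then maps to offset 1000 % 640 = 360 within
// the recovered data, since the recovered data starts at the beginning of the
// segment that contains the fetch offset.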