gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/downloadchunk.go

package renter

import (
	"fmt"
	"sync"
	"time"

	"gitlab.com/NebulousLabs/errors"

	"gitlab.com/SiaPrime/SiaPrime/build"
	"gitlab.com/SiaPrime/SiaPrime/crypto"
	"gitlab.com/SiaPrime/SiaPrime/modules"
	"gitlab.com/SiaPrime/SiaPrime/modules/renter/siafile"
)

// downloadPieceInfo contains all the information required to download and
// recover a piece of a chunk from a host. It is a value in a map where the
// key is the host's public key.
type downloadPieceInfo struct {
	index uint64
	root  crypto.Hash
}
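
// Within downloadPieceInfo, index is the piece's index within the chunk and
// root is the Merkle root used to request the sector holding that piece from
// the host.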

// unfinishedDownloadChunk contains a chunk for a download that is in progress.
//
// TODO: Currently, if a standby worker is needed, all of the standby workers
// are added and the first one that is available will pick up the slack. But,
// depending on the situation, we may only want to add a handful of workers to
// make sure that a fast / optimal worker is initially able to pick up the
// slack. This could potentially be streamlined by turning the standby array
// into a standby heap, and then having some general scoring system for figuring
// out how useful a worker is, and then having some threshold that a worker
// needs to meet before it is pulled from standby to work on the download. That
// threshold should go up every time that a worker fails, to make sure that if
// you have repeated failures, you keep pulling in the fresh workers instead of
// getting stuck and always rejecting all the standby workers.
type unfinishedDownloadChunk struct {
	// Fetch + Write instructions - read only or otherwise thread safe.
	destination downloadDestination // Where to write the recovered logical chunk.
	erasureCode modules.ErasureCoder
	masterKey   crypto.CipherKey

	// Fetch + Write instructions - read only or otherwise thread safe.
	staticChunkIndex  uint64                       // Required for deriving the encryption keys for each piece.
	staticCacheID     string                       // Used to uniquely identify a chunk in the chunk cache.
	staticChunkMap    map[string]downloadPieceInfo // Maps from host PubKey to the info for the piece associated with that host
	staticChunkSize   uint64
	staticFetchLength uint64 // Length within the logical chunk to fetch.
	staticFetchOffset uint64 // Offset within the logical chunk that is being downloaded.
	staticPieceSize   uint64
	staticWriteOffset int64 // Offset within the writer to write the completed data.

	// Fetch + Write instructions - read only or otherwise thread safe.
	staticLatencyTarget time.Duration
	staticNeedsMemory   bool // Set to true if memory was not pre-allocated for this chunk.
	staticOverdrive     int
	staticPriority      uint64

	// Download chunk state - need mutex to access.
	completedPieces   []bool    // Which pieces were downloaded successfully.
	failed            bool      // Indicates if the chunk has been marked as failed.
	physicalChunkData [][]byte  // Used to recover the logical data.
	pieceUsage        []bool    // Which pieces are being actively fetched.
	piecesCompleted   int       // Number of pieces that have successfully completed.
	piecesRegistered  int       // Number of pieces that workers are actively fetching.
	recoveryComplete  bool      // Whether or not the recovery has completed and the chunk memory released.
	workersRemaining  int       // Number of workers still able to fetch the chunk.
	workersStandby    []*worker // Set of workers that are able to work on this download, but are not needed unless other workers fail.

	// Memory management variables.
	memoryAllocated uint64

	// The download object, mostly to update download progress.
	download *download
	mu       sync.Mutex

	// The SiaFile from which data is being downloaded.
	renterFile *siafile.Snapshot
}
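
// The download chunk state above is manipulated by the methods below:
// markPieceCompleted keeps completedPieces and piecesCompleted in sync,
// managedCleanUp compares piecesCompleted and piecesRegistered against the
// erasure code's MinPieces() and the overdrive setting to decide whether
// standby workers should be queued, and returnMemory uses workersRemaining,
// piecesCompleted, piecesRegistered and recoveryComplete to work out how much
// of memoryAllocated can be handed back to the memory manager.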

// fail will set the chunk status to failed. The physical chunk memory will be
// wiped and any memory allocation will be returned to the renter. The download
// as a whole will be failed as well.
func (udc *unfinishedDownloadChunk) fail(err error) {
	udc.failed = true
	udc.recoveryComplete = true
	for i := range udc.physicalChunkData {
		udc.physicalChunkData[i] = nil
	}
	udc.download.managedFail(fmt.Errorf("chunk %v failed: %v", udc.staticChunkIndex, err))
	udc.destination = nil
}
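
// Both callers of fail in this file (managedCleanUp and
// threadedRecoverLogicalData) hold udc.mu while calling it. Setting
// recoveryComplete here also lets the next returnMemory call release all
// memory that is not still needed for registered pieces.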

// managedCleanUp will check if the download has failed, and if not it will add
// any standby workers which need to be added. Calling managedCleanUp too many
// times is not harmful, however missing a call to managedCleanUp can lead to
// deadlocks.
func (udc *unfinishedDownloadChunk) managedCleanUp() {
	// Check if the chunk is newly failed.
	udc.mu.Lock()
	if udc.workersRemaining+udc.piecesCompleted < udc.erasureCode.MinPieces() && !udc.failed {
		udc.fail(errors.New("not enough workers to continue download"))
	}
	// Return any excess memory.
	udc.returnMemory()

	// Nothing to do if the chunk has failed.
	if udc.failed {
		udc.mu.Unlock()
		return
	}

	// Check whether standby workers are required.
	chunkComplete := udc.piecesCompleted >= udc.erasureCode.MinPieces()
	desiredPiecesRegistered := udc.erasureCode.MinPieces() + udc.staticOverdrive - udc.piecesCompleted
	standbyWorkersRequired := !chunkComplete && udc.piecesRegistered < desiredPiecesRegistered
	if !standbyWorkersRequired {
		udc.mu.Unlock()
		return
	}

	// Assemble a list of standby workers, release the udc lock, and then queue
	// the chunk into the workers. The lock needs to be released early because
	// holding the udc lock and the worker lock at the same time is a deadlock
	// risk (they interact with each other, calling functions on each other).
	var standbyWorkers []*worker
	for i := 0; i < len(udc.workersStandby); i++ {
		standbyWorkers = append(standbyWorkers, udc.workersStandby[i])
	}
	udc.workersStandby = udc.workersStandby[:0] // Workers have been taken off of standby.
	udc.mu.Unlock()
	for i := 0; i < len(standbyWorkers); i++ {
		standbyWorkers[i].callQueueDownloadChunk(udc)
	}
}
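
// Illustration of the standby arithmetic above, with hypothetical numbers:
// for a 10-of-30 erasure code (MinPieces() == 10), an overdrive of 3, and 4
// pieces already completed, desiredPiecesRegistered is 10 + 3 - 4 = 9, so
// standby workers are queued only while fewer than 9 pieces are being
// actively fetched and the chunk cannot yet be recovered.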

// managedRemoveWorker will decrement the number of remaining workers on the
// udc. After a worker has been removed, the udc needs to be cleaned up.
func (udc *unfinishedDownloadChunk) managedRemoveWorker() {
	udc.mu.Lock()
	udc.workersRemaining--
	udc.mu.Unlock()
	udc.managedCleanUp()
}

// markPieceCompleted marks the piece with pieceIndex as completed.
func (udc *unfinishedDownloadChunk) markPieceCompleted(pieceIndex uint64) {
	udc.completedPieces[pieceIndex] = true
	udc.piecesCompleted++

	// Sanity check to make sure the slice and counter are consistent.
	if !build.DEBUG {
		return
	}
	completed := 0
	for _, b := range udc.completedPieces {
		if b {
			completed++
		}
	}
	if completed != udc.piecesCompleted {
		build.Critical(fmt.Sprintf("pieces completed and completedPieces out of sync %v != %v",
			completed, udc.piecesCompleted))
	}
}
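
// exampleMarkPiece is a minimal, hypothetical sketch (the name is invented
// here and not used elsewhere) showing how markPieceCompleted keeps the
// completedPieces slice and the piecesCompleted counter in sync. It builds a
// bare unfinishedDownloadChunk with room for four pieces and marks two of
// them.
func exampleMarkPiece() int {
	udc := &unfinishedDownloadChunk{
		completedPieces: make([]bool, 4),
	}
	udc.markPieceCompleted(0)
	udc.markPieceCompleted(2)
	// piecesCompleted is now 2, matching the two true entries in completedPieces.
	return udc.piecesCompleted
}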

// returnMemory will check on the status of all the workers and pieces, and
// determine how much memory is safe to return to the renter. This should be
// called each time a worker returns, and also after the chunk is recovered.
func (udc *unfinishedDownloadChunk) returnMemory() {
	// The maximum amount of memory is the pieces completed plus the number of
	// workers remaining.
	maxMemory := uint64(udc.workersRemaining+udc.piecesCompleted) * udc.staticPieceSize
	// If enough pieces have completed, max memory is the number of registered
	// pieces plus the number of completed pieces.
	if udc.piecesCompleted >= udc.erasureCode.MinPieces() {
		// udc.piecesRegistered is guaranteed to be at most equal to the number
		// of overdrive pieces, meaning it will be equal to or less than
		// initialMemory.
		maxMemory = uint64(udc.piecesCompleted+udc.piecesRegistered) * udc.staticPieceSize
	}
	// If the chunk recovery has completed, the only memory still needed is for
	// the pieces that are currently registered.
	if udc.recoveryComplete {
		maxMemory = uint64(udc.piecesRegistered) * udc.staticPieceSize
	}
	// Return any memory we don't need.
	if udc.memoryAllocated > maxMemory {
		udc.download.memoryManager.Return(udc.memoryAllocated - maxMemory)
		udc.memoryAllocated = maxMemory
	}
}
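
// Worked example of the caps above, with hypothetical numbers and a
// staticPieceSize of 1 MiB: with 12 workers remaining, all 10 required pieces
// completed and 2 pieces still registered, the cap drops from (12+10) MiB to
// (10+2) MiB, and once recoveryComplete is set it drops again to 2 MiB. Each
// time memoryAllocated exceeds the cap, the difference is returned to the
// download's memory manager.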

// threadedRecoverLogicalData will take all of the pieces that have been
// downloaded and recover them into the logical data, which is then written to
// the underlying writer for the download.
func (udc *unfinishedDownloadChunk) threadedRecoverLogicalData() error {
	// Ensure cleanup occurs after the data is recovered, whether recovery
	// succeeds or fails.
	defer udc.managedCleanUp()

	// Write the pieces to the requested output.
	dataOffset := recoveredDataOffset(udc.staticFetchOffset, udc.erasureCode)
	err := udc.destination.WritePieces(udc.erasureCode, udc.physicalChunkData, dataOffset, udc.staticWriteOffset, udc.staticFetchLength)
	if err != nil {
		udc.mu.Lock()
		udc.fail(err)
		udc.mu.Unlock()
		return errors.AddContext(err, "unable to write to download destination")
	}

	// Directly nil out the physical chunk data; it's not going to be used
	// anymore. Also signal that data recovery has completed.
	udc.mu.Lock()
	udc.physicalChunkData = nil
	udc.recoveryComplete = true
	udc.mu.Unlock()

	// Update the download and signal completion of this chunk.
	udc.download.mu.Lock()
	defer udc.download.mu.Unlock()
	udc.download.chunksRemaining--
	if udc.download.chunksRemaining == 0 {
		// Download is complete, send out a notification.
		udc.download.markComplete()
		return err
	}
	return nil
}
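
// Despite the function name, the erasure decoding itself is expected to happen
// inside destination.WritePieces, which receives the erasure coder along with
// the raw physicalChunkData and the offsets describing where the requested
// range lives within the recovered data.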

// bytesToRecover returns the number of bytes we need to recover from the
// erasure coded segments. The number of bytes we need to recover doesn't
// always match the chunkFetchLength. For example, a user might want to fetch
// 500 bytes from a segment that is 640 bytes large after recovery; then the
// number of bytes to recover would be 640 instead of 500, and the 140 bytes
// we don't need would be discarded after recovery.
func bytesToRecover(chunkFetchOffset, chunkFetchLength, chunkSize uint64, rs modules.ErasureCoder) uint64 {
	// If partial decoding is not available, we download the whole sector and
	// recover the whole chunk.
	if !rs.SupportsPartialEncoding() {
		return chunkSize
	}
	// Otherwise we need to calculate how much data we need to recover.
	recoveredSegmentSize := uint64(rs.MinPieces() * crypto.SegmentSize)
	_, numSegments := segmentsForRecovery(chunkFetchOffset, chunkFetchLength, rs)
	return numSegments * recoveredSegmentSize
}
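
// Tying this to the example in the comment above, and assuming a coder with
// MinPieces() == 10 that supports partial encoding: recoveredSegmentSize is
// 10 * crypto.SegmentSize = 640 bytes, and a 500-byte fetch starting at
// offset 0 touches a single recovered segment, so bytesToRecover returns 640
// and the extra 140 bytes are discarded after recovery.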

// recoveredDataOffset translates the fetch offset of the chunk into the offset
// within the recovered data.
func recoveredDataOffset(chunkFetchOffset uint64, rs modules.ErasureCoder) uint64 {
	// If partial decoding is not available, we download the whole sector and
	// recover the whole chunk, which means the offset and length are actually
	// equal to the chunkFetchOffset and chunkFetchLength.
	if !rs.SupportsPartialEncoding() {
		return chunkFetchOffset
	}
	// Otherwise we need to adjust the offset a bit.
	recoveredSegmentSize := uint64(rs.MinPieces() * crypto.SegmentSize)
	return chunkFetchOffset % recoveredSegmentSize
}
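
// exampleRecoveredDataOffset is a minimal, hypothetical sketch (the name and
// figures are invented for illustration) of the modulo arithmetic above,
// assuming an erasure coder with 10 data pieces that supports partial
// encoding, so a recovered segment spans 10 * crypto.SegmentSize = 640 bytes.
func exampleRecoveredDataOffset() uint64 {
	const assumedMinPieces = 10 // hypothetical 10-data-piece code
	recoveredSegmentSize := uint64(assumedMinPieces * crypto.SegmentSize)
	chunkFetchOffset := uint64(1000) // fetch starts 1000 bytes into the chunk
	return chunkFetchOffset % recoveredSegmentSize // 1000 % 640 == 360
}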