gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/downloadchunk.go

package renter

import (
	"fmt"
	"sync"
	"time"

	"gitlab.com/NebulousLabs/errors"

	"gitlab.com/SkynetLabs/skyd/build"
	"gitlab.com/SkynetLabs/skyd/skymodules"
	"gitlab.com/SkynetLabs/skyd/skymodules/renter/filesystem/siafile"
	"go.sia.tech/siad/crypto"
)

// downloadPieceInfo contains all the information required to download and
// recover a piece of a chunk from a host. It is a value in a map where the key
// is the host's public key.
type downloadPieceInfo struct {
	index uint64
	root  crypto.Hash
}

// unfinishedDownloadChunk contains a chunk for a download that is in progress.
//
// TODO: Currently, if a standby worker is needed, all of the standby workers
// are added and the first one that is available will pick up the slack. But,
// depending on the situation, we may only want to add a handful of workers to
// make sure that a fast / optimal worker is initially able to pick up the
// slack. This could potentially be streamlined by turning the standby array
// into a standby heap, adding a general scoring system for figuring out how
// useful a worker is, and then requiring a worker to meet some threshold
// before it is pulled from standby to work on the download. That threshold
// should go up every time that a worker fails, to make sure that if you have
// repeated failures, you keep pulling in fresh workers instead of getting
// stuck and always rejecting all the standby workers.
type unfinishedDownloadChunk struct {
	// Fetch + Write instructions - read only or otherwise thread safe.
	destination downloadDestination // Where to write the recovered logical chunk.
	erasureCode skymodules.ErasureCoder
	masterKey   crypto.CipherKey

	// Fetch + Write instructions - read only or otherwise thread safe.
	staticChunkIndex  uint64                       // Required for deriving the encryption keys for each piece.
	staticCacheID     string                       // Used to uniquely identify a chunk in the chunk cache.
	staticChunkMap    map[string]downloadPieceInfo // Maps from host PubKey to the info for the piece associated with that host.
	staticChunkSize   uint64
	staticFetchLength uint64 // Length within the logical chunk to fetch.
	staticFetchOffset uint64 // Offset within the logical chunk that is being downloaded.
	staticPieceSize   uint64
	staticWriteOffset int64 // Offset within the writer to write the completed data.

	// Spending details.
	staticSpendingCategory spendingCategory

	// Fetch + Write instructions - read only or otherwise thread safe.
	staticDisableDiskFetch bool
	staticLatencyTarget    time.Duration
	staticNeedsMemory      bool // Set to true if memory was not pre-allocated for this chunk.
	staticMemoryManager    *memoryManager
	staticOverdrive        int
	staticPriority         uint64

	// Download chunk state - need mutex to access.
	completedPieces   []bool    // Which pieces were downloaded successfully.
	failed            bool      // Indicates if the chunk has been marked as failed.
	physicalChunkData [][]byte  // Used to recover the logical data.
	pieceUsage        []bool    // Which pieces are being actively fetched.
	piecesCompleted   int       // Number of pieces that have successfully completed.
	piecesRegistered  int       // Number of pieces that workers are actively fetching.
	recoveryComplete  bool      // Whether or not the recovery has completed and the chunk memory released.
	workersRemaining  int       // Number of workers still able to fetch the chunk.
	workersStandby    []*worker // Set of workers that are able to work on this download, but are not needed unless other workers fail.

	// Memory management variables.
	memoryAllocated uint64

	// The staticDownload object, mostly used to update download progress.
	staticDownload *download
	mu             sync.Mutex

	// The SiaFile from which data is being downloaded.
	renterFile *siafile.Snapshot
}

// fail will set the chunk status to failed. The physical chunk memory will be
// wiped and any memory allocation will be returned to the renter. The download
// as a whole will be failed as well.
func (udc *unfinishedDownloadChunk) fail(err error) {
	udc.failed = true
	udc.recoveryComplete = true
	for i := range udc.physicalChunkData {
		udc.physicalChunkData[i] = nil
	}
	udc.staticDownload.managedFail(fmt.Errorf("chunk %v failed: %v", udc.staticChunkIndex, err))
	udc.destination = nil
}

// managedCleanUp will check if the download has failed, and if not it will add
// any standby workers which need to be added. Calling managedCleanUp too many
// times is not harmful; however, missing a call to managedCleanUp can lead to
// deadlocks.
func (udc *unfinishedDownloadChunk) managedCleanUp() {
	// Check if the chunk is newly failed.
	udc.mu.Lock()
	if udc.workersRemaining+udc.piecesCompleted < udc.erasureCode.MinPieces() && !udc.failed {
		str := fmt.Sprintf("workers remaining %v, pieces completed %v, min pieces %v", udc.workersRemaining, udc.piecesCompleted, udc.erasureCode.MinPieces())
		udc.fail(errors.AddContext(errNotEnoughWorkers, str))
	}
	// Return any excess memory.
	udc.returnMemory()

	// Nothing to do if the chunk has failed.
	if udc.failed {
		udc.mu.Unlock()
		return
	}

	// Check whether standby workers are required.
	chunkComplete := udc.piecesCompleted >= udc.erasureCode.MinPieces()
	desiredPiecesRegistered := udc.erasureCode.MinPieces() + udc.staticOverdrive - udc.piecesCompleted
	standbyWorkersRequired := !chunkComplete && udc.piecesRegistered < desiredPiecesRegistered
	if !standbyWorkersRequired {
		udc.mu.Unlock()
		return
	}

	// Assemble a list of standby workers, release the udc lock, and then queue
	// the chunk into the workers. The lock needs to be released early because
	// holding the udc lock and the worker lock at the same time is a deadlock
	// risk (they interact with each other, calling functions on each other).
	var standbyWorkers []*worker
	for i := 0; i < len(udc.workersStandby); i++ {
		standbyWorkers = append(standbyWorkers, udc.workersStandby[i])
	}
	udc.workersStandby = udc.workersStandby[:0] // Workers have been taken off of standby.
	udc.mu.Unlock()
	for i := 0; i < len(standbyWorkers); i++ {
		go standbyWorkers[i].threadedPerformDownloadChunkJob(udc)
	}
}

// managedFinalizeRecovery sets recoveryComplete to 'true' and also marks the
// download as complete if there are no more chunks remaining.
func (udc *unfinishedDownloadChunk) managedFinalizeRecovery() {
	// Directly nil out the physical chunk data, it's not going to be used
	// anymore. Also signal that data recovery has completed.
	udc.mu.Lock()
	udc.physicalChunkData = nil
	udc.recoveryComplete = true
	udc.mu.Unlock()

	// Update the download and signal completion of this chunk.
	udc.staticDownload.mu.Lock()
	defer udc.staticDownload.mu.Unlock()
	udc.staticDownload.chunksRemaining--
	if udc.staticDownload.chunksRemaining == 0 {
		// Download is complete, send out a notification.
		udc.staticDownload.markComplete()
	}
}

// managedRemoveWorker will decrement the number of remaining workers in the
// udc. After a worker has been removed, the udc needs to be cleaned up.
func (udc *unfinishedDownloadChunk) managedRemoveWorker() {
	udc.mu.Lock()
	udc.workersRemaining--
	udc.mu.Unlock()
	udc.managedCleanUp()
}

// markPieceCompleted marks the piece with pieceIndex as completed.
func (udc *unfinishedDownloadChunk) markPieceCompleted(pieceIndex uint64) {
	udc.completedPieces[pieceIndex] = true
	udc.piecesCompleted++

	// Sanity check to make sure the slice and counter are consistent.
	if !build.DEBUG {
		return
	}
	completed := 0
	for _, b := range udc.completedPieces {
		if b {
			completed++
		}
	}
	if completed != udc.piecesCompleted {
		build.Critical(fmt.Sprintf("pieces completed and completedPieces out of sync %v != %v",
			completed, udc.piecesCompleted))
	}
}

// returnMemory will check on the status of all the workers and pieces, and
// determine how much memory is safe to return to the renter. This should be
// called each time a worker returns, and also after the chunk is recovered.
func (udc *unfinishedDownloadChunk) returnMemory() {
	// The maximum amount of memory is the pieces completed plus the number of
	// workers remaining.
	maxMemory := uint64(udc.workersRemaining+udc.piecesCompleted) * udc.staticPieceSize
	// If enough pieces have completed, max memory is the number of registered
	// pieces plus the number of completed pieces.
	if udc.piecesCompleted >= udc.erasureCode.MinPieces() {
		// udc.piecesRegistered is guaranteed to be at most equal to the number
		// of overdrive pieces, meaning it will be equal to or less than
		// initialMemory.
		maxMemory = uint64(udc.piecesCompleted+udc.piecesRegistered) * udc.staticPieceSize
	}
	// If the chunk recovery has completed, the maximum amount of memory is
	// just the number of registered pieces.
	if udc.recoveryComplete {
		maxMemory = uint64(udc.piecesRegistered) * udc.staticPieceSize
	}
	// Return any memory we don't need.
	if uint64(udc.memoryAllocated) > maxMemory {
		udc.staticMemoryManager.Return(udc.memoryAllocated - maxMemory)
		udc.memoryAllocated = maxMemory
	}
}

// threadedRecoverLogicalData will take all of the pieces that have been
// downloaded, recover the logical data from them, and write that data to the
// underlying writer for the download.
func (udc *unfinishedDownloadChunk) threadedRecoverLogicalData() error {
	// Ensure cleanup occurs after the data is recovered, whether recovery
	// succeeds or fails.
	defer udc.managedCleanUp()

	// Write the pieces to the requested output.
	dataOffset := recoveredDataOffset(udc.staticFetchOffset, udc.erasureCode)
	err := udc.destination.WritePieces(udc.erasureCode, udc.physicalChunkData, dataOffset, udc.staticWriteOffset, udc.staticFetchLength)
	if err != nil {
		udc.mu.Lock()
		udc.fail(err)
		udc.mu.Unlock()
		return errors.AddContext(err, "unable to write to download destination")
	}
	// Finalize the chunk.
	udc.managedFinalizeRecovery()
	return nil
}

// bytesToRecover returns the number of bytes we need to recover from the
// erasure coded segments. The number of bytes we need to recover doesn't
// always match the chunkFetchLength. For example, a user might want to fetch
// 500 bytes from a segment that is 640 bytes large after recovery. In that
// case the number of bytes to recover would be 640 instead of 500, and the 140
// bytes we don't need would be discarded after recovery.
func bytesToRecover(chunkFetchOffset, chunkFetchLength, chunkSize uint64, rs skymodules.ErasureCoder) uint64 {
	// If partial decoding is not available, we downloaded the whole sector and
	// recovered the whole chunk.
	segmentSize, supportsPartial := rs.SupportsPartialEncoding()
	if !supportsPartial {
		return chunkSize
	}
	// Otherwise we need to calculate how much data we need to recover.
	recoveredSegmentSize := uint64(rs.MinPieces()) * segmentSize
	_, numSegments := segmentsForRecovery(chunkFetchOffset, chunkFetchLength, rs)
	return numSegments * recoveredSegmentSize
}

// recoveredDataOffset translates the fetch offset of the chunk into the offset
// within the recovered data.
func recoveredDataOffset(chunkFetchOffset uint64, rs skymodules.ErasureCoder) uint64 {
	// If partial decoding is not available, we downloaded the whole sector and
	// recovered the whole chunk, which means the offset and length are equal
	// to chunkFetchOffset and chunkFetchLength.
	segmentSize, supportsPartial := rs.SupportsPartialEncoding()
	if !supportsPartial {
		return chunkFetchOffset
	}
	// Otherwise we need to adjust the offset a bit.
	recoveredSegmentSize := uint64(rs.MinPieces()) * segmentSize
	return chunkFetchOffset % recoveredSegmentSize
}
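
// exampleRecoveryArithmetic is an illustrative sketch, not part of the
// download code path, showing how bytesToRecover and recoveredDataOffset
// relate when the erasure coder supports partial encoding. The concrete
// numbers in the comments are assumptions: a hypothetical coder with
// MinPieces() == 10 and a 64 byte segment size, so one recovered segment
// spans 640 bytes of logical data. The actual values depend on the coder
// passed in.
func exampleRecoveryArithmetic(rs skymodules.ErasureCoder, chunkSize uint64) {
	// Fetch 500 bytes starting at logical offset 100 of the chunk.
	chunkFetchOffset, chunkFetchLength := uint64(100), uint64(500)

	// Under the assumed 640 byte recovered segment size, bytes 100..599 fall
	// inside the first segment, so the entire 640 byte segment is recovered
	// and the unneeded head and tail are discarded afterwards.
	recovered := bytesToRecover(chunkFetchOffset, chunkFetchLength, chunkSize, rs) // 640 under the assumptions above

	// The requested data begins 100 bytes into the recovered data.
	dataOffset := recoveredDataOffset(chunkFetchOffset, rs) // 100 % 640 == 100 under the assumptions above

	fmt.Printf("recover %v bytes, requested data starts at offset %v\n", recovered, dataOffset)
}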