gitlab.com/jokerrs1/Sia@v1.3.2/modules/renter/uploadchunk.go

package renter

import (
	"io"
	"os"
	"sync"

	"github.com/NebulousLabs/Sia/crypto"

	"github.com/NebulousLabs/errors"
)

// uploadChunkID is a unique identifier for each chunk in the renter.
type uploadChunkID struct {
	fileUID string // Unique to each file.
	index   uint64 // Unique to each chunk within a file.
}

// unfinishedUploadChunk contains a chunk from the filesystem that has not
// finished uploading, including knowledge of the progress.
type unfinishedUploadChunk struct {
	// Information about the file. localPath may be the empty string if the
	// file is known not to exist locally.
	id         uploadChunkID
	localPath  string
	renterFile *file

	// Information about the chunk, namely where it exists within the file.
	//
	// TODO / NOTE: As we change the file mapper, we're probably going to have
	// to update these fields. Compatibility shouldn't be an issue because this
	// struct is not persisted anywhere, it's always built from other
	// structures.
	index          uint64
	length         uint64
	memoryNeeded   uint64 // memory needed in bytes
	memoryReleased uint64 // memory that has been returned of memoryNeeded
	minimumPieces  int    // number of pieces required to recover the file.
	offset         int64  // Offset of the chunk within the file.
	piecesNeeded   int    // number of pieces to achieve a 100% complete upload

	// The logical data is the data that is presented to the user when the user
	// requests the chunk. The physical data is all of the pieces that get
	// stored across the network.
	logicalChunkData  []byte
	physicalChunkData [][]byte

	// Worker synchronization fields. The mutex only protects these fields.
	//
	// When a worker passes over a piece for upload to go on standby:
	//	+ the worker should add itself to the list of standby chunks
	//	+ the worker should call for memory to be released
	//
	// When a worker passes over a piece because it's not useful:
	//	+ the worker should decrement the number of workers remaining
	//	+ the worker should call for memory to be released
	//
	// When a worker accepts a piece for upload:
	//	+ the worker should increment the number of pieces registered
	//	+ the worker should mark the piece usage for the piece it is uploading
	//	+ the worker should decrement the number of workers remaining
	//
	// When a worker completes an upload (success or failure):
	//	+ the worker should decrement the number of pieces registered
	//	+ the worker should call for memory to be released
	//
	// When a worker completes an upload (failure):
	//	+ the worker should unmark the piece usage for the piece it registered
	//	+ the worker should notify the standby workers of a new available piece
	//
	// When a worker completes an upload successfully:
	//	+ the worker should increment the number of pieces completed
	//	+ the worker should decrement the number of pieces registered
	//	+ the worker should release the memory for the completed piece
	mu               sync.Mutex
	pieceUsage       []bool // 'true' if a piece is either uploaded, or a worker is attempting to upload that piece.
	piecesCompleted  int    // number of pieces that have been fully uploaded.
	piecesRegistered int    // number of pieces that are being uploaded, but aren't finished yet (may fail).
	released         bool   // whether this chunk has been released from the active chunks set.
	unusedHosts      map[string]struct{} // hosts that aren't yet storing any pieces or performing any work.
	workersRemaining int                 // number of inactive workers still able to upload a piece.
	workersStandby   []*worker           // workers that can be used if other workers fail.
}

// managedNotifyStandbyWorkers is called when a worker fails to upload a piece,
// meaning that the standby workers may now be needed to help the piece finish
// uploading.
func (uc *unfinishedUploadChunk) managedNotifyStandbyWorkers() {
	// Copy the standby workers into a new slice and reset it since we can't
	// hold the lock while calling the managed function.
	uc.mu.Lock()
	standbyWorkers := make([]*worker, len(uc.workersStandby))
	copy(standbyWorkers, uc.workersStandby)
	uc.workersStandby = uc.workersStandby[:0]
	uc.mu.Unlock()

	for i := 0; i < len(standbyWorkers); i++ {
		standbyWorkers[i].managedQueueUploadChunk(uc)
	}
}

// managedDistributeChunkToWorkers will take a chunk with fully prepared
// physical data and distribute it to the worker pool.
func (r *Renter) managedDistributeChunkToWorkers(uc *unfinishedUploadChunk) {
	// Give the chunk to each worker, marking the number of workers that have
	// received the chunk. The workers cannot be interacted with while the
	// renter is holding a lock, so we need to build a list of workers while
	// under lock and then launch work jobs after that.
	id := r.mu.RLock()
	uc.workersRemaining += len(r.workerPool)
	workers := make([]*worker, 0, len(r.workerPool))
	for _, worker := range r.workerPool {
		workers = append(workers, worker)
	}
	r.mu.RUnlock(id)
	for _, worker := range workers {
		worker.managedQueueUploadChunk(uc)
	}
}

// managedDownloadLogicalChunkData will fetch the logical chunk data by sending
// a download to the renter's downloader, and then using the data that gets
// returned.
func (r *Renter) managedDownloadLogicalChunkData(chunk *unfinishedUploadChunk) error {
	// Determine what the download length should be. Normally it is just the
	// chunk size, but if this is the last chunk we need to download less
	// because the file is not that large.
	//
	// TODO: There is a disparity in the way that the upload and download code
	// handle the last chunk, which may not be full sized.
	downloadLength := chunk.length
	if chunk.index == chunk.renterFile.numChunks()-1 && chunk.renterFile.size%chunk.length != 0 {
		downloadLength = chunk.renterFile.size % chunk.length
	}

	// Create the download.
	buf := downloadDestinationBuffer(make([]byte, chunk.length))
	d, err := r.newDownload(downloadParams{
		destination:     buf,
		destinationType: "buffer",
		file:            chunk.renterFile,

		latencyTarget: 200e3, // No need to rush latency on repair downloads.
		length:        downloadLength,
		needsMemory:   false, // We already requested memory, the download memory fits inside of that.
		offset:        uint64(chunk.offset),
		overdrive:     0, // No need to rush the latency on repair downloads.
		priority:      0, // Repair downloads are completely de-prioritized.
	})
	if err != nil {
		return err
	}

	// Wait for the download to complete.
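	// The select below blocks until either the download signals completion or
	// the renter's thread group signals shutdown; on shutdown the repair is
	// abandoned instead of waiting on a download that may never finish.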
	select {
	case <-d.completeChan:
	case <-r.tg.StopChan():
		return errors.New("repair download interrupted by stop call")
	}
	if d.Err() != nil {
		buf = nil
		return d.Err()
	}
	chunk.logicalChunkData = []byte(buf)
	return nil
}

// managedFetchAndRepairChunk will fetch the logical data for a chunk, create
// the physical pieces for the chunk, and then distribute them.
func (r *Renter) managedFetchAndRepairChunk(chunk *unfinishedUploadChunk) {
	// Calculate the amount of memory needed for erasure coding. This will need
	// to be released if there's an error before erasure coding is complete.
	erasureCodingMemory := chunk.renterFile.pieceSize * uint64(chunk.renterFile.erasureCode.MinPieces())

	// Calculate the amount of memory to release due to already completed
	// pieces. This memory gets released during encryption, but needs to be
	// released if there's a failure before encryption happens.
	var pieceCompletedMemory uint64
	for i := 0; i < len(chunk.pieceUsage); i++ {
		if chunk.pieceUsage[i] {
			pieceCompletedMemory += chunk.renterFile.pieceSize + crypto.TwofishOverhead
		}
	}

	// Ensure that memory is released and that the chunk is cleaned up properly
	// after the chunk is distributed.
	//
	// Need to ensure the erasure coding memory is released as well as the
	// physical chunk memory. Physical chunk memory is released by setting
	// 'workersRemaining' to zero if the repair fails before being distributed
	// to workers. Erasure coding memory is released manually if the repair
	// fails before the erasure coding occurs.
	defer r.managedCleanUpUploadChunk(chunk)

	// Fetch the logical data for the chunk.
	err := r.managedFetchLogicalChunkData(chunk)
	if err != nil {
		// Logical data is not available, cannot upload. Chunk will not be
		// distributed to workers, therefore set workersRemaining equal to zero.
		// The erasure coding memory has not been released yet, be sure to
		// release that as well.
		chunk.logicalChunkData = nil
		chunk.workersRemaining = 0
		r.memoryManager.Return(erasureCodingMemory + pieceCompletedMemory)
		chunk.memoryReleased += erasureCodingMemory + pieceCompletedMemory
		r.log.Debugln("Fetching logical data of a chunk failed:", err)
		return
	}

	// Create the physical pieces for the data. Immediately release the logical
	// data.
	//
	// TODO: The logical data is the first few chunks of the physical data. If
	// the memory is not being handled cleanly here, we should leverage that
	// fact to reduce the total memory required to create the physical data.
	// That will also change the amount of memory we need to allocate, and the
	// number of times we need to return memory.
	chunk.physicalChunkData, err = chunk.renterFile.erasureCode.Encode(chunk.logicalChunkData)
	chunk.logicalChunkData = nil
	r.memoryManager.Return(erasureCodingMemory)
	chunk.memoryReleased += erasureCodingMemory
	if err != nil {
		// Physical data is not available, cannot upload. Chunk will not be
		// distributed to workers, therefore set workersRemaining equal to zero.
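		// The memory reserved for already-completed pieces is still held at
		// this point, so it is returned here as well, and any partially built
		// physical data is dropped.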
		chunk.workersRemaining = 0
		r.memoryManager.Return(pieceCompletedMemory)
		chunk.memoryReleased += pieceCompletedMemory
		for i := 0; i < len(chunk.physicalChunkData); i++ {
			chunk.physicalChunkData[i] = nil
		}
		r.log.Debugln("Fetching physical data of a chunk failed:", err)
		return
	}

	// Sanity check - we should have at least as many physical data pieces as
	// we do elements in our piece usage.
	if len(chunk.physicalChunkData) < len(chunk.pieceUsage) {
		r.log.Critical("not enough physical pieces to match the upload settings of the file")
		return
	}
	// Loop through the pieces and encrypt any that are needed, while dropping
	// any pieces that are not needed.
	for i := 0; i < len(chunk.pieceUsage); i++ {
		if chunk.pieceUsage[i] {
			chunk.physicalChunkData[i] = nil
		} else {
			// Encrypt the piece.
			key := deriveKey(chunk.renterFile.masterKey, chunk.index, uint64(i))
			chunk.physicalChunkData[i] = key.EncryptBytes(chunk.physicalChunkData[i])
		}
	}
	// Return the released memory.
	if pieceCompletedMemory > 0 {
		r.memoryManager.Return(pieceCompletedMemory)
		chunk.memoryReleased += pieceCompletedMemory
	}

	// Distribute the chunk to the workers.
	r.managedDistributeChunkToWorkers(chunk)
}

// managedFetchLogicalChunkData will get the raw data for a chunk, pulling it
// from disk if possible but otherwise queueing a download.
//
// chunk.data should be passed as 'nil' to the download, to keep memory usage
// as light as possible.
func (r *Renter) managedFetchLogicalChunkData(chunk *unfinishedUploadChunk) error {
	// Only download this file if more than 25% of the redundancy is missing.
	minMissingPiecesToDownload := (chunk.piecesNeeded - chunk.minimumPieces) / 4
	download := chunk.piecesCompleted+minMissingPiecesToDownload < chunk.piecesNeeded

	// Download the chunk if it's not on disk.
	if chunk.localPath == "" && download {
		return r.managedDownloadLogicalChunkData(chunk)
	} else if chunk.localPath == "" {
		return errors.New("file not available locally")
	}

	// Try to read the data from disk. If that fails at any point, prefer to
	// download the chunk.
	//
	// TODO: Might want to remove the file from the renter tracking if the disk
	// loading fails. Should do this after we swap the file format, the
	// tracking data for the file should reside in the file metadata and not in
	// a separate struct.
	osFile, err := os.Open(chunk.localPath)
	if err != nil && download {
		return r.managedDownloadLogicalChunkData(chunk)
	} else if err != nil {
		return errors.Extend(err, errors.New("failed to open file locally"))
	}
	defer osFile.Close()
	// TODO: Once we have enabled support for small chunks, we should stop
	// needing to ignore the EOF errors, because the chunk size should always
	// match the tail end of the file. Until then, we ignore io.EOF.
	chunk.logicalChunkData = make([]byte, chunk.length)
	_, err = osFile.ReadAt(chunk.logicalChunkData, chunk.offset)
	if err != nil && err != io.EOF && download {
		chunk.logicalChunkData = nil
		return r.managedDownloadLogicalChunkData(chunk)
	} else if err != nil && err != io.EOF {
		chunk.logicalChunkData = nil
		return errors.Extend(err, errors.New("failed to read file locally"))
	}

	// Data successfully read from disk.
	return nil
}

// managedCleanUpUploadChunk will check the state of the chunk and perform any
// cleanup required. This can include returning memory and releasing the chunk
// from the map of active chunks in the chunk heap.
func (r *Renter) managedCleanUpUploadChunk(uc *unfinishedUploadChunk) {
	uc.mu.Lock()
	piecesAvailable := 0
	var memoryReleased uint64
	// Release any unnecessary pieces, counting any pieces that are currently
	// available.
	for i := 0; i < len(uc.pieceUsage); i++ {
		// Skip the piece if it's not available.
		if uc.pieceUsage[i] {
			continue
		}

		// If we have all the available pieces we need, release this piece.
		// Otherwise, mark that there's another piece available. This algorithm
		// will prefer releasing later pieces, which improves computational
		// complexity for erasure coding.
		if piecesAvailable >= uc.workersRemaining {
			memoryReleased += uc.renterFile.pieceSize + crypto.TwofishOverhead
			uc.physicalChunkData[i] = nil
			// Mark this piece as taken so that we don't double release memory.
			uc.pieceUsage[i] = true
		} else {
			piecesAvailable++
		}
	}

	// Check if the chunk needs to be removed from the list of active chunks.
	// It needs to be removed if the chunk is complete, but hasn't yet been
	// released.
	chunkComplete := uc.workersRemaining == 0 && uc.piecesRegistered == 0
	released := uc.released
	if chunkComplete && !released {
		uc.released = true
	}
	uc.memoryReleased += memoryReleased
	totalMemoryReleased := uc.memoryReleased
	uc.mu.Unlock()

	// If there are pieces available, add the standby workers to collect them.
	// Standby workers are only added to the chunk when piecesAvailable is
	// equal to zero, meaning this code will only trigger if the number of
	// pieces available increases from zero. That can only happen if a worker
	// experiences an error during upload.
	if piecesAvailable > 0 {
		uc.managedNotifyStandbyWorkers()
	}
	// If required, return the memory to the renter.
	if memoryReleased > 0 {
		r.memoryManager.Return(memoryReleased)
	}
	// If required, remove the chunk from the set of active chunks.
	if chunkComplete && !released {
		r.uploadHeap.mu.Lock()
		delete(r.uploadHeap.activeChunks, uc.id)
		r.uploadHeap.mu.Unlock()
	}
	// Sanity check - all memory should be released if the chunk is complete.
	if chunkComplete && totalMemoryReleased != uc.memoryNeeded {
		r.log.Critical("No workers remaining, but not all memory released:", uc.workersRemaining, uc.piecesRegistered, uc.memoryReleased, uc.memoryNeeded)
	}
}
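
// Illustrative sketch (not part of the original file, hypothetical numbers):
// how the "only download this file if more than 25% of the redundancy is
// missing" rule in managedFetchLogicalChunkData works out for a 10-of-30
// erasure code (minimumPieces = 10, piecesNeeded = 30). The redundancy is
// 30 - 10 = 20 pieces, a quarter of which is 5, so a repair download is only
// considered once fewer than 25 pieces remain intact.
//
//	minimumPieces := 10
//	piecesNeeded := 30
//	piecesCompleted := 26 // hypothetical current state of the chunk
//	minMissingPiecesToDownload := (piecesNeeded - minimumPieces) / 4 // == 5
//	download := piecesCompleted+minMissingPiecesToDownload < piecesNeeded
//	// download == false here: 26 pieces are still intact, so the chunk is
//	// read from disk rather than re-downloaded.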