github.com/fozzysec/SiaPrime@v0.0.0-20190612043147-66c8e8d11fe3/modules/renter/uploadheap.go

package renter

// TODO / NOTE: Once the filesystem is tree-based, instead of continually
// looping through the whole filesystem we can add values to the file metadata
// for each folder + file, where the folder scan time is the least recent time
// of any file in the folder, and the folder health is the lowest health of any
// file in the folder. This will allow us to go one folder at a time and focus
// on problem areas instead of doing everything all at once every iteration.
// This should boost scalability.

// TODO / NOTE: We need to upgrade the contractor before we can do this, but we
// need to be checking for every piece within a contract, and checking that the
// piece is still available in the contract that we have, that the host did not
// lose or nullify the piece.

// TODO: Renter will try to download to repair a piece even if there are not
// enough workers to make any progress on the repair. This should be fixed.

import (
	"container/heap"
	"os"
	"sync"
	"time"

	"SiaPrime/build"
	"SiaPrime/crypto"
	"SiaPrime/types"
)

// uploadHeap contains a priority-sorted heap of all the chunks being uploaded
// to the renter, along with some metadata.
type uploadHeap struct {
	// activeChunks contains a list of all the chunks actively being worked on.
	// These chunks will either be in the heap, or will be in the queues of some
	// of the workers. A chunk is added to the activeChunks map as soon as it is
	// added to the uploadHeap, and it is removed from the map as soon as the
	// last worker completes work on the chunk.
	activeChunks map[uploadChunkID]struct{}
	heap         uploadChunkHeap
	newUploads   chan struct{}
	mu           sync.Mutex
}

// uploadChunkHeap is a bunch of priority-sorted chunks that need to be either
// uploaded or repaired.
//
// TODO: When the file system is adjusted to have a tree structure, the
// filesystem itself will serve as the uploadChunkHeap, making this structure
// unnecessary. The repair loop might be moved to repair.go.
type uploadChunkHeap []*unfinishedUploadChunk

// Implementation of heap.Interface for uploadChunkHeap.
func (uch uploadChunkHeap) Len() int { return len(uch) }
func (uch uploadChunkHeap) Less(i, j int) bool {
	return float64(uch[i].piecesCompleted)/float64(uch[i].piecesNeeded) < float64(uch[j].piecesCompleted)/float64(uch[j].piecesNeeded)
}
func (uch uploadChunkHeap) Swap(i, j int)       { uch[i], uch[j] = uch[j], uch[i] }
func (uch *uploadChunkHeap) Push(x interface{}) { *uch = append(*uch, x.(*unfinishedUploadChunk)) }
func (uch *uploadChunkHeap) Pop() interface{} {
	old := *uch
	n := len(old)
	x := old[n-1]
	*uch = old[0 : n-1]
	return x
}

// managedPush will add a chunk to the upload heap.
func (uh *uploadHeap) managedPush(uuc *unfinishedUploadChunk) {
	// Create the unique chunk id.
	ucid := uploadChunkID{
		fileUID: uuc.renterFile.staticUID,
		index:   uuc.index,
	}
	// Sanity check: fileUID should not be the empty value.
	if uuc.renterFile.staticUID == "" {
		panic("empty string for file UID")
	}

	// Check whether this chunk is already being repaired. If not, add it to the
	// upload chunk heap.
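	//
	// Both the membership check and the insertion happen under uh.mu, so a
	// chunk that is already active cannot be queued twice by concurrent
	// callers.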
	uh.mu.Lock()
	_, exists := uh.activeChunks[ucid]
	if !exists {
		uh.activeChunks[ucid] = struct{}{}
		uh.heap.Push(uuc)
	}
	uh.mu.Unlock()
}

// managedPop will pull a chunk off of the upload heap and return it.
func (uh *uploadHeap) managedPop() (uc *unfinishedUploadChunk) {
	uh.mu.Lock()
	if len(uh.heap) > 0 {
		uc = heap.Pop(&uh.heap).(*unfinishedUploadChunk)
	}
	uh.mu.Unlock()
	return uc
}

// buildUnfinishedChunks will pull all of the unfinished chunks out of a file.
//
// TODO / NOTE: This code can be substantially simplified once the files store
// the HostPubKey instead of the FileContractID, and can be simplified even
// further once the layout is per-chunk instead of per-filecontract.
func (r *Renter) buildUnfinishedChunks(f *file, hosts map[string]struct{}) []*unfinishedUploadChunk {
	// Files are not threadsafe.
	f.mu.Lock()
	defer f.mu.Unlock()

	// If the file is not being tracked, don't repair it.
	trackedFile, exists := r.persist.Tracking[f.name]
	if !exists {
		return nil
	}

	// If we don't have enough workers for the file, don't repair it right now.
	if len(r.workerPool) < f.erasureCode.MinPieces() {
		return nil
	}

	// Assemble the set of chunks.
	//
	// TODO / NOTE: Future files may have a different method for determining the
	// number of chunks. Changes will be made due to things like sparse files,
	// and the fact that chunks are going to be different sizes.
	chunkCount := f.numChunks()
	newUnfinishedChunks := make([]*unfinishedUploadChunk, chunkCount)
	for i := uint64(0); i < chunkCount; i++ {
		newUnfinishedChunks[i] = &unfinishedUploadChunk{
			renterFile: f,
			localPath:  trackedFile.RepairPath,

			id: uploadChunkID{
				fileUID: f.staticUID,
				index:   i,
			},

			index:  i,
			length: f.staticChunkSize(),
			offset: int64(i * f.staticChunkSize()),

			// memoryNeeded has to also include the logical data, and also
			// include the overhead for encryption.
			//
			// TODO / NOTE: If we adjust the file to have a flexible encryption
			// scheme, we'll need to adjust the overhead stuff too.
			//
			// TODO: Currently we request memory for all of the pieces as well
			// as the minimum pieces, but we perhaps don't need to request all
			// of that.
			memoryNeeded:  f.pieceSize*uint64(f.erasureCode.NumPieces()+f.erasureCode.MinPieces()) + uint64(f.erasureCode.NumPieces()*crypto.TwofishOverhead),
			minimumPieces: f.erasureCode.MinPieces(),
			piecesNeeded:  f.erasureCode.NumPieces(),

			physicalChunkData: make([][]byte, f.erasureCode.NumPieces()),

			pieceUsage:  make([]bool, f.erasureCode.NumPieces()),
			unusedHosts: make(map[string]struct{}),
		}
		// Every chunk can have a different set of unused hosts.
		for host := range hosts {
			newUnfinishedChunks[i].unusedHosts[host] = struct{}{}
		}
	}

	// Iterate through the contracts of the file and mark which hosts are
	// already in use for the chunk. As you delete hosts from the 'unusedHosts'
	// map, also increment the 'piecesCompleted' value.
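	//
	// A host contributes at most one piece to 'piecesCompleted' per chunk:
	// once it supplies a piece (even a duplicate of one that is already
	// counted), it is removed from 'unusedHosts' and any further pieces it
	// holds are ignored. Contracts that have disappeared or are no longer
	// good for renew are skipped and do not count toward redundancy.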
	saveFile := false
	for fcid, fileContract := range f.contracts {
		pk := r.hostContractor.ResolveIDToPubKey(fcid)
		recentContract, exists := r.hostContractor.ContractByPublicKey(pk)
		contractUtility, exists2 := r.hostContractor.ContractUtility(pk)
		if exists != exists2 {
			build.Critical("got a contract without utility or vice versa which shouldn't happen",
				exists, exists2)
		}
		if !exists || !exists2 {
			// File contract does not seem to be part of the host anymore.
			// Delete this contract and mark the file to be saved.
			delete(f.contracts, fcid)
			saveFile = true
			continue
		}
		if !contractUtility.GoodForRenew {
			// We are no longer renewing with this contract, so it does not
			// count for redundancy.
			continue
		}
		hpk := recentContract.HostPublicKey

		// Mark the chunk set based on the pieces in this contract.
		for _, piece := range fileContract.Pieces {
			_, exists := newUnfinishedChunks[piece.Chunk].unusedHosts[hpk.String()]
			redundantPiece := newUnfinishedChunks[piece.Chunk].pieceUsage[piece.Piece]
			if exists && !redundantPiece {
				newUnfinishedChunks[piece.Chunk].pieceUsage[piece.Piece] = true
				newUnfinishedChunks[piece.Chunk].piecesCompleted++
				delete(newUnfinishedChunks[piece.Chunk].unusedHosts, hpk.String())
			} else if exists {
				// This host has a piece, but it is the same piece another host
				// has. We should still remove the host from the unusedHosts
				// since one host having multiple pieces of a chunk might lead
				// to unexpected issues.
				delete(newUnfinishedChunks[piece.Chunk].unusedHosts, hpk.String())
			}
		}
	}
	// If 'saveFile' is marked, it means we deleted some dead contracts and
	// cleaned up the file a bit. Save the file to clean up some space on disk
	// and prevent the same work from being repeated after the next restart.
	//
	// TODO / NOTE: This process isn't going to make sense anymore once we
	// switch to chunk-based saving.
	if saveFile {
		err := r.saveFile(f)
		if err != nil {
			r.log.Println("error while saving a file after pruning some contracts from it:", err)
		}
	}

	// Iterate through the set of newUnfinishedChunks and remove any that are
	// completed.
	incompleteChunks := newUnfinishedChunks[:0]
	for i := 0; i < len(newUnfinishedChunks); i++ {
		if newUnfinishedChunks[i].piecesCompleted < newUnfinishedChunks[i].piecesNeeded {
			incompleteChunks = append(incompleteChunks, newUnfinishedChunks[i])
		}
	}
	// TODO: Don't return chunks that can't be downloaded, uploaded or otherwise
	// helped by the upload process.
	return incompleteChunks
}

// managedBuildChunkHeap will iterate through all of the files in the renter and
// construct a chunk heap.
func (r *Renter) managedBuildChunkHeap(hosts map[string]struct{}) {
	// Loop through the whole set of files and get a list of chunks to add to
	// the heap.
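	//
	// The 'goodForRenew' and 'offline' maps built in the first pass are only
	// used by the second pass, which computes each file's redundancy when
	// warning about files that are missing from the local disk.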
	id := r.mu.RLock()
	goodForRenew := make(map[types.FileContractID]bool)
	offline := make(map[types.FileContractID]bool)
	for _, file := range r.files {
		file.mu.RLock()
		for cid := range file.contracts {
			resolvedID := r.hostContractor.ResolveIDToPubKey(cid)
			cu, ok := r.hostContractor.ContractUtility(resolvedID)
			goodForRenew[cid] = ok && cu.GoodForRenew
			offline[cid] = r.hostContractor.IsOffline(resolvedID)
		}
		file.mu.RUnlock()

		unfinishedUploadChunks := r.buildUnfinishedChunks(file, hosts)
		for i := 0; i < len(unfinishedUploadChunks); i++ {
			r.uploadHeap.managedPush(unfinishedUploadChunks[i])
		}
	}
	for _, file := range r.files {
		file.mu.RLock()
		// Check whether the file is tracked with a local copy on disk.
		tf, exists := r.persist.Tracking[file.name]
		if exists {
			// If the local file is missing and redundancy is below 1, log a
			// warning to the renter log, since the file may be unrecoverable.
			if _, err := os.Stat(tf.RepairPath); os.IsNotExist(err) && file.redundancy(offline, goodForRenew) < 1 {
				r.log.Println("File not found on disk and possibly unrecoverable:", tf.RepairPath)
			}
		}
		file.mu.RUnlock()
	}
	r.mu.RUnlock(id)
}

// managedPrepareNextChunk takes the next chunk from the chunk heap and prepares
// it for upload. Preparation includes blocking until enough memory is
// available, fetching the logical data for the chunk (either from the disk or
// from the network), erasure coding the logical data into the physical data,
// and then finally passing the work onto the workers.
func (r *Renter) managedPrepareNextChunk(uuc *unfinishedUploadChunk, hosts map[string]struct{}) {
	// Grab the next chunk, loop until we have enough memory, update the amount
	// of memory available, and then spin up a thread to asynchronously handle
	// the rest of the chunk tasks.
	if !r.memoryManager.Request(uuc.memoryNeeded, memoryPriorityLow) {
		return
	}
	// Fetch the chunk in a separate goroutine, as it can take a long time and
	// does not need to bottleneck the repair loop.
	go r.managedFetchAndRepairChunk(uuc)
}

// managedRefreshHostsAndWorkers will reset the set of hosts and the set of
// workers for the renter.
func (r *Renter) managedRefreshHostsAndWorkers() map[string]struct{} {
	// Grab the current set of contracts and use them to build a list of hosts
	// that are currently active. The hosts are assembled into a map where the
	// key is the String() representation of the host's SiaPublicKey.
	//
	// TODO / NOTE: This code can be removed once files store the HostPubKey
	// of the hosts they are using, instead of just the FileContractID.
	currentContracts := r.hostContractor.Contracts()
	hosts := make(map[string]struct{})
	for _, contract := range currentContracts {
		hosts[contract.HostPublicKey.String()] = struct{}{}
	}
	// Refresh the worker pool as well.
	r.managedUpdateWorkerPool()
	return hosts
}

// threadedUploadLoop is a background thread that checks on the health of files,
// tracking the least healthy files and queuing the worst ones for repair.
func (r *Renter) threadedUploadLoop() {
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	for {
		// Wait until the renter is online to proceed.
		if !r.managedBlockUntilOnline() {
			// The renter shut down before the internet connection was restored.
			return
		}

		// Refresh the worker pool and get the set of hosts that are currently
		// useful for uploading.
		hosts := r.managedRefreshHostsAndWorkers()

		// Build a min-heap of chunks organized by upload progress.
		//
		// TODO: After replacing the filesystem to resemble a tree, we'll be
		// able to go through the filesystem piecewise instead of doing
		// everything all at once.
		r.managedBuildChunkHeap(hosts)
		r.uploadHeap.mu.Lock()
		heapLen := r.uploadHeap.heap.Len()
		r.uploadHeap.mu.Unlock()
		r.log.Println("Repairing", heapLen, "chunks")

		// Work through the heap. Chunks will be processed one at a time until
		// the heap is whittled down. When the heap is empty, we wait for new
		// files in a loop and then process those. When the rebuild signal is
		// received, we start over with the outer loop that rebuilds the heap
		// and re-checks the health of all the files.
		rebuildHeapSignal := time.After(rebuildChunkHeapInterval)
		for {
			// Return if the renter has shut down.
			select {
			case <-r.tg.StopChan():
				return
			default:
			}

			// Break to the outer loop if not online.
			if !r.g.Online() {
				break
			}

			// Check if there is work by trying to pop off the next chunk from
			// the heap.
			nextChunk := r.uploadHeap.managedPop()
			if nextChunk == nil {
				break
			}

			// Make sure we have enough workers for this chunk to reach minimum
			// redundancy. Otherwise we ignore this chunk for now and try again
			// the next time we rebuild the heap and refresh the workers.
			id := r.mu.RLock()
			availableWorkers := len(r.workerPool)
			r.mu.RUnlock(id)
			if availableWorkers < nextChunk.minimumPieces {
				continue
			}

			// Perform the work. managedPrepareNextChunk will block until
			// enough memory is available to perform the work, slowing this
			// thread down to using only the resources that are available.
			r.managedPrepareNextChunk(nextChunk, hosts)
			continue
		}

		// Block until new work is required.
		select {
		case <-r.uploadHeap.newUploads:
			// User has uploaded a new file.
		case <-rebuildHeapSignal:
			// Time to check the filesystem health again.
		case <-r.tg.StopChan():
			// The renter has shut down.
			return
		}
	}
}