gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/repair.go (about) 1 package renter 2 3 import ( 4 "fmt" 5 "path/filepath" 6 "strings" 7 "time" 8 9 "gitlab.com/NebulousLabs/errors" 10 "gitlab.com/NebulousLabs/fastrand" 11 12 "gitlab.com/SkynetLabs/skyd/build" 13 "gitlab.com/SkynetLabs/skyd/skymodules" 14 ) 15 16 var ( 17 // errNoStuckFiles is a helper to indicate that there are no stuck files in 18 // the renter's directory 19 errNoStuckFiles = errors.New("no stuck files") 20 21 // errNoStuckChunks is a helper to indicate that there are no stuck chunks 22 // in a siafile 23 errNoStuckChunks = errors.New("no stuck chunks") 24 ) 25 26 // managedAddRandomStuckChunks will try and add up to 27 // maxRandomStuckChunksAddToHeap random stuck chunks to the upload heap 28 func (r *Renter) managedAddRandomStuckChunks(hosts map[string]struct{}) ([]skymodules.SiaPath, error) { 29 var dirSiaPaths []skymodules.SiaPath 30 // Remember number of stuck chunks we are starting with 31 prevNumStuckChunks, prevNumRandomStuckChunks := r.staticUploadHeap.managedNumStuckChunks() 32 // Check if there is space in the heap. There is space if the number of 33 // random stuck chunks has not exceeded maxRandomStuckChunksInHeap and the 34 // total number of stuck chunks as not exceeded maxStuckChunksInHeap 35 spaceInHeap := prevNumRandomStuckChunks < maxRandomStuckChunksInHeap && prevNumStuckChunks < maxStuckChunksInHeap 36 for i := 0; i < maxRandomStuckChunksAddToHeap && spaceInHeap; i++ { 37 // Randomly get directory with stuck files 38 dirSiaPath, err := r.managedStuckDirectory() 39 if err != nil { 40 return dirSiaPaths, errors.AddContext(err, "unable to get random stuck directory") 41 } 42 43 // Get Random stuck file from directory 44 siaPath, err := r.managedStuckFile(dirSiaPath) 45 if err != nil { 46 return dirSiaPaths, errors.AddContext(err, "unable to get random stuck file in dir "+dirSiaPath.String()) 47 } 48 49 // Add stuck chunk to upload heap and signal repair needed 50 err = r.managedBuildAndPushRandomChunk(siaPath, hosts, targetStuckChunks, r.staticRepairMemoryManager) 51 if err != nil { 52 return dirSiaPaths, errors.AddContext(err, "unable to push random stuck chunk from '"+siaPath.String()+"' of '"+dirSiaPath.String()+"'") 53 } 54 55 // Sanity check that stuck chunks were added 56 currentNumStuckChunks, currentNumRandomStuckChunks := r.staticUploadHeap.managedNumStuckChunks() 57 if currentNumRandomStuckChunks <= prevNumRandomStuckChunks { 58 // If the number of stuck chunks in the heap is not increasing 59 // then break out of this loop in order to prevent getting stuck 60 // in an infinite loop 61 break 62 } 63 64 // Remember the directory so bubble can be called on it at the end of 65 // the iteration 66 dirSiaPaths = append(dirSiaPaths, dirSiaPath) 67 r.staticRepairLog.Printf("Added %v stuck chunks from %s", currentNumRandomStuckChunks-prevNumRandomStuckChunks, dirSiaPath.String()) 68 prevNumStuckChunks = currentNumStuckChunks 69 prevNumRandomStuckChunks = currentNumRandomStuckChunks 70 spaceInHeap = prevNumRandomStuckChunks < maxRandomStuckChunksInHeap && prevNumStuckChunks < maxStuckChunksInHeap 71 } 72 return dirSiaPaths, nil 73 } 74 75 // managedAddStuckChunksFromStuckStack will try and add up to 76 // maxStuckChunksInHeap stuck chunks to the upload heap from the files in the 77 // stuck stack. 78 func (r *Renter) managedAddStuckChunksFromStuckStack(hosts map[string]struct{}) ([]skymodules.SiaPath, error) { 79 var dirSiaPaths []skymodules.SiaPath 80 offline, goodForRenew, _, _ := r.callRenterContractsAndUtilities() 81 numStuckChunks, _ := r.staticUploadHeap.managedNumStuckChunks() 82 for r.staticStuckStack.managedLen() > 0 && numStuckChunks < maxStuckChunksInHeap { 83 // Pop the first file SiaPath 84 siaPath := r.staticStuckStack.managedPop() 85 86 // Add stuck chunks to uploadHeap 87 err := r.managedAddStuckChunksToHeap(siaPath, hosts, offline, goodForRenew) 88 if err != nil && !errors.Contains(err, errNoStuckChunks) { 89 return dirSiaPaths, errors.AddContext(err, "unable to add stuck chunks to heap") 90 } 91 92 // Since we either added stuck chunks to the heap from this file, 93 // there are no stuck chunks left in the file, or all the stuck 94 // chunks for the file are already being worked on, remember the 95 // directory so we can call bubble on it at the end of this 96 // iteration of the stuck loop to update the filesystem 97 dirSiaPath, err := siaPath.Dir() 98 if err != nil { 99 return dirSiaPaths, errors.AddContext(err, "unable to get directory siapath") 100 } 101 dirSiaPaths = append(dirSiaPaths, dirSiaPath) 102 numStuckChunks, _ = r.staticUploadHeap.managedNumStuckChunks() 103 } 104 return dirSiaPaths, nil 105 } 106 107 // managedAddStuckChunksToHeap tries to add as many stuck chunks from a siafile 108 // to the upload heap as possible 109 func (r *Renter) managedAddStuckChunksToHeap(siaPath skymodules.SiaPath, hosts map[string]struct{}, offline, goodForRenew map[string]bool) (err error) { 110 // Open File 111 sf, err := r.staticFileSystem.OpenSiaFile(siaPath) 112 if err != nil { 113 return fmt.Errorf("unable to open siafile %v, error: %v", siaPath, err) 114 } 115 defer func() { 116 err = errors.Compose(err, sf.Close()) 117 }() 118 119 // Check if there are still stuck chunks to repair 120 if sf.NumStuckChunks() == 0 { 121 return errNoStuckChunks 122 } 123 124 // Build unfinished stuck chunks 125 var allErrors error 126 unfinishedStuckChunks := r.managedBuildUnfinishedChunks(sf, hosts, targetStuckChunks, offline, goodForRenew, r.staticRepairMemoryManager) 127 defer func() { 128 // Close out remaining file entries 129 for _, chunk := range unfinishedStuckChunks { 130 allErrors = errors.Compose(allErrors, chunk.Close()) 131 } 132 }() 133 134 // Add up to maxStuckChunksInHeap stuck chunks to the upload heap 135 var chunk *unfinishedUploadChunk 136 stuckChunksAdded := 0 137 for len(unfinishedStuckChunks) > 0 && stuckChunksAdded < maxStuckChunksInHeap { 138 chunk = unfinishedStuckChunks[0] 139 unfinishedStuckChunks = unfinishedStuckChunks[1:] 140 chunk.stuckRepair = true 141 chunk.fileRecentlySuccessful = true 142 _, pushed, err := r.managedPushChunkForRepair(chunk, chunkTypeLocalChunk) 143 if err != nil { 144 return errors.Compose(allErrors, err, chunk.Close()) 145 } 146 if !pushed { 147 // Stuck chunk unable to be added. Close the file entry of that 148 // chunk 149 allErrors = errors.Compose(allErrors, chunk.Close()) 150 continue 151 } 152 stuckChunksAdded++ 153 } 154 if stuckChunksAdded > 0 { 155 r.staticRepairLog.Printf("Added %v stuck chunks from %s to the repair heap", stuckChunksAdded, siaPath.String()) 156 } 157 158 // check if there are more stuck chunks in the file 159 if len(unfinishedStuckChunks) > 0 { 160 r.staticStuckStack.managedPush(siaPath) 161 } 162 return allErrors 163 } 164 165 // managedStuckDirectory randomly finds a directory that contains stuck chunks 166 func (r *Renter) managedStuckDirectory() (skymodules.SiaPath, error) { 167 // Iterating of the renter directory until randomly ending up in a 168 // directory, break and return that directory 169 siaPath := skymodules.RootSiaPath() 170 for { 171 select { 172 // Check to make sure renter hasn't been shutdown 173 case <-r.tg.StopChan(): 174 return skymodules.SiaPath{}, nil 175 default: 176 } 177 178 directories, err := r.managedDirList(siaPath) 179 if err != nil { 180 return skymodules.SiaPath{}, err 181 } 182 // Sanity check that there is at least the current directory 183 if len(directories) == 0 { 184 build.Critical("No directories returned from DirList", siaPath.String()) 185 } 186 187 // Check if we are in an empty Directory. This will be the case before 188 // any files have been uploaded so the root directory is empty. Also it 189 // could happen if the only file in a directory was stuck and was very 190 // recently deleted so the health of the directory has not yet been 191 // updated. 192 emptyDir := len(directories) == 1 && directories[0].NumFiles == 0 193 if emptyDir { 194 return siaPath, errNoStuckFiles 195 } 196 // Check if there are stuck chunks in this directory 197 if directories[0].AggregateNumStuckChunks == 0 { 198 // Log error if we are not at the root directory 199 if !siaPath.IsRoot() { 200 r.staticLog.Println("WARN: ended up in directory with no stuck chunks that is not root directory:", siaPath) 201 } 202 return siaPath, errNoStuckFiles 203 } 204 // Check if we have reached a directory with only files 205 if len(directories) == 1 { 206 return siaPath, nil 207 } 208 209 // Get random int 210 rand := fastrand.Intn(int(directories[0].AggregateNumStuckChunks)) 211 // Use rand to decide which directory to go into. Work backwards over 212 // the slice of directories. Since the first element is the current 213 // directory that means that it is the sum of all the files and 214 // directories. We can chose a directory by subtracting the number of 215 // stuck chunks a directory has from rand and if rand gets to 0 or less 216 // we choose that directory 217 for i := len(directories) - 1; i >= 0; i-- { 218 // If we are on the last iteration and the directory does have files 219 // then return the current directory 220 if i == 0 { 221 siaPath = directories[0].SiaPath 222 return siaPath, nil 223 } 224 225 // Skip directories with no stuck chunks 226 if directories[i].AggregateNumStuckChunks == uint64(0) { 227 continue 228 } 229 230 rand = rand - int(directories[i].AggregateNumStuckChunks) 231 siaPath = directories[i].SiaPath 232 // If rand is less than 0 break out of the loop and continue into 233 // that directory 234 if rand < 0 { 235 break 236 } 237 } 238 } 239 } 240 241 // managedStuckFile finds a weighted random stuck file from a directory based on 242 // the number of stuck chunks in the stuck files of the directory 243 func (r *Renter) managedStuckFile(dirSiaPath skymodules.SiaPath) (siapath skymodules.SiaPath, err error) { 244 // Grab Aggregate number of stuck chunks from the directory 245 // 246 // NOTE: using the aggregate number of stuck chunks assumes that the 247 // directory and the files within the directory are in sync. This is ok to 248 // do as the risks associated with being out of sync are low. 249 siaDir, err := r.staticFileSystem.OpenSiaDir(dirSiaPath) 250 if err != nil { 251 return skymodules.SiaPath{}, errors.AddContext(err, "unable to open siaDir "+dirSiaPath.String()) 252 } 253 defer func() { 254 err = errors.Compose(err, siaDir.Close()) 255 }() 256 metadata, err := siaDir.Metadata() 257 if err != nil { 258 return skymodules.SiaPath{}, err 259 } 260 aggregateNumStuckChunks := metadata.AggregateNumStuckChunks 261 numStuckChunks := metadata.NumStuckChunks 262 numFiles := metadata.NumFiles 263 if aggregateNumStuckChunks == 0 || numStuckChunks == 0 || numFiles == 0 { 264 // If the number of stuck chunks or number of files is zero then this 265 // directory should not have been used to find a stuck file. Queue an 266 // update on the directories metadata to prevent this from happening 267 // again. 268 r.staticDirUpdateBatcher.callQueueDirUpdate(dirSiaPath) 269 err = fmt.Errorf("managedStuckFile should not have been called on %v, AggregateNumStuckChunks: %v, NumStuckChunks: %v, NumFiles: %v", dirSiaPath.String(), aggregateNumStuckChunks, numStuckChunks, numFiles) 270 return skymodules.SiaPath{}, err 271 } 272 273 // Use rand to decide which file to select. We can chose a file by 274 // subtracting the number of stuck chunks a file has from rand and if rand 275 // gets to 0 or less we choose that file 276 rand := fastrand.Intn(int(aggregateNumStuckChunks)) 277 278 // Read the directory, using ReadDir so we don't read all the siafiles 279 // unless we need to 280 fileinfos, err := r.staticFileSystem.ReadDir(dirSiaPath) 281 if err != nil { 282 return skymodules.SiaPath{}, errors.AddContext(err, "unable to open siadir: "+dirSiaPath.String()) 283 } 284 // Iterate over the fileinfos 285 for _, fi := range fileinfos { 286 // Check for SiaFile 287 if fi.IsDir() || filepath.Ext(fi.Name()) != skymodules.SiaFileExtension { 288 continue 289 } 290 291 // Get SiaPath 292 sp, err := dirSiaPath.Join(strings.TrimSuffix(fi.Name(), skymodules.SiaFileExtension)) 293 if err != nil { 294 return skymodules.SiaPath{}, errors.AddContext(err, "unable to join the siapath with the file: "+fi.Name()) 295 } 296 297 // Open SiaFile, grab the number of stuck chunks and close the file 298 f, err := r.staticFileSystem.OpenSiaFile(sp) 299 if err != nil { 300 return skymodules.SiaPath{}, errors.AddContext(err, "could not open siafileset for "+sp.String()) 301 } 302 numStuckChunks := int(f.NumStuckChunks()) 303 if err := f.Close(); err != nil { 304 return skymodules.SiaPath{}, errors.AddContext(err, "failed to close filenode "+sp.String()) 305 } 306 307 // Check if stuck 308 if numStuckChunks == 0 { 309 continue 310 } 311 312 // Decrement rand and check if we have decremented fully 313 rand = rand - numStuckChunks 314 siapath = sp 315 if rand < 0 { 316 break 317 } 318 } 319 if siapath.IsEmpty() { 320 // If no files were selected from the directory than there is a mismatch 321 // between the file metadata and the directory metadata. Queue an update 322 // on the directory's metadata so this doesn't happen again. 323 r.staticDirUpdateBatcher.callQueueDirUpdate(dirSiaPath) 324 r.staticDirUpdateBatcher.callFlush() // wait to avoid spinning 325 326 return skymodules.SiaPath{}, errors.New("no files selected from directory " + dirSiaPath.String()) 327 } 328 return siapath, nil 329 } 330 331 // managedSubDirectories reads a directory and returns a slice of all the sub 332 // directory SiaPaths 333 func (r *Renter) managedSubDirectories(siaPath skymodules.SiaPath) ([]skymodules.SiaPath, error) { 334 // Read directory 335 fileinfos, err := r.staticFileSystem.ReadDir(siaPath) 336 if err != nil { 337 return nil, err 338 } 339 // Find all sub directory SiaPaths 340 folders := make([]skymodules.SiaPath, 0, len(fileinfos)) 341 for _, fi := range fileinfos { 342 if fi.IsDir() { 343 subDir, err := siaPath.Join(fi.Name()) 344 if err != nil { 345 return nil, err 346 } 347 folders = append(folders, subDir) 348 } 349 } 350 return folders, nil 351 } 352 353 // threadedStuckFileLoop works through the renter directory and finds the stuck 354 // chunks and tries to repair them 355 func (r *Renter) threadedStuckFileLoop() { 356 err := r.tg.Add() 357 if err != nil { 358 return 359 } 360 defer r.tg.Done() 361 362 // Loop until the renter has shutdown or until there are no stuck chunks 363 for { 364 // Return if the renter has shut down. 365 select { 366 case <-r.tg.StopChan(): 367 return 368 default: 369 } 370 371 // Wait until the renter is online to proceed. 372 if !r.managedBlockUntilOnline() { 373 // The renter shut down before the internet connection was restored. 374 r.staticLog.Println("renter shutdown before internet connection") 375 return 376 } 377 378 // As we add stuck chunks to the upload heap we want to remember the 379 // directories they came from so we can call bubble to update the 380 // filesystem 381 var dirSiaPaths []skymodules.SiaPath 382 383 // Refresh the hosts and workers before adding stuck chunks to the 384 // upload heap 385 hosts := r.managedRefreshHostsAndWorkers() 386 387 // Try and add stuck chunks from the stuck stack. We try and add these 388 // first as they will be from files that previously had a successful 389 // stuck chunk repair. The previous success gives us more confidence 390 // that it is more likely additional stuck chunks from these files will 391 // be successful compared to a random stuck chunk from the renter's 392 // directory. 393 stuckStackDirSiaPaths, err := r.managedAddStuckChunksFromStuckStack(hosts) 394 if err != nil { 395 r.staticRepairLog.Println("WARN: error adding stuck chunks to repair heap from files with previously successful stuck repair jobs:", err) 396 } 397 dirSiaPaths = append(dirSiaPaths, stuckStackDirSiaPaths...) 398 399 // Try add random stuck chunks to upload heap 400 randomDirSiaPaths, err := r.managedAddRandomStuckChunks(hosts) 401 if err != nil { 402 r.staticRepairLog.Println("WARN: error adding random stuck chunks to upload heap:", err) 403 } 404 dirSiaPaths = append(dirSiaPaths, randomDirSiaPaths...) 405 406 // Check if any stuck chunks were added to the upload heap 407 numStuckChunks, _ := r.staticUploadHeap.managedNumStuckChunks() 408 if numStuckChunks == 0 { 409 // Block until new work is required. 410 select { 411 case <-r.tg.StopChan(): 412 // The renter has shut down. 413 return 414 case <-r.staticUploadHeap.stuckChunkFound: 415 // Health Loop found stuck chunk 416 case <-r.staticUploadHeap.stuckChunkSuccess: 417 // Stuck chunk was successfully repaired. 418 } 419 continue 420 } 421 422 // Signal that a repair is needed because stuck chunks were added to the 423 // upload heap 424 select { 425 case r.staticUploadHeap.repairNeeded <- struct{}{}: 426 default: 427 } 428 429 // Sleep until it is time to try and repair another stuck chunk 430 rebuildStuckHeapSignal := time.After(repairStuckChunkInterval) 431 select { 432 case <-r.tg.StopChan(): 433 // Return if the return has been shutdown 434 return 435 case <-rebuildStuckHeapSignal: 436 // Time to find another random chunk 437 case <-r.staticUploadHeap.stuckChunkSuccess: 438 // Stuck chunk was successfully repaired. 439 } 440 441 // Queue an update to all of the dirs that were visited and then block 442 // until all of the updates have completed and have their stats 443 // represented in the root aggregate metadata. 444 for _, dirSiaPath := range dirSiaPaths { 445 r.staticDirUpdateBatcher.callQueueDirUpdate(dirSiaPath) 446 } 447 r.staticDirUpdateBatcher.callFlush() 448 } 449 }