// gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/repair.go

package renter

import (
	"fmt"
	"io/ioutil"
	"time"

	"gitlab.com/NebulousLabs/errors"
	"gitlab.com/NebulousLabs/fastrand"

	"gitlab.com/SiaPrime/SiaPrime/build"
	"gitlab.com/SiaPrime/SiaPrime/modules"
)

var (
	// errNoStuckFiles is a helper to indicate that there are no stuck files in
	// the renter's directory.
	errNoStuckFiles = errors.New("no stuck files")

	// errNoStuckChunks is a helper to indicate that there are no stuck chunks
	// in a siafile.
	errNoStuckChunks = errors.New("no stuck chunks")
)

// managedAddRandomStuckChunks will try to add up to maxStuckChunksInHeap
// random stuck chunks to the upload heap.
func (r *Renter) managedAddRandomStuckChunks(hosts map[string]struct{}) ([]modules.SiaPath, error) {
	var dirSiaPaths []modules.SiaPath
	prevNumStuckChunks := r.uploadHeap.managedNumStuckChunks()
	for r.uploadHeap.managedNumStuckChunks() < maxStuckChunksInHeap {
		// Randomly get a directory with stuck files.
		dirSiaPath, err := r.managedStuckDirectory()
		if err != nil {
			return dirSiaPaths, errors.AddContext(err, "unable to get random stuck directory")
		}
		// Remember the directory so bubble can be called on it at the end
		// of the iteration.
		dirSiaPaths = append(dirSiaPaths, dirSiaPath)

		// Add stuck chunks to the upload heap.
		r.managedBuildChunkHeap(dirSiaPath, hosts, targetStuckChunks)

		// Sanity check that stuck chunks were added.
		currentNumStuckChunks := r.uploadHeap.managedNumStuckChunks()
		if currentNumStuckChunks <= prevNumStuckChunks {
			// If the number of stuck chunks in the heap is not increasing
			// then break out of this loop in order to prevent getting stuck
			// in an infinite loop.
			break
		}
		r.log.Debugf("Added %v stuck chunks from directory `%s`", currentNumStuckChunks-prevNumStuckChunks, dirSiaPath.String())
		prevNumStuckChunks = currentNumStuckChunks
	}
	return dirSiaPaths, nil
}
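
// The currentNumStuckChunks <= prevNumStuckChunks check above is a progress
// guard: the loop keeps pulling random directories only while each pass
// actually grows the heap. As a standalone sketch of the pattern (fillUntil
// and its signature are illustrative, not part of this file):
//
//	func fillUntil(capacity int, size func() int, fill func()) {
//		for size() < capacity {
//			before := size()
//			fill()
//			if size() <= before {
//				// No progress was made; stop instead of spinning forever.
//				break
//			}
//		}
//	}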

// managedAddStuckChunksFromStuckStack will try to add up to
// maxStuckChunksInHeap stuck chunks to the upload heap from the files in the
// stuck stack.
func (r *Renter) managedAddStuckChunksFromStuckStack(hosts map[string]struct{}) ([]modules.SiaPath, error) {
	var dirSiaPaths []modules.SiaPath
	offline, goodForRenew, _ := r.managedContractUtilityMaps()
	for r.stuckStack.managedLen() > 0 && r.uploadHeap.managedNumStuckChunks() < maxStuckChunksInHeap {
		// Pop the next file's SiaPath off the stuck stack.
		siaPath := r.stuckStack.managedPop()

		// Add stuck chunks to the upload heap.
		err := r.managedAddStuckChunksToHeap(siaPath, hosts, offline, goodForRenew)
		if err != nil && err != errNoStuckChunks {
			return dirSiaPaths, errors.AddContext(err, "unable to add stuck chunks to heap")
		}

		// At this point either stuck chunks from this file were added to the
		// heap, the file has no stuck chunks left, or all of the file's stuck
		// chunks are already being worked on. In every case, remember the
		// directory so bubble can be called on it at the end of this
		// iteration of the stuck loop to update the filesystem.
		dirSiaPath, err := siaPath.Dir()
		if err != nil {
			return dirSiaPaths, errors.AddContext(err, "unable to get directory siapath")
		}
		dirSiaPaths = append(dirSiaPaths, dirSiaPath)
	}
	return dirSiaPaths, nil
}
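
// The stuck stack consumed above is a LIFO of siafile paths that recently had
// a successful stuck chunk repair. As a rough sketch of the data structure
// (illustrative only; the actual stuckStack type elsewhere in this package is
// more involved):
//
//	type pathStack struct {
//		mu    sync.Mutex
//		paths []modules.SiaPath
//	}
//
//	func (ps *pathStack) managedLen() int {
//		ps.mu.Lock()
//		defer ps.mu.Unlock()
//		return len(ps.paths)
//	}
//
//	func (ps *pathStack) managedPush(p modules.SiaPath) {
//		ps.mu.Lock()
//		defer ps.mu.Unlock()
//		ps.paths = append(ps.paths, p)
//	}
//
//	func (ps *pathStack) managedPop() modules.SiaPath {
//		ps.mu.Lock()
//		defer ps.mu.Unlock()
//		p := ps.paths[len(ps.paths)-1]
//		ps.paths = ps.paths[:len(ps.paths)-1]
//		return p
//	}
//
// Popping the most recent success first biases the repair loop toward files
// that are currently repairable.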

// managedAddStuckChunksToHeap tries to add as many stuck chunks from a siafile
// to the upload heap as possible.
func (r *Renter) managedAddStuckChunksToHeap(siaPath modules.SiaPath, hosts map[string]struct{}, offline, goodForRenew map[string]bool) error {
	// Open the file.
	sf, err := r.staticFileSet.Open(siaPath)
	if err != nil {
		return fmt.Errorf("unable to open siafile %v, error: %v", siaPath, err)
	}
	defer sf.Close()

	// Check if there are still stuck chunks to repair.
	if sf.NumStuckChunks() == 0 {
		return errNoStuckChunks
	}

	// Build unfinished stuck chunks.
	var allErrors error
	unfinishedStuckChunks := r.managedBuildUnfinishedChunks(sf, hosts, targetStuckChunks, offline, goodForRenew)
	defer func() {
		// Close out the remaining file entries.
		for _, chunk := range unfinishedStuckChunks {
			if err = chunk.fileEntry.Close(); err != nil {
				// If there is an error, log it and append it to the other
				// errors so that we close as many files as possible.
				r.log.Println("WARN: unable to close file:", err)
				allErrors = errors.Compose(allErrors, err)
			}
		}
	}()

	// Add up to maxStuckChunksInHeap stuck chunks to the upload heap.
	var chunk *unfinishedUploadChunk
	stuckChunksAdded := 0
	for len(unfinishedStuckChunks) > 0 && stuckChunksAdded < maxStuckChunksInHeap {
		chunk = unfinishedStuckChunks[0]
		unfinishedStuckChunks = unfinishedStuckChunks[1:]
		chunk.stuckRepair = true
		if !r.uploadHeap.managedPush(chunk) {
			// The stuck chunk could not be added, so close the file entry of
			// that chunk.
			if err = chunk.fileEntry.Close(); err != nil {
				// If there is an error, log it and append it to the other
				// errors so that we close as many files as possible.
				r.log.Println("WARN: unable to close file:", err)
				allErrors = errors.Compose(allErrors, err)
			}
			continue
		}
		stuckChunksAdded++
	}

	// Check if there are more stuck chunks in the file.
	if len(unfinishedStuckChunks) > 0 {
		r.stuckStack.managedPush(siaPath)
	}
	return allErrors
}

// managedOldestHealthCheckTime finds the lowest level directory with the
// oldest LastHealthCheckTime.
func (r *Renter) managedOldestHealthCheckTime() (modules.SiaPath, time.Time, error) {
	// Check the siadir metadata for the root files directory.
	siaPath := modules.RootSiaPath()
	metadata, err := r.managedDirectoryMetadata(siaPath)
	if err != nil {
		return modules.SiaPath{}, time.Time{}, err
	}

	// Follow the path of oldest LastHealthCheckTime to the lowest level
	// directory.
	for metadata.NumSubDirs > 0 {
		// Check to make sure the renter hasn't been shut down.
		select {
		case <-r.tg.StopChan():
			return modules.SiaPath{}, time.Time{}, errors.New("renter shutdown before oldestHealthCheckTime could be found")
		default:
		}

		// Check for subdirectories.
		subDirSiaPaths, err := r.managedSubDirectories(siaPath)
		if err != nil {
			return modules.SiaPath{}, time.Time{}, err
		}

		// Find the oldest LastHealthCheckTime of the subdirectories.
		updated := false
		for _, subDirPath := range subDirSiaPaths {
			// Check to make sure the renter hasn't been shut down.
			select {
			case <-r.tg.StopChan():
				return modules.SiaPath{}, time.Time{}, errors.New("renter shutdown before oldestHealthCheckTime could be found")
			default:
			}

			// Check the LastHealthCheckTime of the subdirectory.
			subMetadata, err := r.managedDirectoryMetadata(subDirPath)
			if err != nil {
				return modules.SiaPath{}, time.Time{}, err
			}

			// If the subdirectory's LastHealthCheckTime is after the current
			// LastHealthCheckTime, skip it, since we are already tracking an
			// older timestamp.
			if subMetadata.AggregateLastHealthCheckTime.After(metadata.AggregateLastHealthCheckTime) {
				continue
			}

			// Update the LastHealthCheckTime and follow the older path.
			updated = true
			metadata = subMetadata
			siaPath = subDirPath
		}

		// If the values were never updated with any of the subdirectory
		// values then we are in the directory we are looking for, so return.
		if !updated {
			return siaPath, metadata.AggregateLastHealthCheckTime, nil
		}
	}

	return siaPath, metadata.AggregateLastHealthCheckTime, nil
}
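
// The descent above is greedy: at each level it moves into the subdirectory
// with the oldest aggregate timestamp, and it stops when no subdirectory is
// at least as old as the directory it is already in. The same walk over a toy
// tree (dirs, times, and oldestDir are hypothetical, not part of this file):
//
//	// dirs maps each directory to its subdirectories; times maps each
//	// directory to its aggregate last health check time.
//	func oldestDir(dirs map[string][]string, times map[string]time.Time, cur string) string {
//		for {
//			next := cur
//			for _, sub := range dirs[cur] {
//				if !times[sub].After(times[next]) {
//					next = sub
//				}
//			}
//			if next == cur {
//				return cur // no subdirectory is older; this is the target
//			}
//			cur = next
//		}
//	}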

// managedStuckDirectory randomly finds a directory that contains stuck chunks.
func (r *Renter) managedStuckDirectory() (modules.SiaPath, error) {
	// Descend through the renter's directory tree at random until a directory
	// is chosen, then break and return that directory.
	siaPath := modules.RootSiaPath()
	for {
		select {
		// Check to make sure the renter hasn't been shut down.
		case <-r.tg.StopChan():
			return modules.SiaPath{}, nil
		default:
		}

		directories, err := r.DirList(siaPath)
		if err != nil {
			return modules.SiaPath{}, err
		}
		files, err := r.FileList(siaPath, false, false)
		if err != nil {
			return modules.SiaPath{}, err
		}
		// Sanity check that at least the current directory was returned.
		if len(directories) == 0 {
			build.Critical("No directories returned from DirList")
		}
		// Check if we are in an empty directory. This will be the case before
		// any files have been uploaded, when the root directory is empty. It
		// can also happen if the only file in a directory was stuck and was
		// very recently deleted, so the health of the directory has not yet
		// been updated.
		emptyDir := len(directories) == 1 && len(files) == 0
		if emptyDir {
			return siaPath, errNoStuckFiles
		}
		// Check if there are stuck chunks in this directory.
		if directories[0].AggregateNumStuckChunks == 0 {
			// Log an error if we are not at the root directory.
			if !siaPath.IsRoot() {
				r.log.Debugln("WARN: ended up in directory with no stuck chunks that is not root directory:", siaPath)
			}
			return siaPath, errNoStuckFiles
		}
		// Check if we have reached a directory that only contains files.
		if len(directories) == 1 {
			return siaPath, nil
		}

		// Get a random int in [0, AggregateNumStuckChunks).
		rand := fastrand.Intn(int(directories[0].AggregateNumStuckChunks))

		// Use rand to decide which directory to go into. Work backwards over
		// the slice of directories. Since the first element is the current
		// directory, its count is the sum over all of its files and
		// subdirectories. We choose a directory by subtracting the number of
		// stuck chunks that directory has from rand; once rand drops to zero
		// or below we choose that directory.
		for i := len(directories) - 1; i >= 0; i-- {
			// If we make it to the last iteration, double check that the
			// current directory has files.
			if i == 0 && len(files) == 0 {
				break
			}

			// If we are on the last iteration and the directory does have
			// files, then return the current directory.
			if i == 0 {
				siaPath = directories[0].SiaPath
				return siaPath, nil
			}

			// Skip directories with no stuck chunks.
			if directories[i].AggregateNumStuckChunks == uint64(0) {
				continue
			}

			rand = rand - int(directories[i].AggregateNumStuckChunks)
			siaPath = directories[i].SiaPath
			// If rand is zero or less, break out of the loop and descend into
			// that directory.
			if rand <= 0 {
				break
			}
		}
	}
}
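
// The backwards walk above is a weighted random selection: each directory is
// chosen with probability proportional to its aggregate number of stuck
// chunks, so directories with more stuck chunks are visited more often. The
// core of the technique as a standalone sketch (weightedChoice is
// hypothetical; it assumes the weights sum to something positive, since
// fastrand.Intn panics on a non-positive argument):
//
//	func weightedChoice(weights []uint64) int {
//		total := 0
//		for _, w := range weights {
//			total += int(w)
//		}
//		r := fastrand.Intn(total) // uniform in [0, total)
//		for i, w := range weights {
//			r -= int(w)
//			if r < 0 {
//				return i
//			}
//		}
//		return -1 // unreachable: r < total guarantees a hit in the loop
//	}
//
// managedStuckDirectory applies the same idea with the directories' stuck
// chunk counts as weights, walking in reverse so that index 0, the current
// directory itself, is only selected if it holds files of its own.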

// managedSubDirectories reads a directory and returns a slice of the SiaPaths
// of all its subdirectories.
func (r *Renter) managedSubDirectories(siaPath modules.SiaPath) ([]modules.SiaPath, error) {
	// Read the directory.
	fileinfos, err := ioutil.ReadDir(siaPath.SiaDirSysPath(r.staticFilesDir))
	if err != nil {
		return nil, err
	}
	// Find all subdirectory SiaPaths.
	folders := make([]modules.SiaPath, 0, len(fileinfos))
	for _, fi := range fileinfos {
		if fi.IsDir() {
			subDir, err := siaPath.Join(fi.Name())
			if err != nil {
				return nil, err
			}
			folders = append(folders, subDir)
		}
	}
	return folders, nil
}

// threadedStuckFileLoop works through the renter's directory, finds the stuck
// chunks, and tries to repair them.
func (r *Renter) threadedStuckFileLoop() {
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	// Loop until the renter has shut down or until there are no stuck chunks.
	for {
		// Return if the renter has shut down.
		select {
		case <-r.tg.StopChan():
			return
		default:
		}

		// Wait until the renter is online to proceed.
		if !r.managedBlockUntilOnline() {
			// The renter shut down before the internet connection was
			// restored.
			r.log.Debugln("renter shutdown before internet connection")
			return
		}

		// As we add stuck chunks to the upload heap we want to remember the
		// directories they came from so we can call bubble to update the
		// filesystem.
		var dirSiaPaths []modules.SiaPath

		// Refresh the hosts and workers before adding stuck chunks to the
		// upload heap.
		hosts := r.managedRefreshHostsAndWorkers()

		// Try to add stuck chunks from the stuck stack first. These come from
		// files that recently had a successful stuck chunk repair, which
		// gives us more confidence that additional stuck chunks from these
		// files are more likely to be repaired successfully than a random
		// stuck chunk from the renter's directory.
		stuckStackDirSiaPaths, err := r.managedAddStuckChunksFromStuckStack(hosts)
		if err != nil {
			r.log.Println("WARN: error adding stuck chunks to upload heap from stuck stack:", err)
		}
		dirSiaPaths = append(dirSiaPaths, stuckStackDirSiaPaths...)

		// Try to add random stuck chunks to the upload heap.
		randomDirSiaPaths, err := r.managedAddRandomStuckChunks(hosts)
		if err != nil {
			r.log.Println("WARN: error adding random stuck chunks to upload heap:", err)
		}
		dirSiaPaths = append(dirSiaPaths, randomDirSiaPaths...)

		// Check if any stuck chunks were added to the upload heap.
		numStuckChunks := r.uploadHeap.managedNumStuckChunks()
		if numStuckChunks == 0 {
			// Block until new work is required.
			select {
			case <-r.tg.StopChan():
				// The renter has shut down.
				return
			case <-r.uploadHeap.stuckChunkFound:
				// The health loop found a stuck chunk.
			case <-r.uploadHeap.stuckChunkSuccess:
				// A stuck chunk was successfully repaired.
			}
			continue
		}

		// Signal that a repair is needed because stuck chunks were added to
		// the upload heap.
		select {
		case r.uploadHeap.repairNeeded <- struct{}{}:
		default:
		}
		r.log.Println(numStuckChunks, "stuck chunks added to the upload heap, repair signal sent")

		// Sleep until it is time to try and repair another stuck chunk.
		rebuildStuckHeapSignal := time.After(repairStuckChunkInterval)
		select {
		case <-r.tg.StopChan():
			// Return if the renter has been shut down.
			return
		case <-rebuildStuckHeapSignal:
			// Time to find another random chunk.
		case <-r.uploadHeap.stuckChunkSuccess:
			// A stuck chunk was successfully repaired.
		}

		// Call bubble before continuing on to the next iteration to ensure
		// the filesystem is updated.
		for _, dirSiaPath := range dirSiaPaths {
			err = r.managedBubbleMetadata(dirSiaPath)
			if err != nil {
				r.log.Println("Error calling managedBubbleMetadata on `", dirSiaPath.String(), "`:", err)
				select {
				case <-time.After(stuckLoopErrorSleepDuration):
				case <-r.tg.StopChan():
					return
				}
			}
		}
	}
}
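
// The send on repairNeeded above uses the standard non-blocking signal
// pattern: the channel carries no data, only the fact that work is pending,
// so when a signal is already buffered there is nothing more to say and the
// extra send is dropped. A minimal sketch (ch and signal are illustrative):
//
//	var ch = make(chan struct{}, 1) // holds at most one pending signal
//
//	func signal(ch chan struct{}) {
//		select {
//		case ch <- struct{}{}:
//		default:
//			// A signal is already pending; dropping this one loses nothing.
//		}
//	}
//
// With a buffer of one, any number of signals sent between wakeups of the
// receiver collapse into a single pending wakeup.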

// threadedUpdateRenterHealth reads all the siafiles in the renter, calculates
// the health of each file, and updates the folder metadata.
func (r *Renter) threadedUpdateRenterHealth() {
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	// Loop until the renter has shut down or until the renter's top level
	// files directory has a LastHealthCheckTime within the
	// healthCheckInterval.
	for {
		select {
		// Check to make sure the renter hasn't been shut down.
		case <-r.tg.StopChan():
			return
		default:
		}

		// Follow the path of the oldest LastHealthCheckTime to get the
		// directory and timestamp.
		r.log.Debugln("Checking for oldest health check time")
		siaPath, lastHealthCheckTime, err := r.managedOldestHealthCheckTime()
		if err != nil {
			// If there is an error getting the lastHealthCheckTime, sleep
			// for a little bit before continuing.
			r.log.Debug("WARN: Could not find oldest health check time:", err)
			select {
			case <-time.After(healthLoopErrorSleepDuration):
			case <-r.tg.StopChan():
				return
			}
			continue
		}

		// Check if the time since the last check on the least recently
		// checked folder is inside the health check interval. If so, the
		// whole filesystem has been checked recently, and we can sleep until
		// the least recent check is outside the check interval.
		timeSinceLastCheck := time.Since(lastHealthCheckTime)
		if timeSinceLastCheck < healthCheckInterval {
			// Sleep until the least recent check is outside the check
			// interval.
			sleepDuration := healthCheckInterval - timeSinceLastCheck
			r.log.Debugln("Health loop sleeping for", sleepDuration)
			wakeSignal := time.After(sleepDuration)
			select {
			case <-r.tg.StopChan():
				return
			case <-wakeSignal:
			}
		}
		r.log.Debug("Health Loop calling bubble on '", siaPath.String(), "'")
		err = r.managedBubbleMetadata(siaPath)
		if err != nil {
			r.log.Println("Error calling managedBubbleMetadata on `", siaPath.String(), "`:", err)
			select {
			case <-time.After(healthLoopErrorSleepDuration):
			case <-r.tg.StopChan():
				return
			}
		}
	}
}
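
// The sleep in the health loop above is what paces it: the loop wakes exactly
// when the least recently checked directory becomes one full
// healthCheckInterval old, so every directory is revisited roughly once per
// interval. The arithmetic as a small sketch (timeUntilDue is hypothetical):
//
//	// For example, with an interval of 1h and a directory last checked
//	// 15m ago, the loop sleeps for 45m.
//	func timeUntilDue(lastCheck time.Time, interval time.Duration) time.Duration {
//		elapsed := time.Since(lastCheck)
//		if elapsed >= interval {
//			return 0 // already due for a check
//		}
//		return interval - elapsed
//	}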