gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/repair.go

package renter

import (
	"fmt"
	"io/ioutil"
	"time"

	"gitlab.com/NebulousLabs/errors"
	"gitlab.com/NebulousLabs/fastrand"

	"gitlab.com/SiaPrime/SiaPrime/build"
	"gitlab.com/SiaPrime/SiaPrime/modules"
)

var (
	// errNoStuckFiles is a helper to indicate that there are no stuck files in
	// the renter's directory
	errNoStuckFiles = errors.New("no stuck files")

	// errNoStuckChunks is a helper to indicate that there are no stuck chunks
	// in a siafile
	errNoStuckChunks = errors.New("no stuck chunks")
)

// managedAddRandomStuckChunks will try to add up to maxStuckChunksInHeap
// random stuck chunks to the upload heap
func (r *Renter) managedAddRandomStuckChunks(hosts map[string]struct{}) ([]modules.SiaPath, error) {
	var dirSiaPaths []modules.SiaPath
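	// Remember how many stuck chunks are already in the upload heap so that
	// each iteration of the loop below can verify it actually added new stuck
	// chunks.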
	prevNumStuckChunks := r.uploadHeap.managedNumStuckChunks()
	for r.uploadHeap.managedNumStuckChunks() < maxStuckChunksInHeap {
		// Randomly get directory with stuck files
		dirSiaPath, err := r.managedStuckDirectory()
		if err != nil {
			return dirSiaPaths, errors.AddContext(err, "unable to get random stuck directory")
		}
		// Remember the directory so bubble can be called on it at the end
		// of the iteration
		dirSiaPaths = append(dirSiaPaths, dirSiaPath)

		// Add stuck chunks to upload heap and signal repair needed
		r.managedBuildChunkHeap(dirSiaPath, hosts, targetStuckChunks)

		// Sanity check that stuck chunks were added
		currentNumStuckChunks := r.uploadHeap.managedNumStuckChunks()
		if currentNumStuckChunks <= prevNumStuckChunks {
			// If the number of stuck chunks in the heap is not increasing
			// then break out of this loop in order to prevent getting stuck
			// in an infinite loop
			break
		}
		r.log.Debugf("Added %v stuck chunks from directory `%s`", currentNumStuckChunks-prevNumStuckChunks, dirSiaPath.String())
		prevNumStuckChunks = currentNumStuckChunks
	}
	return dirSiaPaths, nil
}

// managedAddStuckChunksFromStuckStack will try to add up to
// maxStuckChunksInHeap stuck chunks to the upload heap from the files in the
// stuck stack.
func (r *Renter) managedAddStuckChunksFromStuckStack(hosts map[string]struct{}) ([]modules.SiaPath, error) {
	var dirSiaPaths []modules.SiaPath
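	// Grab the offline and goodForRenew maps up front so the health of each
	// file's chunks can be evaluated against the renter's current contracts.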
	offline, goodForRenew, _ := r.managedContractUtilityMaps()
	for r.stuckStack.managedLen() > 0 && r.uploadHeap.managedNumStuckChunks() < maxStuckChunksInHeap {
		// Pop the first file SiaPath
		siaPath := r.stuckStack.managedPop()

		// Add stuck chunks to uploadHeap
		err := r.managedAddStuckChunksToHeap(siaPath, hosts, offline, goodForRenew)
		if err != nil && err != errNoStuckChunks {
			return dirSiaPaths, errors.AddContext(err, "unable to add stuck chunks to heap")
		}

		// Remember the file's directory so bubble can be called on it at the
		// end of this iteration of the stuck loop to update the filesystem.
		// At this point either stuck chunks from this file were added to the
		// heap, there are no stuck chunks left in the file, or all of the
		// file's stuck chunks are already being worked on.
		dirSiaPath, err := siaPath.Dir()
		if err != nil {
			return dirSiaPaths, errors.AddContext(err, "unable to get directory siapath")
		}
		dirSiaPaths = append(dirSiaPaths, dirSiaPath)
	}
	return dirSiaPaths, nil
}

// managedAddStuckChunksToHeap tries to add as many stuck chunks from a siafile
// to the upload heap as possible
func (r *Renter) managedAddStuckChunksToHeap(siaPath modules.SiaPath, hosts map[string]struct{}, offline, goodForRenew map[string]bool) error {
	// Open the siafile
	sf, err := r.staticFileSet.Open(siaPath)
	if err != nil {
		return fmt.Errorf("unable to open siafile %v, error: %v", siaPath, err)
	}
	defer sf.Close()

	// Check if there are still stuck chunks to repair
	if sf.NumStuckChunks() == 0 {
		return errNoStuckChunks
	}

	// Build the list of unfinished stuck chunks for the file
	var allErrors error
	unfinishedStuckChunks := r.managedBuildUnfinishedChunks(sf, hosts, targetStuckChunks, offline, goodForRenew)
	defer func() {
		// Close out remaining file entries
		for _, chunk := range unfinishedStuckChunks {
			if err = chunk.fileEntry.Close(); err != nil {
				// If there is an error, log it and compose it with the other
				// errors so that we close as many files as possible
				r.log.Println("WARN: unable to close file:", err)
				allErrors = errors.Compose(allErrors, err)
			}
		}
	}()

	// Add up to maxStuckChunksInHeap stuck chunks to the upload heap
	var chunk *unfinishedUploadChunk
	stuckChunksAdded := 0
	for len(unfinishedStuckChunks) > 0 && stuckChunksAdded < maxStuckChunksInHeap {
		chunk = unfinishedStuckChunks[0]
		unfinishedStuckChunks = unfinishedStuckChunks[1:]
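		// Mark the chunk as a stuck repair so that it is handled by the stuck
		// loop rather than the regular repair loop.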
		chunk.stuckRepair = true
		if !r.uploadHeap.managedPush(chunk) {
			// The stuck chunk could not be added to the heap. Close the file
			// entry of that chunk
			if err = chunk.fileEntry.Close(); err != nil {
				// If there is an error, log it and compose it with the other
				// errors so that we close as many files as possible
				r.log.Println("WARN: unable to close file:", err)
				allErrors = errors.Compose(allErrors, err)
			}
			continue
		}
		stuckChunksAdded++
	}

	// If there are stuck chunks left in the file, push the siapath back onto
	// the stuck stack so the remaining chunks can be added later
	if len(unfinishedStuckChunks) > 0 {
		r.stuckStack.managedPush(siaPath)
	}
	return allErrors
}

// managedOldestHealthCheckTime finds the lowest level directory with the oldest
// LastHealthCheckTime
func (r *Renter) managedOldestHealthCheckTime() (modules.SiaPath, time.Time, error) {
	// Check the siadir metadata for the root files directory
	siaPath := modules.RootSiaPath()
	metadata, err := r.managedDirectoryMetadata(siaPath)
	if err != nil {
		return modules.SiaPath{}, time.Time{}, err
	}

	// Follow the path of oldest LastHealthCheckTime to the lowest level
	// directory
	for metadata.NumSubDirs > 0 {
		// Check to make sure the renter hasn't been shut down
		select {
		case <-r.tg.StopChan():
			return modules.SiaPath{}, time.Time{}, errors.New("Renter shutdown before oldestHealthCheckTime could be found")
		default:
		}

		// Check for sub directories
		subDirSiaPaths, err := r.managedSubDirectories(siaPath)
		if err != nil {
			return modules.SiaPath{}, time.Time{}, err
		}

		// Find the oldest LastHealthCheckTime of the sub directories
		updated := false
		for _, subDirPath := range subDirSiaPaths {
			// Check to make sure the renter hasn't been shut down
			select {
			case <-r.tg.StopChan():
				return modules.SiaPath{}, time.Time{}, errors.New("Renter shutdown before oldestHealthCheckTime could be found")
			default:
			}

			// Check the LastHealthCheckTime of the sub directory
			subMetadata, err := r.managedDirectoryMetadata(subDirPath)
			if err != nil {
				return modules.SiaPath{}, time.Time{}, err
			}

			// If the sub directory's AggregateLastHealthCheckTime is after the
			// current AggregateLastHealthCheckTime, continue, since we are
			// already in a directory with an older timestamp
			if subMetadata.AggregateLastHealthCheckTime.After(metadata.AggregateLastHealthCheckTime) {
				continue
			}

			// Update LastHealthCheckTime and follow the older path
			updated = true
			metadata = subMetadata
			siaPath = subDirPath
		}

		// If the values were never updated with any of the sub directory
		// values, then return, as we are in the directory we are looking for
		if !updated {
			return siaPath, metadata.AggregateLastHealthCheckTime, nil
		}
	}

	return siaPath, metadata.AggregateLastHealthCheckTime, nil
}

// managedStuckDirectory randomly finds a directory that contains stuck chunks
func (r *Renter) managedStuckDirectory() (modules.SiaPath, error) {
	// Iterate over the renter directory until we randomly end up in a
	// directory that only contains files, then break and return that directory
	siaPath := modules.RootSiaPath()
	for {
		select {
		// Check to make sure the renter hasn't been shut down
		case <-r.tg.StopChan():
			return modules.SiaPath{}, nil
		default:
		}

		directories, err := r.DirList(siaPath)
		if err != nil {
			return modules.SiaPath{}, err
		}
		files, err := r.FileList(siaPath, false, false)
		if err != nil {
			return modules.SiaPath{}, err
		}
		// Sanity check that there is at least the current directory
		if len(directories) == 0 {
			build.Critical("No directories returned from DirList")
		}
		// Check if we are in an empty directory. This will be the case before
		// any files have been uploaded, so the root directory is empty. It can
		// also happen if the only file in a directory was stuck and was very
		// recently deleted, so the health of the directory has not yet been
		// updated.
		emptyDir := len(directories) == 1 && len(files) == 0
		if emptyDir {
			return siaPath, errNoStuckFiles
		}
		// Check if there are stuck chunks in this directory
		if directories[0].AggregateNumStuckChunks == 0 {
			// Log a warning if we are not at the root directory
			if !siaPath.IsRoot() {
				r.log.Debugln("WARN: ended up in directory with no stuck chunks that is not root directory:", siaPath)
			}
			return siaPath, errNoStuckFiles
		}
		// Check if we have reached a directory with only files
		if len(directories) == 1 {
			return siaPath, nil
		}

		// Pick a random value in [0, AggregateNumStuckChunks) so that
		// directories are chosen in proportion to the number of stuck chunks
		// they contain
		rand := fastrand.Intn(int(directories[0].AggregateNumStuckChunks))

		// Use rand to decide which directory to go into. Work backwards over
		// the slice of directories. The first element is the current
		// directory, so its aggregate value is the sum over all of its files
		// and sub directories. We choose a directory by subtracting the number
		// of stuck chunks each directory has from rand, and when rand reaches
		// 0 or less we choose that directory
		for i := len(directories) - 1; i >= 0; i-- {
			// If we make it to the last iteration, double check that the
			// current directory has files
			if i == 0 && len(files) == 0 {
				break
			}

			// If we are on the last iteration and the directory does have
			// files, then return the current directory
			if i == 0 {
				siaPath = directories[0].SiaPath
				return siaPath, nil
			}

			// Skip directories with no stuck chunks
			if directories[i].AggregateNumStuckChunks == uint64(0) {
				continue
			}

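			// Subtract this directory's stuck chunks from rand and tentatively
			// select it; the selection sticks once rand reaches zero or below.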
			rand = rand - int(directories[i].AggregateNumStuckChunks)
			siaPath = directories[i].SiaPath
			// If rand is 0 or less, break out of the loop and continue into
			// that directory
			if rand <= 0 {
				break
			}
		}
	}
}

// managedSubDirectories reads a directory and returns a slice of all the sub
// directory SiaPaths
func (r *Renter) managedSubDirectories(siaPath modules.SiaPath) ([]modules.SiaPath, error) {
	// Read directory
	fileinfos, err := ioutil.ReadDir(siaPath.SiaDirSysPath(r.staticFilesDir))
	if err != nil {
		return nil, err
	}
	// Find all sub directory SiaPaths
	folders := make([]modules.SiaPath, 0, len(fileinfos))
	for _, fi := range fileinfos {
		if fi.IsDir() {
			subDir, err := siaPath.Join(fi.Name())
			if err != nil {
				return nil, err
			}
			folders = append(folders, subDir)
		}
	}
	return folders, nil
}

// threadedStuckFileLoop works through the renter directory, finds stuck
// chunks, and tries to repair them
func (r *Renter) threadedStuckFileLoop() {
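	// Register this loop with the renter's thread group so that shutdown
	// waits for it; if the thread group is already stopped, Add returns an
	// error and the loop exits immediately.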
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	// Loop until the renter has shut down or until there are no stuck chunks
	for {
		// Return if the renter has shut down.
		select {
		case <-r.tg.StopChan():
			return
		default:
		}

		// Wait until the renter is online to proceed.
		if !r.managedBlockUntilOnline() {
			// The renter shut down before the internet connection was restored.
			r.log.Debugln("renter shutdown before internet connection")
			return
		}

		// As we add stuck chunks to the upload heap we want to remember the
		// directories they came from so we can call bubble to update the
		// filesystem
		var dirSiaPaths []modules.SiaPath

		// Refresh the hosts and workers before adding stuck chunks to the
		// upload heap
		hosts := r.managedRefreshHostsAndWorkers()

		// Try to add stuck chunks from the stuck stack. We add these first
		// because they come from files that previously had a successful stuck
		// chunk repair, which makes it more likely that repairing additional
		// stuck chunks from these files will succeed compared to a random
		// stuck chunk from the renter's directory.
		stuckStackDirSiaPaths, err := r.managedAddStuckChunksFromStuckStack(hosts)
		if err != nil {
			r.log.Println("WARN: error adding stuck chunks to upload heap from stuck stack:", err)
		}
		dirSiaPaths = append(dirSiaPaths, stuckStackDirSiaPaths...)

		// Try to add random stuck chunks to the upload heap
		randomDirSiaPaths, err := r.managedAddRandomStuckChunks(hosts)
		if err != nil {
			r.log.Println("WARN: error adding random stuck chunks to upload heap:", err)
		}
		dirSiaPaths = append(dirSiaPaths, randomDirSiaPaths...)

		// Check if any stuck chunks were added to the upload heap
		numStuckChunks := r.uploadHeap.managedNumStuckChunks()
		if numStuckChunks == 0 {
			// Block until new work is required.
			select {
			case <-r.tg.StopChan():
				// The renter has shut down.
				return
			case <-r.uploadHeap.stuckChunkFound:
				// The health loop found a stuck chunk.
			case <-r.uploadHeap.stuckChunkSuccess:
				// A stuck chunk was successfully repaired.
			}
			continue
		}

		// Signal that a repair is needed because stuck chunks were added to the
		// upload heap
		select {
		case r.uploadHeap.repairNeeded <- struct{}{}:
		default:
		}
		r.log.Println(numStuckChunks, "stuck chunks added to the upload heap, repair signal sent")

		// Sleep until it is time to try to repair another stuck chunk
		rebuildStuckHeapSignal := time.After(repairStuckChunkInterval)
		select {
		case <-r.tg.StopChan():
			// Return if the renter has been shut down
			return
		case <-rebuildStuckHeapSignal:
			// Time to find another random chunk
		case <-r.uploadHeap.stuckChunkSuccess:
			// A stuck chunk was successfully repaired.
		}

		// Call bubble before continuing with the next iteration to ensure the
		// filesystem is updated.
		for _, dirSiaPath := range dirSiaPaths {
			err = r.managedBubbleMetadata(dirSiaPath)
			if err != nil {
				r.log.Println("Error calling managedBubbleMetadata on `", dirSiaPath.String(), "`:", err)
				select {
				case <-time.After(stuckLoopErrorSleepDuration):
				case <-r.tg.StopChan():
					return
				}
			}
		}
	}
}

// threadedUpdateRenterHealth reads all the siafiles in the renter, calculates
// the health of each file and updates the folder metadata
func (r *Renter) threadedUpdateRenterHealth() {
	err := r.tg.Add()
	if err != nil {
		return
	}
	defer r.tg.Done()

	// Loop until the renter has shut down. If the renter's top level files
	// directory has a LastHealthCheckTime within the healthCheckInterval, the
	// loop sleeps until the next check is due.
	for {
		select {
		// Check to make sure the renter hasn't been shut down
		case <-r.tg.StopChan():
			return
		default:
		}

		// Follow the path of the oldest LastHealthCheckTime to find the
		// directory and its timestamp
		r.log.Debugln("Checking for oldest health check time")
		siaPath, lastHealthCheckTime, err := r.managedOldestHealthCheckTime()
		if err != nil {
			// If there is an error getting the lastHealthCheckTime, sleep for
			// a little bit before continuing
			r.log.Debug("WARN: Could not find oldest health check time:", err)
			select {
			case <-time.After(healthLoopErrorSleepDuration):
			case <-r.tg.StopChan():
				return
			}
			continue
		}

		// Check if the time since the last check on the least recently checked
		// folder is inside the health check interval. If so, the whole
		// filesystem has been checked recently, and we can sleep until the
		// least recent check is outside the check interval.
		timeSinceLastCheck := time.Since(lastHealthCheckTime)
		if timeSinceLastCheck < healthCheckInterval {
			// Sleep until the least recent check is outside the check interval.
			sleepDuration := healthCheckInterval - timeSinceLastCheck
			r.log.Debugln("Health loop sleeping for", sleepDuration)
			wakeSignal := time.After(sleepDuration)
			select {
			case <-r.tg.StopChan():
				return
			case <-wakeSignal:
			}
		}
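		// Calling bubble on the directory with the oldest LastHealthCheckTime
		// recalculates its metadata and propagates the updated values up to
		// the root directory.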
		r.log.Debug("Health Loop calling bubble on '", siaPath.String(), "'")
		err = r.managedBubbleMetadata(siaPath)
		if err != nil {
			r.log.Println("Error calling managedBubbleMetadata on `", siaPath.String(), "`:", err)
			select {
			case <-time.After(healthLoopErrorSleepDuration):
			case <-r.tg.StopChan():
				return
			}
		}
	}
}