gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/repair.go (about)

     1  package renter
     2  
     3  import (
     4  	"fmt"
     5  	"path/filepath"
     6  	"strings"
     7  	"time"
     8  
     9  	"gitlab.com/NebulousLabs/errors"
    10  	"gitlab.com/NebulousLabs/fastrand"
    11  
    12  	"gitlab.com/SkynetLabs/skyd/build"
    13  	"gitlab.com/SkynetLabs/skyd/skymodules"
    14  )
    15  
    16  var (
    17  	// errNoStuckFiles is a helper to indicate that there are no stuck files in
    18  	// the renter's directory
    19  	errNoStuckFiles = errors.New("no stuck files")
    20  
    21  	// errNoStuckChunks is a helper to indicate that there are no stuck chunks
    22  	// in a siafile
    23  	errNoStuckChunks = errors.New("no stuck chunks")
    24  )
    25  
    26  // managedAddRandomStuckChunks will try and add up to
    27  // maxRandomStuckChunksAddToHeap random stuck chunks to the upload heap
    28  func (r *Renter) managedAddRandomStuckChunks(hosts map[string]struct{}) ([]skymodules.SiaPath, error) {
    29  	var dirSiaPaths []skymodules.SiaPath
    30  	// Remember number of stuck chunks we are starting with
    31  	prevNumStuckChunks, prevNumRandomStuckChunks := r.staticUploadHeap.managedNumStuckChunks()
    32  	// Check if there is space in the heap. There is space if the number of
    33  	// random stuck chunks has not exceeded maxRandomStuckChunksInHeap and the
    34  	// total number of stuck chunks as not exceeded maxStuckChunksInHeap
    35  	spaceInHeap := prevNumRandomStuckChunks < maxRandomStuckChunksInHeap && prevNumStuckChunks < maxStuckChunksInHeap
    36  	for i := 0; i < maxRandomStuckChunksAddToHeap && spaceInHeap; i++ {
    37  		// Randomly get directory with stuck files
    38  		dirSiaPath, err := r.managedStuckDirectory()
    39  		if err != nil {
    40  			return dirSiaPaths, errors.AddContext(err, "unable to get random stuck directory")
    41  		}
    42  
    43  		// Get Random stuck file from directory
    44  		siaPath, err := r.managedStuckFile(dirSiaPath)
    45  		if err != nil {
    46  			return dirSiaPaths, errors.AddContext(err, "unable to get random stuck file in dir "+dirSiaPath.String())
    47  		}
    48  
    49  		// Add stuck chunk to upload heap and signal repair needed
    50  		err = r.managedBuildAndPushRandomChunk(siaPath, hosts, targetStuckChunks, r.staticRepairMemoryManager)
    51  		if err != nil {
    52  			return dirSiaPaths, errors.AddContext(err, "unable to push random stuck chunk from '"+siaPath.String()+"' of '"+dirSiaPath.String()+"'")
    53  		}
    54  
    55  		// Sanity check that stuck chunks were added
    56  		currentNumStuckChunks, currentNumRandomStuckChunks := r.staticUploadHeap.managedNumStuckChunks()
    57  		if currentNumRandomStuckChunks <= prevNumRandomStuckChunks {
    58  			// If the number of stuck chunks in the heap is not increasing
    59  			// then break out of this loop in order to prevent getting stuck
    60  			// in an infinite loop
    61  			break
    62  		}
    63  
    64  		// Remember the directory so bubble can be called on it at the end of
    65  		// the iteration
    66  		dirSiaPaths = append(dirSiaPaths, dirSiaPath)
    67  		r.staticRepairLog.Printf("Added %v stuck chunks from %s", currentNumRandomStuckChunks-prevNumRandomStuckChunks, dirSiaPath.String())
    68  		prevNumStuckChunks = currentNumStuckChunks
    69  		prevNumRandomStuckChunks = currentNumRandomStuckChunks
    70  		spaceInHeap = prevNumRandomStuckChunks < maxRandomStuckChunksInHeap && prevNumStuckChunks < maxStuckChunksInHeap
    71  	}
    72  	return dirSiaPaths, nil
    73  }
    74  
    75  // managedAddStuckChunksFromStuckStack will try and add up to
    76  // maxStuckChunksInHeap stuck chunks to the upload heap from the files in the
    77  // stuck stack.
    78  func (r *Renter) managedAddStuckChunksFromStuckStack(hosts map[string]struct{}) ([]skymodules.SiaPath, error) {
    79  	var dirSiaPaths []skymodules.SiaPath
    80  	offline, goodForRenew, _, _ := r.callRenterContractsAndUtilities()
    81  	numStuckChunks, _ := r.staticUploadHeap.managedNumStuckChunks()
    82  	for r.staticStuckStack.managedLen() > 0 && numStuckChunks < maxStuckChunksInHeap {
    83  		// Pop the first file SiaPath
    84  		siaPath := r.staticStuckStack.managedPop()
    85  
    86  		// Add stuck chunks to uploadHeap
    87  		err := r.managedAddStuckChunksToHeap(siaPath, hosts, offline, goodForRenew)
    88  		if err != nil && !errors.Contains(err, errNoStuckChunks) {
    89  			return dirSiaPaths, errors.AddContext(err, "unable to add stuck chunks to heap")
    90  		}
    91  
    92  		// Since we either added stuck chunks to the heap from this file,
    93  		// there are no stuck chunks left in the file, or all the stuck
    94  		// chunks for the file are already being worked on, remember the
    95  		// directory so we can call bubble on it at the end of this
    96  		// iteration of the stuck loop to update the filesystem
    97  		dirSiaPath, err := siaPath.Dir()
    98  		if err != nil {
    99  			return dirSiaPaths, errors.AddContext(err, "unable to get directory siapath")
   100  		}
   101  		dirSiaPaths = append(dirSiaPaths, dirSiaPath)
   102  		numStuckChunks, _ = r.staticUploadHeap.managedNumStuckChunks()
   103  	}
   104  	return dirSiaPaths, nil
   105  }
   106  
   107  // managedAddStuckChunksToHeap tries to add as many stuck chunks from a siafile
   108  // to the upload heap as possible
   109  func (r *Renter) managedAddStuckChunksToHeap(siaPath skymodules.SiaPath, hosts map[string]struct{}, offline, goodForRenew map[string]bool) (err error) {
   110  	// Open File
   111  	sf, err := r.staticFileSystem.OpenSiaFile(siaPath)
   112  	if err != nil {
   113  		return fmt.Errorf("unable to open siafile %v, error: %v", siaPath, err)
   114  	}
   115  	defer func() {
   116  		err = errors.Compose(err, sf.Close())
   117  	}()
   118  
   119  	// Check if there are still stuck chunks to repair
   120  	if sf.NumStuckChunks() == 0 {
   121  		return errNoStuckChunks
   122  	}
   123  
   124  	// Build unfinished stuck chunks
   125  	var allErrors error
   126  	unfinishedStuckChunks := r.managedBuildUnfinishedChunks(sf, hosts, targetStuckChunks, offline, goodForRenew, r.staticRepairMemoryManager)
   127  	defer func() {
   128  		// Close out remaining file entries
   129  		for _, chunk := range unfinishedStuckChunks {
   130  			allErrors = errors.Compose(allErrors, chunk.Close())
   131  		}
   132  	}()
   133  
   134  	// Add up to maxStuckChunksInHeap stuck chunks to the upload heap
   135  	var chunk *unfinishedUploadChunk
   136  	stuckChunksAdded := 0
   137  	for len(unfinishedStuckChunks) > 0 && stuckChunksAdded < maxStuckChunksInHeap {
   138  		chunk = unfinishedStuckChunks[0]
   139  		unfinishedStuckChunks = unfinishedStuckChunks[1:]
   140  		chunk.stuckRepair = true
   141  		chunk.fileRecentlySuccessful = true
   142  		_, pushed, err := r.managedPushChunkForRepair(chunk, chunkTypeLocalChunk)
   143  		if err != nil {
   144  			return errors.Compose(allErrors, err, chunk.Close())
   145  		}
   146  		if !pushed {
   147  			// Stuck chunk unable to be added. Close the file entry of that
   148  			// chunk
   149  			allErrors = errors.Compose(allErrors, chunk.Close())
   150  			continue
   151  		}
   152  		stuckChunksAdded++
   153  	}
   154  	if stuckChunksAdded > 0 {
   155  		r.staticRepairLog.Printf("Added %v stuck chunks from %s to the repair heap", stuckChunksAdded, siaPath.String())
   156  	}
   157  
   158  	// check if there are more stuck chunks in the file
   159  	if len(unfinishedStuckChunks) > 0 {
   160  		r.staticStuckStack.managedPush(siaPath)
   161  	}
   162  	return allErrors
   163  }
   164  
   165  // managedStuckDirectory randomly finds a directory that contains stuck chunks
   166  func (r *Renter) managedStuckDirectory() (skymodules.SiaPath, error) {
   167  	// Iterating of the renter directory until randomly ending up in a
   168  	// directory, break and return that directory
   169  	siaPath := skymodules.RootSiaPath()
   170  	for {
   171  		select {
   172  		// Check to make sure renter hasn't been shutdown
   173  		case <-r.tg.StopChan():
   174  			return skymodules.SiaPath{}, nil
   175  		default:
   176  		}
   177  
   178  		directories, err := r.managedDirList(siaPath)
   179  		if err != nil {
   180  			return skymodules.SiaPath{}, err
   181  		}
   182  		// Sanity check that there is at least the current directory
   183  		if len(directories) == 0 {
   184  			build.Critical("No directories returned from DirList", siaPath.String())
   185  		}
   186  
   187  		// Check if we are in an empty Directory. This will be the case before
   188  		// any files have been uploaded so the root directory is empty. Also it
   189  		// could happen if the only file in a directory was stuck and was very
   190  		// recently deleted so the health of the directory has not yet been
   191  		// updated.
   192  		emptyDir := len(directories) == 1 && directories[0].NumFiles == 0
   193  		if emptyDir {
   194  			return siaPath, errNoStuckFiles
   195  		}
   196  		// Check if there are stuck chunks in this directory
   197  		if directories[0].AggregateNumStuckChunks == 0 {
   198  			// Log error if we are not at the root directory
   199  			if !siaPath.IsRoot() {
   200  				r.staticLog.Println("WARN: ended up in directory with no stuck chunks that is not root directory:", siaPath)
   201  			}
   202  			return siaPath, errNoStuckFiles
   203  		}
   204  		// Check if we have reached a directory with only files
   205  		if len(directories) == 1 {
   206  			return siaPath, nil
   207  		}
   208  
   209  		// Get random int
   210  		rand := fastrand.Intn(int(directories[0].AggregateNumStuckChunks))
   211  		// Use rand to decide which directory to go into. Work backwards over
   212  		// the slice of directories. Since the first element is the current
   213  		// directory that means that it is the sum of all the files and
   214  		// directories.  We can chose a directory by subtracting the number of
   215  		// stuck chunks a directory has from rand and if rand gets to 0 or less
   216  		// we choose that directory
   217  		for i := len(directories) - 1; i >= 0; i-- {
   218  			// If we are on the last iteration and the directory does have files
   219  			// then return the current directory
   220  			if i == 0 {
   221  				siaPath = directories[0].SiaPath
   222  				return siaPath, nil
   223  			}
   224  
   225  			// Skip directories with no stuck chunks
   226  			if directories[i].AggregateNumStuckChunks == uint64(0) {
   227  				continue
   228  			}
   229  
   230  			rand = rand - int(directories[i].AggregateNumStuckChunks)
   231  			siaPath = directories[i].SiaPath
   232  			// If rand is less than 0 break out of the loop and continue into
   233  			// that directory
   234  			if rand < 0 {
   235  				break
   236  			}
   237  		}
   238  	}
   239  }
   240  
   241  // managedStuckFile finds a weighted random stuck file from a directory based on
   242  // the number of stuck chunks in the stuck files of the directory
   243  func (r *Renter) managedStuckFile(dirSiaPath skymodules.SiaPath) (siapath skymodules.SiaPath, err error) {
   244  	// Grab Aggregate number of stuck chunks from the directory
   245  	//
   246  	// NOTE: using the aggregate number of stuck chunks assumes that the
   247  	// directory and the files within the directory are in sync. This is ok to
   248  	// do as the risks associated with being out of sync are low.
   249  	siaDir, err := r.staticFileSystem.OpenSiaDir(dirSiaPath)
   250  	if err != nil {
   251  		return skymodules.SiaPath{}, errors.AddContext(err, "unable to open siaDir "+dirSiaPath.String())
   252  	}
   253  	defer func() {
   254  		err = errors.Compose(err, siaDir.Close())
   255  	}()
   256  	metadata, err := siaDir.Metadata()
   257  	if err != nil {
   258  		return skymodules.SiaPath{}, err
   259  	}
   260  	aggregateNumStuckChunks := metadata.AggregateNumStuckChunks
   261  	numStuckChunks := metadata.NumStuckChunks
   262  	numFiles := metadata.NumFiles
   263  	if aggregateNumStuckChunks == 0 || numStuckChunks == 0 || numFiles == 0 {
   264  		// If the number of stuck chunks or number of files is zero then this
   265  		// directory should not have been used to find a stuck file. Queue an
   266  		// update on the directories metadata to prevent this from happening
   267  		// again.
   268  		r.staticDirUpdateBatcher.callQueueDirUpdate(dirSiaPath)
   269  		err = fmt.Errorf("managedStuckFile should not have been called on %v, AggregateNumStuckChunks: %v, NumStuckChunks: %v, NumFiles: %v", dirSiaPath.String(), aggregateNumStuckChunks, numStuckChunks, numFiles)
   270  		return skymodules.SiaPath{}, err
   271  	}
   272  
   273  	// Use rand to decide which file to select. We can chose a file by
   274  	// subtracting the number of stuck chunks a file has from rand and if rand
   275  	// gets to 0 or less we choose that file
   276  	rand := fastrand.Intn(int(aggregateNumStuckChunks))
   277  
   278  	// Read the directory, using ReadDir so we don't read all the siafiles
   279  	// unless we need to
   280  	fileinfos, err := r.staticFileSystem.ReadDir(dirSiaPath)
   281  	if err != nil {
   282  		return skymodules.SiaPath{}, errors.AddContext(err, "unable to open siadir: "+dirSiaPath.String())
   283  	}
   284  	// Iterate over the fileinfos
   285  	for _, fi := range fileinfos {
   286  		// Check for SiaFile
   287  		if fi.IsDir() || filepath.Ext(fi.Name()) != skymodules.SiaFileExtension {
   288  			continue
   289  		}
   290  
   291  		// Get SiaPath
   292  		sp, err := dirSiaPath.Join(strings.TrimSuffix(fi.Name(), skymodules.SiaFileExtension))
   293  		if err != nil {
   294  			return skymodules.SiaPath{}, errors.AddContext(err, "unable to join the siapath with the file: "+fi.Name())
   295  		}
   296  
   297  		// Open SiaFile, grab the number of stuck chunks and close the file
   298  		f, err := r.staticFileSystem.OpenSiaFile(sp)
   299  		if err != nil {
   300  			return skymodules.SiaPath{}, errors.AddContext(err, "could not open siafileset for "+sp.String())
   301  		}
   302  		numStuckChunks := int(f.NumStuckChunks())
   303  		if err := f.Close(); err != nil {
   304  			return skymodules.SiaPath{}, errors.AddContext(err, "failed to close filenode "+sp.String())
   305  		}
   306  
   307  		// Check if stuck
   308  		if numStuckChunks == 0 {
   309  			continue
   310  		}
   311  
   312  		// Decrement rand and check if we have decremented fully
   313  		rand = rand - numStuckChunks
   314  		siapath = sp
   315  		if rand < 0 {
   316  			break
   317  		}
   318  	}
   319  	if siapath.IsEmpty() {
   320  		// If no files were selected from the directory than there is a mismatch
   321  		// between the file metadata and the directory metadata. Queue an update
   322  		// on the directory's metadata so this doesn't happen again.
   323  		r.staticDirUpdateBatcher.callQueueDirUpdate(dirSiaPath)
   324  		r.staticDirUpdateBatcher.callFlush() // wait to avoid spinning
   325  
   326  		return skymodules.SiaPath{}, errors.New("no files selected from directory " + dirSiaPath.String())
   327  	}
   328  	return siapath, nil
   329  }
   330  
   331  // managedSubDirectories reads a directory and returns a slice of all the sub
   332  // directory SiaPaths
   333  func (r *Renter) managedSubDirectories(siaPath skymodules.SiaPath) ([]skymodules.SiaPath, error) {
   334  	// Read directory
   335  	fileinfos, err := r.staticFileSystem.ReadDir(siaPath)
   336  	if err != nil {
   337  		return nil, err
   338  	}
   339  	// Find all sub directory SiaPaths
   340  	folders := make([]skymodules.SiaPath, 0, len(fileinfos))
   341  	for _, fi := range fileinfos {
   342  		if fi.IsDir() {
   343  			subDir, err := siaPath.Join(fi.Name())
   344  			if err != nil {
   345  				return nil, err
   346  			}
   347  			folders = append(folders, subDir)
   348  		}
   349  	}
   350  	return folders, nil
   351  }
   352  
   353  // threadedStuckFileLoop works through the renter directory and finds the stuck
   354  // chunks and tries to repair them
   355  func (r *Renter) threadedStuckFileLoop() {
   356  	err := r.tg.Add()
   357  	if err != nil {
   358  		return
   359  	}
   360  	defer r.tg.Done()
   361  
   362  	// Loop until the renter has shutdown or until there are no stuck chunks
   363  	for {
   364  		// Return if the renter has shut down.
   365  		select {
   366  		case <-r.tg.StopChan():
   367  			return
   368  		default:
   369  		}
   370  
   371  		// Wait until the renter is online to proceed.
   372  		if !r.managedBlockUntilOnline() {
   373  			// The renter shut down before the internet connection was restored.
   374  			r.staticLog.Println("renter shutdown before internet connection")
   375  			return
   376  		}
   377  
   378  		// As we add stuck chunks to the upload heap we want to remember the
   379  		// directories they came from so we can call bubble to update the
   380  		// filesystem
   381  		var dirSiaPaths []skymodules.SiaPath
   382  
   383  		// Refresh the hosts and workers before adding stuck chunks to the
   384  		// upload heap
   385  		hosts := r.managedRefreshHostsAndWorkers()
   386  
   387  		// Try and add stuck chunks from the stuck stack. We try and add these
   388  		// first as they will be from files that previously had a successful
   389  		// stuck chunk repair. The previous success gives us more confidence
   390  		// that it is more likely additional stuck chunks from these files will
   391  		// be successful compared to a random stuck chunk from the renter's
   392  		// directory.
   393  		stuckStackDirSiaPaths, err := r.managedAddStuckChunksFromStuckStack(hosts)
   394  		if err != nil {
   395  			r.staticRepairLog.Println("WARN: error adding stuck chunks to repair heap from files with previously successful stuck repair jobs:", err)
   396  		}
   397  		dirSiaPaths = append(dirSiaPaths, stuckStackDirSiaPaths...)
   398  
   399  		// Try add random stuck chunks to upload heap
   400  		randomDirSiaPaths, err := r.managedAddRandomStuckChunks(hosts)
   401  		if err != nil {
   402  			r.staticRepairLog.Println("WARN: error adding random stuck chunks to upload heap:", err)
   403  		}
   404  		dirSiaPaths = append(dirSiaPaths, randomDirSiaPaths...)
   405  
   406  		// Check if any stuck chunks were added to the upload heap
   407  		numStuckChunks, _ := r.staticUploadHeap.managedNumStuckChunks()
   408  		if numStuckChunks == 0 {
   409  			// Block until new work is required.
   410  			select {
   411  			case <-r.tg.StopChan():
   412  				// The renter has shut down.
   413  				return
   414  			case <-r.staticUploadHeap.stuckChunkFound:
   415  				// Health Loop found stuck chunk
   416  			case <-r.staticUploadHeap.stuckChunkSuccess:
   417  				// Stuck chunk was successfully repaired.
   418  			}
   419  			continue
   420  		}
   421  
   422  		// Signal that a repair is needed because stuck chunks were added to the
   423  		// upload heap
   424  		select {
   425  		case r.staticUploadHeap.repairNeeded <- struct{}{}:
   426  		default:
   427  		}
   428  
   429  		// Sleep until it is time to try and repair another stuck chunk
   430  		rebuildStuckHeapSignal := time.After(repairStuckChunkInterval)
   431  		select {
   432  		case <-r.tg.StopChan():
   433  			// Return if the return has been shutdown
   434  			return
   435  		case <-rebuildStuckHeapSignal:
   436  			// Time to find another random chunk
   437  		case <-r.staticUploadHeap.stuckChunkSuccess:
   438  			// Stuck chunk was successfully repaired.
   439  		}
   440  
   441  		// Queue an update to all of the dirs that were visited and then block
   442  		// until all of the updates have completed and have their stats
   443  		// represented in the root aggregate metadata.
   444  		for _, dirSiaPath := range dirSiaPaths {
   445  			r.staticDirUpdateBatcher.callQueueDirUpdate(dirSiaPath)
   446  		}
   447  		r.staticDirUpdateBatcher.callFlush()
   448  	}
   449  }