gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/dirupdatebatcher.go

package renter

// dirupdatebatcher.go contains the logic for the dirupdatebatcher, which is a
// batching tool that improves the performance of updating a large number of
// directories in the same time period by removing redundant calls to the same
// directory, and by removing redundant update calls that would happen on shared
// parent directories.
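//
// As an illustrative example (hypothetical paths): updating the files /a/b/1
// and /a/b/2 naively requires updating /a/b, /a, and the root directory twice
// each - six directory updates in total. Batched together, each of those
// shared parents is updated only once, for three updates total.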
//
// NOTE: The dir update batcher is already fairly optimized. There are two known
// places to improve performance, but both contain a fair amount of programming
// overhead and could potentially make performance worse if implemented
// incorrectly. The first is that batches do not deduplicate against each other.
// If flush() is called on one batch before the previous batch is finished, the
// two batches may perform redundant work. This can be deduplicated if the
// batches have pointers to each other, however for garbage collection purposes
// you need to make sure to clean up the pointers later. The second thing is
// that the update calls are all made together in rapid succession, which could
// hog the CPU and consume a ton of disk IOPs all at once. We try to manage this
// by only batching together 30 seconds at a time. You could try to slow down
// the update calls so that the CPU is under less stress, but this may block
// parts of the repair loop, and may also block user calls. It is unlikely that
// either of these optimizations needs to be pursued, but they are something to
// keep in mind if the batcher seems to be causing issues in production.

import (
	"container/list"
	"fmt"
	"sync"
	"time"

	"gitlab.com/NebulousLabs/errors"
	"gitlab.com/SkynetLabs/skyd/build"
	"gitlab.com/SkynetLabs/skyd/skymodules"
)

var (
	// maxTimeBetweenBatchExecutions defines the amount of time that a batch
	// will wait before executing the queue of directories to batch. The testing
	// value is really low at 50ms to maximize the opportunity that threads
	// queue things across multiple batches (which should be safe, but
	// potentially has edge cases).
	//
	// The production value is also relatively low at 30 seconds. It was
	// originally set a lot higher (15 minutes), but we saw in production that
	// this would result in large amounts of files being batched together all
	// at once, causing the flush to take over a minute.
	maxTimeBetweenBatchExecutions = build.Select(build.Var{
		Dev:      10 * time.Second,
		Standard: 30 * time.Second,
		Testing:  50 * time.Millisecond,
	}).(time.Duration)
)

type (
	// dirUpdateBatch defines a batch of updates that should be run at the
	// same time. Performing an update on a file requires doing an update on its
	// directory and all parent directories up to the root directory. By doing
	// the updates as a batch, we can reduce the total amount of work required
	// to complete the update.
	//
	// NOTE: the health update batch depends on the mutex of the
	// dirUpdateBatcher for thread safety.
	dirUpdateBatch struct {
		// batchSet is an array of maps which contain the directories that need
		// to be updated. Each element of the array corresponds to a directory
		// of a different depth. The first element of the array just contains
		// the root directory. The second element is a map that contains only
		// direct subdirs of the root. The third element is a map that contains
		// directories which live directly in subdirs of the root, and so on.
		//
		// When performing the update on the set, the lowest level dirs are all
		// executed at once, and then their parents are added to the batchSet,
		// then the next level of dirs are executed all together, and so on.
		// This ensures that each directory is only updated a single time per
		// batch, even if it appears as a parent in dozens of directories in the
		// batchSet.
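		//
		// Illustrative layout (hypothetical paths):
		//
		//	batchSet[0]: {root}
		//	batchSet[1]: {/home}
		//	batchSet[2]: {/home/user, /home/docs}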
		batchSet []map[skymodules.SiaPath]struct{}

		// completeChan is a channel that gets closed when the whole batch has
		// successfully executed. It will not be closed until priorCompleteChan
		// has been closed. priorCompleteChan is the channel owned by the
		// previous batch. This ensures that when the channel is closed, all
		// updates are certain to have completed, even if those updates were
		// submitted to previous batches.
		completeChan      chan struct{}
		priorCompleteChan <-chan struct{}

		// Contains a renter, and also has some dependency injection logic.
		dirUpdateBatchDeps
	}

	// dirUpdateBatcher receives requests to update the health of a file or
	// directory and adds them to a batch. This struct manages concurrency and
	// safety between different batches.
	dirUpdateBatcher struct {
		// nextBatch defines the next batch that will perform a health update.
		nextBatch *dirUpdateBatch

		// Utilities
		closed          bool // callQueueDirUpdate is a no-op after shutdown
		staticFlushChan chan struct{}
		mu              sync.Mutex
		staticRenter    *Renter
	}
)

// managedExecute will execute a batch of updates.
func (batch *dirUpdateBatch) managedExecute() {
	renter := batch.dirUpdateBatchDeps.renter
	start := time.Now()
	dirs := 0
	defer func() {
		str := fmt.Sprintf("dirupdatebatch completed %v dirs in %v", dirs, time.Since(start))
		renter.staticLog.Debugln(str, "dirupdatebatcher")
	}()

	// Iterate through the batchSet backwards.
	for i := len(batch.batchSet) - 1; i >= 0; i-- {
		for dirPath := range batch.batchSet[i] {
			// Update the directory metadata. Note: we don't do any updates on
			// the file healths themselves, we just use the file metadata.
			err := batch.managedUpdateDirMetadata(dirPath) // passes through to the renter except during testing
			if err != nil {
				str := fmt.Sprintf("error updating directory %v in dirUpdateBatch.execute: %v", dirPath, err)
				renter.staticLog.Println(str, "health-verbose", "dirupdatebatcher", "error")
				continue
			}
			dirs++ // Increment after the error.

			// Add the parent.
			if !dirPath.IsRoot() {
				parent, err := dirPath.Dir()
				if err != nil {
					renter.staticLog.Critical("should not be getting an error when grabbing the dir of a non-root siadir:", dirPath, err)
				}
				batch.batchSet[i-1][parent] = struct{}{}
			}
		}
	}

	// Wait until the previous batch is complete. If we are shutting down, go
	// ahead and front-run the previous batch and just signal a close
	// immediately.
	select {
	case <-batch.priorCompleteChan:
	case <-batch.renter.tg.StopChan():
	}
	close(batch.completeChan)
}
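
// Illustrative trace of managedExecute (hypothetical paths): if the deepest
// level of the batchSet contains only /home/user, the first pass updates
// /home/user and inserts its parent /home into the level above; the next pass
// updates /home and inserts root; the final pass updates root. Each directory
// is updated exactly once per batch, no matter how many children queued it.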

// callQueueDirUpdate will add an update to the current batch. The input needs
// to be a dir.
func (dub *dirUpdateBatcher) callQueueDirUpdate(dirPath skymodules.SiaPath) {
	dub.mu.Lock()
	defer dub.mu.Unlock()
	if dub.closed {
		return
	}
	dub.staticRenter.staticLog.Debugln("dirUpdateBatcher queuing update for:", dirPath)
	// Make sure maps at each depth exist.
	depth := dirPath.Depth()
	for i := len(dub.nextBatch.batchSet); i <= depth; i++ {
		dub.nextBatch.batchSet = append(dub.nextBatch.batchSet, make(map[skymodules.SiaPath]struct{}))
	}
	// Add the input dirPath to the final level.
	dub.nextBatch.batchSet[depth][dirPath] = struct{}{}
}
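
// As a sketch (hypothetical path): queueing /home/user, whose Depth() is 2,
// first grows batchSet to three levels if it is currently shorter, then
// records the path in batchSet[2].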

// callFlush will trigger the current batch of updates to execute, and will not
// return until all updates have completed and are represented in the root
// directory. It will also not return until all prior batches have completed as
// well - if you have added a directory to a batch and call flush, you can be
// certain that the directory update will have executed by the time the flush
// call returns, regardless of which batch that directory was added to.
func (dub *dirUpdateBatcher) callFlush() {
	// Grab the complete chan for the current batch.
	dub.mu.Lock()
	completeChan := dub.nextBatch.completeChan
	dub.mu.Unlock()

	// Signal that the current batch should be flushed.
	select {
	case dub.staticFlushChan <- struct{}{}:
	default:
	}

	// Wait until the batch has completed before returning. No need to wait if
	// the renter has closed, just exit immediately.
	select {
	case <-completeChan:
	case <-dub.staticRenter.tg.StopChan():
	}
}
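
// Usage sketch (assuming a *Renter r with an initialized batcher): queue a
// directory, then block until the update is reflected in the root aggregate
// metadata:
//
//	r.staticDirUpdateBatcher.callQueueDirUpdate(dirPath)
//	r.staticDirUpdateBatcher.callFlush()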

// newBatch returns a new dirUpdateBatch ready for use.
func (dub *dirUpdateBatcher) newBatch(priorCompleteChan <-chan struct{}) *dirUpdateBatch {
	return &dirUpdateBatch{
		completeChan:      make(chan struct{}),
		priorCompleteChan: priorCompleteChan,

		dirUpdateBatchDeps: dirUpdateBatchDeps{
			renter: dub.staticRenter,
		},
	}
}

// threadedExecuteBatchUpdates is a permanent background thread which will
// execute batched updates in the background.
func (dub *dirUpdateBatcher) threadedExecuteBatchUpdates() {
	for {
		select {
		case <-dub.staticRenter.tg.StopChan():
			dub.mu.Lock()
			dub.closed = true
			dub.mu.Unlock()
			dub.nextBatch.managedExecute()
			return
		case <-dub.staticFlushChan:
		case <-time.After(maxTimeBetweenBatchExecutions):
		}

		// Rotate the current batch out for a new batch. This will block any
		// thread trying to add new updates to the batch, so make sure it
		// happens quickly. The new batch's prior channel must be the outgoing
		// batch's complete channel, which preserves the guarantee that
		// completeChans close in order.
		dub.mu.Lock()
		batch := dub.nextBatch
		dub.nextBatch = dub.newBatch(batch.completeChan)
		dub.mu.Unlock()
		// Execute the batch now that we aren't blocking anymore.
		batch.managedExecute()
	}
}
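
// Illustrative chaining: if batches A, B, and C are rotated out in that
// order, then C's priorCompleteChan is B's completeChan, and B's
// priorCompleteChan is A's completeChan. C's completeChan therefore cannot
// close until A and B have both finished executing.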

// newDirUpdateBatcher returns a health update batcher that is ready for use.
func (r *Renter) newDirUpdateBatcher() (*dirUpdateBatcher, error) {
	dub := &dirUpdateBatcher{
		staticFlushChan: make(chan struct{}, 1),
		staticRenter:    r,
	}

	// The next batch needs a channel which will be closed when the previous
	// batch completes. Since there is no previous batch, we provide a channel
	// that is already closed.
	initialChan := make(chan struct{})
	close(initialChan)

	dub.nextBatch = dub.newBatch(initialChan)
	err := r.tg.Launch(dub.threadedExecuteBatchUpdates)
	if err != nil {
		return nil, errors.AddContext(err, "unable to launch the batch updates background thread")
	}
	return dub, nil
}

// UpdateMetadata will explicitly update the metadata of the provided directory,
// returning once the directory has been updated and the changes are reflected
// in the aggregate metadata of the root directory. If the recursive flag is
// set, it will do a check on all subdirs as well.
//
// NOTE: This call is not very efficient, and generally isn't intended to be
// used on large directories with lots of subdirectories.
func (r *Renter) UpdateMetadata(siaPath skymodules.SiaPath, recursive bool) error {
	err := r.tg.Add()
	if err != nil {
		return err
	}
	defer r.tg.Done()

	// Use a list to track all of the siapaths we want.
	dirPaths := list.New()
	dirPaths.PushBack(siaPath)
	for dirPaths.Front() != nil {
		e := dirPaths.Front()
		dirPaths.Remove(e)
		siaPath := e.Value.(skymodules.SiaPath)
		err := r.managedUpdateFilesInDir(siaPath)
		if err != nil {
			context := fmt.Sprintf("unable to update the metadata of the files in dir %v", siaPath)
			return errors.AddContext(err, context)
		}
		r.staticDirUpdateBatcher.callQueueDirUpdate(siaPath)
		if !recursive {
			// If the recursive flag isn't set, this should trigger immediately
			// and result in only one directory being processed.
			continue
		}

		// The recursive flag is set, so load the full list of subdirectories
		// and ensure the loop will scan all of those directories as well.
		subDirPaths, err := r.managedSubDirectories(siaPath)
		if err != nil {
			context := fmt.Sprintf("unable to load list of subdirs for %v", siaPath)
			return errors.AddContext(err, context)
		}
		for _, subDir := range subDirPaths {
			dirPaths.PushBack(subDir)
		}
	}

	// Block until all updates are represented in the root aggregate metadata.
	r.staticDirUpdateBatcher.callFlush()
	return nil
}
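
// Usage sketch (hypothetical path): recursively refresh a directory tree and
// wait for the root aggregate metadata to reflect the result:
//
//	if err := r.UpdateMetadata(dirPath, true); err != nil {
//		// handle error
//	}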