github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/bootstrap/bootstrapper/fs/migrator/migrator.go

// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package migrator

import (
	"sync"

	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/persist/fs/migration"
	"github.com/m3db/m3/src/dbnode/storage"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
	"github.com/m3db/m3/src/dbnode/tracepoint"
	"github.com/m3db/m3/src/x/context"
	"github.com/m3db/m3/src/x/instrument"

	"go.uber.org/zap"
)

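// worker owns a dedicated persist manager plus the task options used by the
// migration tasks it runs, allowing filesets to be written concurrently.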
type worker struct {
	persistManager persist.Manager
	taskOptions    migration.TaskOptions
}

// Migrator is responsible for migrating data filesets based on version information in
// the info files.
type Migrator struct {
	migrationTaskFn      MigrationTaskFn
	infoFilesByNamespace bootstrap.InfoFilesByNamespace
	migrationOpts        migration.Options
	fsOpts               fs.Options
	instrumentOpts       instrument.Options
	storageOpts          storage.Options
	log                  *zap.Logger
}

// NewMigrator creates a new Migrator.
func NewMigrator(opts Options) (Migrator, error) {
	if err := opts.Validate(); err != nil {
		return Migrator{}, err
	}
	return Migrator{
		migrationTaskFn:      opts.MigrationTaskFn(),
		infoFilesByNamespace: opts.InfoFilesByNamespace(),
		migrationOpts:        opts.MigrationOptions(),
		fsOpts:               opts.FilesystemOptions(),
		instrumentOpts:       opts.InstrumentOptions(),
		storageOpts:          opts.StorageOptions(),
		log:                  opts.InstrumentOptions().Logger(),
	}, nil
}
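
// A minimal usage sketch, assuming the caller (typically the filesystem
// bootstrapper) has already built the Options and has a context available;
// the variable names below are illustrative only:
//
//	m, err := NewMigrator(opts)
//	if err != nil {
//		return err
//	}
//	if err := m.Run(ctx); err != nil {
//		return err
//	}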

// migrationCandidate is the struct we generate when we find a fileset in need of
// migration. It's provided to the workers to perform the actual migration.
type migrationCandidate struct {
	newTaskFn      migration.NewTaskFn
	infoFileResult fs.ReadInfoFileResult
	metadata       namespace.Metadata
	shard          uint32
}

// mergeKey is the unique set of data that identifies a ReadInfoFileResult.
type mergeKey struct {
	metadata   namespace.Metadata
	shard      uint32
	blockStart int64
}

// completedMigration is the updated ReadInfoFileResult after a migration has been
// performed, plus the merge key, so that we can properly merge the updated result
// back into the infoFilesByNamespace map.
type completedMigration struct {
	key                   mergeKey
	updatedInfoFileResult fs.ReadInfoFileResult
}

// Run runs the migrator.
func (m *Migrator) Run(ctx context.Context) error {
	ctx, span, _ := ctx.StartSampledTraceSpan(tracepoint.BootstrapperFilesystemSourceMigrator)
	defer span.Finish()

	// Find candidates
	candidates := m.findMigrationCandidates()
	if len(candidates) == 0 {
		m.log.Debug("no filesets to migrate. exiting.")
		return nil
	}

	m.log.Info("starting fileset migration", zap.Int("migrations", len(candidates)))

	nowFn := m.fsOpts.ClockOptions().NowFn()
	begin := nowFn()

	// Set up workers to perform migrations.
	var (
		numWorkers = m.migrationOpts.Concurrency()
		workers    = make([]*worker, 0, numWorkers)
	)

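	// baseOpts holds the task options shared by every worker; the per-candidate
	// fields (info file, shard, namespace metadata) and each worker's persist
	// manager are applied in startWorker.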
	baseOpts := migration.NewTaskOptions().
		SetFilesystemOptions(m.fsOpts).
		SetStorageOptions(m.storageOpts)
	for i := 0; i < numWorkers; i++ {
		// Give each worker its own persist manager so that we can write files concurrently.
		pm, err := fs.NewPersistManager(m.fsOpts)
		if err != nil {
			return err
		}
		worker := &worker{
			persistManager: pm,
			taskOptions:    baseOpts,
		}
		workers = append(workers, worker)
	}

	// Start up workers.
	var (
		wg                  sync.WaitGroup
		candidatesPerWorker = len(candidates) / numWorkers
		candidateIdx        = 0

		completedMigrationsLock sync.Mutex
		completedMigrations     = make([]completedMigration, 0, len(candidates))
	)
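	// Distribute candidates evenly across the workers; the last worker also picks
	// up any remainder left over from the integer division above.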
	for i, worker := range workers {
		endIdx := candidateIdx + candidatesPerWorker
		if i == len(workers)-1 {
			endIdx = len(candidates)
		}

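		// Shadow the loop variable so the goroutine below captures this iteration's worker.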
		worker := worker
		startIdx := candidateIdx // Capture current candidateIdx value for goroutine.
		wg.Add(1)
		go func() {
			output := m.startWorker(worker, candidates[startIdx:endIdx])

			completedMigrationsLock.Lock()
			completedMigrations = append(completedMigrations, output...)
			completedMigrationsLock.Unlock()

			wg.Done()
		}()

		candidateIdx = endIdx
	}

	// Wait until all workers have finished and completedMigrations has been updated.
	wg.Wait()

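	// Index the completed migrations by merge key so the updated info file results
	// can be merged back into the original infoFilesByNamespace map.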
	migrationResults := make(map[mergeKey]fs.ReadInfoFileResult, len(candidates))
	for _, result := range completedMigrations {
		migrationResults[result.key] = result.updatedInfoFileResult
	}

	m.mergeUpdatedInfoFiles(migrationResults)

	m.log.Info("fileset migration finished", zap.Duration("took", nowFn().Sub(begin)))

	return nil
}

func (m *Migrator) findMigrationCandidates() []migrationCandidate {
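	// Compute an upper bound on the number of candidates (every info file across
	// all namespaces and shards) so the slice below can be pre-sized; not every
	// fileset will actually need migration.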
	maxCapacity := 0
	for _, resultsByShard := range m.infoFilesByNamespace {
		for _, results := range resultsByShard {
			maxCapacity += len(results)
		}
	}

	candidates := make([]migrationCandidate, 0, maxCapacity)
	for md, resultsByShard := range m.infoFilesByNamespace {
		for shard, results := range resultsByShard {
			for _, info := range results {
				newTaskFn, shouldMigrate := m.migrationTaskFn(info)
				if shouldMigrate {
					candidates = append(candidates, migrationCandidate{
						newTaskFn:      newTaskFn,
						metadata:       md,
						shard:          shard,
						infoFileResult: info,
					})
				}
			}
		}
	}

	return candidates
}

func (m *Migrator) startWorker(worker *worker, candidates []migrationCandidate) []completedMigration {
	output := make([]completedMigration, 0, len(candidates))
	for _, candidate := range candidates {
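		// Layer the candidate's info file, shard, and namespace metadata, plus the
		// worker's persist manager, on top of the shared task options.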
		task, err := candidate.newTaskFn(worker.taskOptions.
			SetInfoFileResult(candidate.infoFileResult).
			SetShard(candidate.shard).
			SetNamespaceMetadata(candidate.metadata).
			SetPersistManager(worker.persistManager))
		if err != nil {
			m.log.Error("error creating migration task", zap.Error(err))
			// Skip this candidate; without a task there is nothing to run and calling
			// Run on a nil task would panic.
			continue
		}
		// NB(nate): Handling of errors should be re-evaluated as migrations are added. Current
		// migrations do not mutate state in a way that can leave data invalid on failure, and we
		// want to ensure that the bootstrap process is always able to continue. If either of these
		// conditions changes, error handling at this level AND at the migrator level should be
		// reconsidered.
		infoFileResult, err := task.Run()
		if err != nil {
			m.log.Error("error running migration task", zap.Error(err))
		} else {
			output = append(output, completedMigration{
				key: mergeKey{
					metadata:   candidate.metadata,
					shard:      candidate.shard,
					blockStart: candidate.infoFileResult.Info.BlockStart,
				},
				updatedInfoFileResult: infoFileResult,
			})
		}
	}

	return output
}

// mergeUpdatedInfoFiles takes all ReadInfoFileResults updated by a migration and merges them back
// into the infoFilesByNamespace map. This prevents callers from having to re-read info files to get
// updated in-memory structures.
func (m *Migrator) mergeUpdatedInfoFiles(migrationResults map[mergeKey]fs.ReadInfoFileResult) {
	for md, resultsByShard := range m.infoFilesByNamespace {
		for shard, results := range resultsByShard {
			for i, info := range results {
				if val, ok := migrationResults[mergeKey{
					metadata:   md,
					shard:      shard,
					blockStart: info.Info.BlockStart,
				}]; ok {
					results[i] = val
				}
			}
		}
	}
}