github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/bootstrap/bootstrapper/fs/migrator/migrator.go

// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package migrator

import (
    "sync"

    "github.com/m3db/m3/src/dbnode/namespace"
    "github.com/m3db/m3/src/dbnode/persist"
    "github.com/m3db/m3/src/dbnode/persist/fs"
    "github.com/m3db/m3/src/dbnode/persist/fs/migration"
    "github.com/m3db/m3/src/dbnode/storage"
    "github.com/m3db/m3/src/dbnode/storage/bootstrap"
    "github.com/m3db/m3/src/dbnode/tracepoint"
    "github.com/m3db/m3/src/x/context"
    "github.com/m3db/m3/src/x/instrument"

    "go.uber.org/zap"
)

type worker struct {
    persistManager persist.Manager
    taskOptions    migration.TaskOptions
}

// Migrator is responsible for migrating data filesets based on version information in
// the info files.
type Migrator struct {
    migrationTaskFn      MigrationTaskFn
    infoFilesByNamespace bootstrap.InfoFilesByNamespace
    migrationOpts        migration.Options
    fsOpts               fs.Options
    instrumentOpts       instrument.Options
    storageOpts          storage.Options
    log                  *zap.Logger
}

// NewMigrator creates a new Migrator.
func NewMigrator(opts Options) (Migrator, error) {
    if err := opts.Validate(); err != nil {
        return Migrator{}, err
    }
    return Migrator{
        migrationTaskFn:      opts.MigrationTaskFn(),
        infoFilesByNamespace: opts.InfoFilesByNamespace(),
        migrationOpts:        opts.MigrationOptions(),
        fsOpts:               opts.FilesystemOptions(),
        instrumentOpts:       opts.InstrumentOptions(),
        storageOpts:          opts.StorageOptions(),
        log:                  opts.InstrumentOptions().Logger(),
    }, nil
}
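
// Usage sketch: a caller such as the filesystem bootstrapper would construct a Migrator
// from validated Options and run it once during bootstrap. How opts and ctx are built is
// assumed here and elided.
//
//    migrator, err := NewMigrator(opts)
//    if err != nil {
//        return err
//    }
//    if err := migrator.Run(ctx); err != nil {
//        return err
//    }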

// migrationCandidate is the struct we generate when we find a fileset in need of
// migration. It's provided to the workers to perform the actual migration.
type migrationCandidate struct {
    newTaskFn      migration.NewTaskFn
    infoFileResult fs.ReadInfoFileResult
    metadata       namespace.Metadata
    shard          uint32
}

// mergeKey is the unique set of data that identifies a ReadInfoFileResult.
type mergeKey struct {
    metadata   namespace.Metadata
    shard      uint32
    blockStart int64
}

// completedMigration is the updated ReadInfoFileResult after a migration has been performed,
// plus the merge key, so that we can properly merge the updated result back into the
// infoFilesByNamespace map.
type completedMigration struct {
    key                   mergeKey
    updatedInfoFileResult fs.ReadInfoFileResult
}

// Run runs the migrator.
func (m *Migrator) Run(ctx context.Context) error {
    ctx, span, _ := ctx.StartSampledTraceSpan(tracepoint.BootstrapperFilesystemSourceMigrator)
    defer span.Finish()

    // Find candidates.
    candidates := m.findMigrationCandidates()
    if len(candidates) == 0 {
        m.log.Debug("no filesets to migrate. exiting.")
        return nil
    }

    m.log.Info("starting fileset migration", zap.Int("migrations", len(candidates)))

    nowFn := m.fsOpts.ClockOptions().NowFn()
    begin := nowFn()

    // Set up workers to perform migrations.
    var (
        numWorkers = m.migrationOpts.Concurrency()
        workers    = make([]*worker, 0, numWorkers)
    )

    baseOpts := migration.NewTaskOptions().
        SetFilesystemOptions(m.fsOpts).
        SetStorageOptions(m.storageOpts)
    for i := 0; i < numWorkers; i++ {
        // Give each worker their own persist manager so that we can write files concurrently.
        pm, err := fs.NewPersistManager(m.fsOpts)
        if err != nil {
            return err
        }
        worker := &worker{
            persistManager: pm,
            taskOptions:    baseOpts,
        }
        workers = append(workers, worker)
    }

    // Start up workers.
    var (
        wg                  sync.WaitGroup
        candidatesPerWorker = len(candidates) / numWorkers
        candidateIdx        = 0

        completedMigrationsLock sync.Mutex
        completedMigrations     = make([]completedMigration, 0, len(candidates))
    )
    for i, worker := range workers {
        endIdx := candidateIdx + candidatesPerWorker
        if i == len(workers)-1 {
            endIdx = len(candidates)
        }

        worker := worker
        startIdx := candidateIdx // Capture the current candidateIdx value for the goroutine.
        wg.Add(1)
        go func() {
            output := m.startWorker(worker, candidates[startIdx:endIdx])

            completedMigrationsLock.Lock()
            completedMigrations = append(completedMigrations, output...)
            completedMigrationsLock.Unlock()

            wg.Done()
        }()

        candidateIdx = endIdx
    }

    // Wait until all workers have finished and completedMigrations has been updated.
    wg.Wait()

    migrationResults := make(map[mergeKey]fs.ReadInfoFileResult, len(candidates))
    for _, result := range completedMigrations {
        migrationResults[result.key] = result.updatedInfoFileResult
    }

    m.mergeUpdatedInfoFiles(migrationResults)

    m.log.Info("fileset migration finished", zap.Duration("took", nowFn().Sub(begin)))

    return nil
}
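
// Note on the worker partitioning in Run: candidates are split into contiguous slices of
// len(candidates)/numWorkers (integer division), with the final worker taking any remainder.
// For example, 10 candidates across 4 workers are processed as the ranges [0:2), [2:4),
// [4:6) and [6:10).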

func (m *Migrator) findMigrationCandidates() []migrationCandidate {
    maxCapacity := 0
    for _, resultsByShard := range m.infoFilesByNamespace {
        for _, results := range resultsByShard {
            maxCapacity += len(results)
        }
    }

    candidates := make([]migrationCandidate, 0, maxCapacity)
    for md, resultsByShard := range m.infoFilesByNamespace {
        for shard, results := range resultsByShard {
            for _, info := range results {
                newTaskFn, shouldMigrate := m.migrationTaskFn(info)
                if shouldMigrate {
                    candidates = append(candidates, migrationCandidate{
                        newTaskFn:      newTaskFn,
                        metadata:       md,
                        shard:          shard,
                        infoFileResult: info,
                    })
                }
            }
        }
    }

    return candidates
}

func (m *Migrator) startWorker(worker *worker, candidates []migrationCandidate) []completedMigration {
    output := make([]completedMigration, 0, len(candidates))
    for _, candidate := range candidates {
        task, err := candidate.newTaskFn(worker.taskOptions.
            SetInfoFileResult(candidate.infoFileResult).
            SetShard(candidate.shard).
            SetNamespaceMetadata(candidate.metadata).
            SetPersistManager(worker.persistManager))
        if err != nil {
            m.log.Error("error creating migration task", zap.Error(err))
            // Skip this candidate rather than calling Run on a task that may be nil.
            continue
        }
        // NB(nate): Handling of errors should be re-evaluated as migrations are added. Current
        // migrations do not mutate state in such a way that data can be left in an invalid state
        // in the case of failures. Additionally, we want to ensure that the bootstrap process is
        // always able to continue. If either of these conditions change, error handling at this
        // level AND the migrator level should be reconsidered.
        infoFileResult, err := task.Run()
        if err != nil {
            m.log.Error("error running migration task", zap.Error(err))
        } else {
            output = append(output, completedMigration{
                key: mergeKey{
                    metadata:   candidate.metadata,
                    shard:      candidate.shard,
                    blockStart: candidate.infoFileResult.Info.BlockStart,
                },
                updatedInfoFileResult: infoFileResult,
            })
        }
    }

    return output
}

// mergeUpdatedInfoFiles takes all ReadInfoFileResults updated by a migration and merges them back
// into the infoFilesByNamespace map. This prevents callers from having to re-read info files to get
// updated in-memory structures.
func (m *Migrator) mergeUpdatedInfoFiles(migrationResults map[mergeKey]fs.ReadInfoFileResult) {
    for md, resultsByShard := range m.infoFilesByNamespace {
        for shard, results := range resultsByShard {
            for i, info := range results {
                if val, ok := migrationResults[mergeKey{
                    metadata:   md,
                    shard:      shard,
                    blockStart: info.Info.BlockStart,
                }]; ok {
                    results[i] = val
                }
            }
        }
    }
}
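
// Sketch of a minimal MigrationTaskFn, e.g. for wiring a Migrator that never migrates anything
// in tests. The shape below is inferred from how migrationTaskFn and migration.NewTaskFn are
// used in findMigrationCandidates and startWorker above, and is illustrative rather than an
// API guarantee.
//
//    noopTaskFn := func(result fs.ReadInfoFileResult) (migration.NewTaskFn, bool) {
//        return nil, false // nothing should be migrated
//    }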