github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/cleanup.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package storage

import (
	"fmt"
	"sort"
	"sync"

	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/persist/fs/commitlog"
	"github.com/m3db/m3/src/dbnode/retention"
	"github.com/m3db/m3/src/x/clock"
	xerrors "github.com/m3db/m3/src/x/errors"
	"github.com/m3db/m3/src/x/ident"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/pborman/uuid"
	"github.com/uber-go/tally"
	"go.uber.org/zap"
)

type (
	commitLogFilesFn func(commitlog.Options) (
		persist.CommitLogFiles, []commitlog.ErrorWithPath, error,
	)
	snapshotMetadataFilesFn func(fs.Options) (
		[]fs.SnapshotMetadata, []fs.SnapshotMetadataErrorWithPaths, error,
	)
)

type snapshotFilesFn func(
	filePathPrefix string, namespace ident.ID, shard uint32,
) (fs.FileSetFilesSlice, error)

type deleteFilesFn func(files []string) error

type deleteInactiveDirectoriesFn func(parentDirPath string, activeDirNames []string) error
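
// The function field types above mirror helpers in the fs and commitlog
// packages (commitlog.Files, fs.SortedSnapshotMetadataFiles, fs.SnapshotFiles,
// fs.DeleteFiles, fs.DeleteInactiveDirectories). newCleanupManager assigns
// those defaults to fields on cleanupManager, so the filesystem interaction can
// be substituted, for example in tests. A minimal sketch of that kind of
// override (hypothetical test wiring):
//
//	mgr := newCleanupManager(db, activeLogs, tally.NoopScope).(*cleanupManager)
//	var deleted []string
//	mgr.deleteFilesFn = func(files []string) error {
//		deleted = append(deleted, files...) // record paths instead of removing them
//		return nil
//	}
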
// Narrow interface so as not to expose all the functionality of the commitlog
// to the cleanup manager.
type activeCommitlogs interface {
	ActiveLogs() (persist.CommitLogFiles, error)
}

type cleanupManager struct {
	sync.RWMutex

	database         database
	activeCommitlogs activeCommitlogs

	opts                    Options
	nowFn                   clock.NowFn
	filePathPrefix          string
	commitLogsDir           string
	commitLogFilesFn        commitLogFilesFn
	snapshotMetadataFilesFn snapshotMetadataFilesFn
	snapshotFilesFn         snapshotFilesFn

	deleteFilesFn               deleteFilesFn
	deleteInactiveDirectoriesFn deleteInactiveDirectoriesFn
	warmFlushCleanupInProgress  bool
	coldFlushCleanupInProgress  bool
	metrics                     cleanupManagerMetrics
	logger                      *zap.Logger
}

type cleanupManagerMetrics struct {
	warmFlushCleanupStatus      tally.Gauge
	coldFlushCleanupStatus      tally.Gauge
	corruptCommitlogFile        tally.Counter
	corruptSnapshotFile         tally.Counter
	corruptSnapshotMetadataFile tally.Counter
	deletedCommitlogFile        tally.Counter
	deletedSnapshotFile         tally.Counter
	deletedSnapshotMetadataFile tally.Counter
}

func newCleanupManagerMetrics(scope tally.Scope) cleanupManagerMetrics {
	clScope := scope.SubScope("commitlog")
	sScope := scope.SubScope("snapshot")
	smScope := scope.SubScope("snapshot-metadata")
	return cleanupManagerMetrics{
		warmFlushCleanupStatus:      scope.Gauge("warm-flush-cleanup"),
		coldFlushCleanupStatus:      scope.Gauge("cold-flush-cleanup"),
		corruptCommitlogFile:        clScope.Counter("corrupt"),
		corruptSnapshotFile:         sScope.Counter("corrupt"),
		corruptSnapshotMetadataFile: smScope.Counter("corrupt"),
		deletedCommitlogFile:        clScope.Counter("deleted"),
		deletedSnapshotFile:         sScope.Counter("deleted"),
		deletedSnapshotMetadataFile: smScope.Counter("deleted"),
	}
}

func newCleanupManager(
	database database, activeLogs activeCommitlogs, scope tally.Scope) databaseCleanupManager {
	opts := database.Options()
	filePathPrefix := opts.CommitLogOptions().FilesystemOptions().FilePathPrefix()
	commitLogsDir := fs.CommitLogsDirPath(filePathPrefix)

	return &cleanupManager{
		database:         database,
		activeCommitlogs: activeLogs,

		opts:                        opts,
		nowFn:                       opts.ClockOptions().NowFn(),
		filePathPrefix:              filePathPrefix,
		commitLogsDir:               commitLogsDir,
		commitLogFilesFn:            commitlog.Files,
		snapshotMetadataFilesFn:     fs.SortedSnapshotMetadataFiles,
		snapshotFilesFn:             fs.SnapshotFiles,
		deleteFilesFn:               fs.DeleteFiles,
		deleteInactiveDirectoriesFn: fs.DeleteInactiveDirectories,
		metrics:                     newCleanupManagerMetrics(scope),
		logger:                      opts.InstrumentOptions().Logger(),
	}
}

func (m *cleanupManager) WarmFlushCleanup(t xtime.UnixNano) error {
	m.Lock()
	m.warmFlushCleanupInProgress = true
	m.Unlock()

	defer func() {
		m.Lock()
		m.warmFlushCleanupInProgress = false
		m.Unlock()
	}()

	namespaces, err := m.database.OwnedNamespaces()
	if err != nil {
		return err
	}

	multiErr := xerrors.NewMultiError()
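	// Each cleanup step below is attempted even if earlier steps fail; errors
	// are accumulated into multiErr and reported together once all steps have
	// run.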
	if err := m.cleanupExpiredIndexFiles(t, namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when cleaning up expired index files for %v: %w", t, err))
	}

	if err := m.cleanupCorruptedIndexFiles(namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when cleaning up corrupted index files for %v: %w", t, err))
	}

	if err := m.cleanupDuplicateIndexFiles(namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when cleaning up duplicate index files for %v: %w", t, err))
	}

	if err := m.deleteInactiveDataSnapshotFiles(namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when deleting inactive snapshot files for %v: %w", t, err))
	}

	if err := m.deleteInactiveNamespaceFiles(namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when deleting inactive namespace files for %v: %w", t, err))
	}

	if err := m.cleanupSnapshotsAndCommitlogs(namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when cleaning up snapshot and commitlog files: %w", err))
	}

	return multiErr.FinalError()
}

func (m *cleanupManager) ColdFlushCleanup(t xtime.UnixNano) error {
	m.Lock()
	m.coldFlushCleanupInProgress = true
	m.Unlock()

	defer func() {
		m.Lock()
		m.coldFlushCleanupInProgress = false
		m.Unlock()
	}()

	namespaces, err := m.database.OwnedNamespaces()
	if err != nil {
		return err
	}

	multiErr := xerrors.NewMultiError()
	if err := m.cleanupDataFiles(t, namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when cleaning up data files for %v: %w", t, err))
	}

	if err := m.deleteInactiveDataFiles(namespaces); err != nil {
		multiErr = multiErr.Add(fmt.Errorf(
			"encountered errors when deleting inactive data files for %v: %w", t, err))
	}

	return multiErr.FinalError()
}

func (m *cleanupManager) Report() {
	m.RLock()
	coldFlushCleanupInProgress := m.coldFlushCleanupInProgress
	warmFlushCleanupInProgress := m.warmFlushCleanupInProgress
	m.RUnlock()

	if coldFlushCleanupInProgress {
		m.metrics.coldFlushCleanupStatus.Update(1)
	} else {
		m.metrics.coldFlushCleanupStatus.Update(0)
	}

	if warmFlushCleanupInProgress {
		m.metrics.warmFlushCleanupStatus.Update(1)
	} else {
		m.metrics.warmFlushCleanupStatus.Update(0)
	}
}

func (m *cleanupManager) deleteInactiveNamespaceFiles(namespaces []databaseNamespace) error {
	var namespaceDirNames []string
	filePathPrefix := m.database.Options().CommitLogOptions().FilesystemOptions().FilePathPrefix()
	dataDirPath := fs.DataDirPath(filePathPrefix)

	for _, n := range namespaces {
		namespaceDirNames = append(namespaceDirNames, n.ID().String())
	}

	return m.deleteInactiveDirectoriesFn(dataDirPath, namespaceDirNames)
}

// deleteInactiveDataFiles will delete data files for shards that the node no longer owns,
// which can occur in the case of topology changes.
func (m *cleanupManager) deleteInactiveDataFiles(namespaces []databaseNamespace) error {
	return m.deleteInactiveDataFileSetFiles(fs.NamespaceDataDirPath, namespaces)
}

// deleteInactiveDataSnapshotFiles will delete snapshot files for shards that the node no longer owns,
// which can occur in the case of topology changes.
func (m *cleanupManager) deleteInactiveDataSnapshotFiles(namespaces []databaseNamespace) error {
	return m.deleteInactiveDataFileSetFiles(fs.NamespaceSnapshotsDirPath, namespaces)
}

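// deleteInactiveDataFileSetFiles prunes each namespace directory down to the
// shards this node still owns: every owned shard ID is formatted as a
// directory name and the resulting allow-list is handed to
// deleteInactiveDirectoriesFn, which removes directories not on the list.
// A minimal sketch of the name-building step in isolation (hypothetical shard
// IDs, for illustration only):
//
//	ownedShardIDs := []uint32{0, 4, 7}
//	activeDirNames := make([]string, 0, len(ownedShardIDs))
//	for _, id := range ownedShardIDs {
//		activeDirNames = append(activeDirNames, fmt.Sprintf("%d", id))
//	}
//	// directories whose names are not in activeDirNames are deleted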
func (m *cleanupManager) deleteInactiveDataFileSetFiles(
	filesetFilesDirPathFn func(string, ident.ID) string, namespaces []databaseNamespace,
) error {
	multiErr := xerrors.NewMultiError()
	filePathPrefix := m.database.Options().CommitLogOptions().FilesystemOptions().FilePathPrefix()
	for _, n := range namespaces {
		var activeShards []string
		namespaceDirPath := filesetFilesDirPathFn(filePathPrefix, n.ID())
		// NB(linasn) This should list ALL shards because it will delete
		// dirs for the shards NOT LISTED below.
		for _, s := range n.OwnedShards() {
			shard := fmt.Sprintf("%d", s.ID())
			activeShards = append(activeShards, shard)
		}
		multiErr = multiErr.Add(m.deleteInactiveDirectoriesFn(namespaceDirPath, activeShards))
	}

	return multiErr.FinalError()
}

func (m *cleanupManager) cleanupDataFiles(t xtime.UnixNano, namespaces []databaseNamespace) error {
	multiErr := xerrors.NewMultiError()
	for _, n := range namespaces {
		if !n.Options().CleanupEnabled() {
			continue
		}
		earliestToRetain := retention.FlushTimeStart(n.Options().RetentionOptions(), t)
		shards := n.OwnedShards()
		multiErr = multiErr.Add(m.cleanupExpiredNamespaceDataFiles(earliestToRetain, shards))
		multiErr = multiErr.Add(m.cleanupCompactedNamespaceDataFiles(shards))
	}
	return multiErr.FinalError()
}

func (m *cleanupManager) cleanupExpiredIndexFiles(
	t xtime.UnixNano, namespaces []databaseNamespace,
) error {
	multiErr := xerrors.NewMultiError()
	for _, n := range namespaces {
		if !n.Options().CleanupEnabled() || !n.Options().IndexOptions().Enabled() {
			continue
		}
		idx, err := n.Index()
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		multiErr = multiErr.Add(idx.CleanupExpiredFileSets(t))
	}
	return multiErr.FinalError()
}

func (m *cleanupManager) cleanupCorruptedIndexFiles(namespaces []databaseNamespace) error {
	multiErr := xerrors.NewMultiError()
	for _, n := range namespaces {
		if !n.Options().CleanupEnabled() || !n.Options().IndexOptions().Enabled() {
			continue
		}
		idx, err := n.Index()
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		multiErr = multiErr.Add(idx.CleanupCorruptedFileSets())
	}
	return multiErr.FinalError()
}

func (m *cleanupManager) cleanupDuplicateIndexFiles(namespaces []databaseNamespace) error {
	multiErr := xerrors.NewMultiError()
	for _, n := range namespaces {
		if !n.Options().CleanupEnabled() || !n.Options().IndexOptions().Enabled() {
			continue
		}
		idx, err := n.Index()
		if err != nil {
			multiErr = multiErr.Add(err)
			continue
		}
		activeShards := make([]uint32, 0)
		for _, s := range n.OwnedShards() {
			activeShards = append(activeShards, s.ID())
		}
		multiErr = multiErr.Add(idx.CleanupDuplicateFileSets(activeShards))
	}
	return multiErr.FinalError()
}

func (m *cleanupManager) cleanupExpiredNamespaceDataFiles(
	earliestToRetain xtime.UnixNano, shards []databaseShard,
) error {
	multiErr := xerrors.NewMultiError()
	for _, shard := range shards {
		if !shard.IsBootstrapped() {
			continue
		}
		if err := shard.CleanupExpiredFileSets(earliestToRetain); err != nil {
			multiErr = multiErr.Add(err)
		}
	}

	return multiErr.FinalError()
}

func (m *cleanupManager) cleanupCompactedNamespaceDataFiles(shards []databaseShard) error {
	multiErr := xerrors.NewMultiError()
	for _, shard := range shards {
		if !shard.IsBootstrapped() {
			continue
		}
		if err := shard.CleanupCompactedFileSets(); err != nil {
			multiErr = multiErr.Add(err)
		}
	}

	return multiErr.FinalError()
}

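// For context on cleanupDataFiles above: earliestToRetain is the start of the
// oldest block that still falls inside the namespace's retention window, as
// computed by retention.FlushTimeStart; data filesets for blocks before it
// become eligible for removal. Roughly, with hypothetical settings:
//
//	// retention period 48h, block size 2h, t = 2023-11-29T10:30Z
//	// earliestToRetain is t minus the retention period, aligned down to a
//	// block boundary, i.e. about 2023-11-27T10:00Z; filesets for earlier
//	// blocks can be cleaned up once their shards are bootstrapped.
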
// The goal of the cleanupSnapshotsAndCommitlogs function is to delete all snapshot files, snapshot metadata
// files, and commitlog files except for those that are currently required for recovery from a node failure.
// According to the snapshotting / commitlog rotation logic, the files that are required for a complete
// recovery are:
//
//  1. The most recent (highest index) snapshot metadata file.
//  2. All snapshot files whose associated snapshot ID matches the snapshot ID of the most recent snapshot
//     metadata file.
//  3. All commitlog files whose index is larger than or equal to the index of the commitlog identifier stored
//     in the most recent snapshot metadata file. This is because the snapshotting and commitlog rotation process
//     guarantees that the most recent snapshot contains all data stored in commitlogs that were created before
//     the rotation / snapshot process began.
//
// cleanupSnapshotsAndCommitlogs accomplishes this goal by performing the following steps:
//
//  1. List all the snapshot metadata files on disk.
//  2. Identify the most recent one (highest index).
//  3. For every namespace/shard/block combination, delete all snapshot
//     files that match one of the following criteria:
//     1. Snapshot files whose associated snapshot ID does not match the snapshot ID of the most recent
//        snapshot metadata file.
//     2. Snapshot files that are corrupt.
//  4. Delete all snapshot metadata files prior to the most recent one.
//  5. Delete corrupt snapshot metadata files.
//  6. List all the commitlog files on disk.
//  7. List all the commitlog files that are being actively written to.
//  8. Delete all commitlog files whose index is lower than the index of the commitlog file referenced in the
//     most recent snapshot metadata file (ignoring any commitlog files being actively written to).
//  9. Delete all corrupt commitlog files (ignoring any commitlog files being actively written to).
//
// This process is also modeled formally in TLA+ in the file `SnapshotsSpec.tla`, and a worked example of the
// retention rule appears in a comment at the end of this file.
func (m *cleanupManager) cleanupSnapshotsAndCommitlogs(namespaces []databaseNamespace) (finalErr error) {
	logger := m.opts.InstrumentOptions().Logger().With(
		zap.String("comment",
			"partial/corrupt files are expected as result of a restart (this is ok)"),
	)

	fsOpts := m.opts.CommitLogOptions().FilesystemOptions()
	snapshotMetadatas, snapshotMetadataErrorsWithPaths, err := m.snapshotMetadataFilesFn(fsOpts)
	if err != nil {
		return err
	}

	if len(snapshotMetadatas) == 0 {
		// No cleanup can be performed until we have at least one complete snapshot.
		return nil
	}

	// They should technically already be sorted, but better to be safe.
	sort.Slice(snapshotMetadatas, func(i, j int) bool {
		return snapshotMetadatas[i].ID.Index < snapshotMetadatas[j].ID.Index
	})
	sortedSnapshotMetadatas := snapshotMetadatas

	// Sanity check.
	lastMetadataIndex := int64(-1)
	for _, snapshotMetadata := range sortedSnapshotMetadatas {
		currIndex := snapshotMetadata.ID.Index
		if currIndex == lastMetadataIndex {
			// Should never happen.
			return fmt.Errorf(
				"found two snapshot metadata files with duplicate index: %d", currIndex)
		}
		lastMetadataIndex = currIndex
	}

	if len(sortedSnapshotMetadatas) == 0 {
		// No cleanup can be performed until we have at least one complete snapshot.
		return nil
	}

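	// After sorting, the last metadata entry is the most recent complete
	// snapshot; the retention decisions below are all made relative to it.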
	var (
		multiErr           = xerrors.NewMultiError()
		filesToDelete      = []string{}
		mostRecentSnapshot = sortedSnapshotMetadatas[len(sortedSnapshotMetadatas)-1]
	)
	defer func() {
		// Use a defer to perform the final file deletion so that we can attempt to cleanup *some* files
		// when we encounter partial errors on a best effort basis.
		multiErr = multiErr.Add(finalErr)
		multiErr = multiErr.Add(m.deleteFilesFn(filesToDelete))
		finalErr = multiErr.FinalError()
	}()

	for _, ns := range namespaces {
		for _, s := range ns.OwnedShards() {
			if !s.IsBootstrapped() {
				continue
			}
			shardSnapshots, err := m.snapshotFilesFn(fsOpts.FilePathPrefix(), ns.ID(), s.ID())
			if err != nil {
				multiErr = multiErr.Add(fmt.Errorf(
					"err reading snapshot files for ns: %s and shard: %d, err: %w",
					ns.ID(), s.ID(), err,
				))
				continue
			}

			for _, snapshot := range shardSnapshots {
				_, snapshotID, err := snapshot.SnapshotTimeAndID()
				if err != nil {
					// If we can't parse the snapshotID, assume the snapshot is corrupt and delete it. This could be
					// caused by a variety of situations, like a node crashing while writing out a set of snapshot
					// files, and should have no impact on correctness as the snapshot files from the previous
					// (successful) snapshot will still be retained.
					m.metrics.corruptSnapshotFile.Inc(1)
					logger.With(
						zap.Error(err),
						zap.Strings("files", snapshot.AbsoluteFilePaths),
					).Warn("corrupt snapshot file during cleanup, marking files for deletion")
					filesToDelete = append(filesToDelete, snapshot.AbsoluteFilePaths...)
					continue
				}

				if !uuid.Equal(snapshotID, mostRecentSnapshot.ID.UUID) {
					// If the UUID of the snapshot files doesn't match the most recent snapshot
					// then it's safe to delete because it means we have a more recent complete set.
					m.metrics.deletedSnapshotFile.Inc(1)
					filesToDelete = append(filesToDelete, snapshot.AbsoluteFilePaths...)
				}
			}
		}
	}

	// Delete all snapshot metadata files prior to the most recent one.
	for _, snapshot := range sortedSnapshotMetadatas[:len(sortedSnapshotMetadatas)-1] {
		m.metrics.deletedSnapshotMetadataFile.Inc(1)
		filesToDelete = append(filesToDelete, snapshot.AbsoluteFilePaths()...)
	}

	// Delete corrupt snapshot metadata files.
	for _, errorWithPath := range snapshotMetadataErrorsWithPaths {
		m.metrics.corruptSnapshotMetadataFile.Inc(1)
		logger.With(
			zap.Error(errorWithPath.Error),
			zap.String("metadataFilePath", errorWithPath.MetadataFilePath),
			zap.String("checkpointFilePath", errorWithPath.CheckpointFilePath),
		).Warn("corrupt snapshot metadata file during cleanup, marking files for deletion")
		filesToDelete = append(filesToDelete, errorWithPath.MetadataFilePath)
		filesToDelete = append(filesToDelete, errorWithPath.CheckpointFilePath)
	}

	// Figure out which commitlog files exist on disk.
	files, commitlogErrorsWithPaths, err := m.commitLogFilesFn(m.opts.CommitLogOptions())
	if err != nil {
		// Hard failure here because the remaining cleanup logic relies on this data
		// being available.
		return err
	}

	// Figure out which commitlog files are being actively written to.
	activeCommitlogs, err := m.activeCommitlogs.ActiveLogs()
	if err != nil {
		// Hard failure here because the remaining cleanup logic relies on this data
		// being available.
		return err
	}

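	// Only commitlog files strictly older than the commitlog referenced by the
	// most recent snapshot metadata are removed; files with an equal or higher
	// index may contain writes that the snapshot does not capture, so they are
	// kept.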
	// Delete all commitlog files prior to the one captured by the most recent snapshot.
	for _, file := range files {
		if activeCommitlogs.Contains(file.FilePath) {
			// Skip over any commitlog files that are being actively written to.
			continue
		}

		if file.Index < mostRecentSnapshot.CommitlogIdentifier.Index {
			m.metrics.deletedCommitlogFile.Inc(1)
			filesToDelete = append(filesToDelete, file.FilePath)
		}
	}

	// Delete corrupt commitlog files.
	for _, errorWithPath := range commitlogErrorsWithPaths {
		if activeCommitlogs.Contains(errorWithPath.Path()) {
			// Skip over any commitlog files that are being actively written to. Note that it
			// is common for an active commitlog to appear corrupt because the info header has
			// not been flushed yet.
			continue
		}

		m.metrics.corruptCommitlogFile.Inc(1)
		// If we were unable to read the commit log file's info header, then we're forced to assume
		// that the file is corrupt and remove it. This can happen in situations where M3DB experiences
		// a sudden shutdown.
		logger.With(
			zap.Error(errorWithPath),
			zap.String("path", errorWithPath.Path()),
		).Warn("corrupt commitlog file during cleanup, marking file for deletion")
		filesToDelete = append(filesToDelete, errorWithPath.Path())
	}

	return finalErr
}
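
// A worked example of the retention rule enforced by cleanupSnapshotsAndCommitlogs,
// using hypothetical values (see the doc comment above that function):
//
//	snapshot metadata on disk:  index 3 (UUID A), index 4 (UUID B), index 5 (UUID C, commitlog index 7)
//	snapshot files on disk:     some written under UUID B, some under UUID C, one corrupt set
//	commitlog files on disk:    indices 5, 6, 7, 8 (index 8 actively written to)
//
// Cleanup keeps the metadata file with index 5, every snapshot file whose
// snapshot ID is UUID C, and commitlog files with index 7 or higher. It marks
// for deletion the metadata files with indices 3 and 4, the snapshot files
// written under UUID B, the corrupt snapshot set, and commitlog files 5 and 6;
// commitlog 8 is skipped while it is active, even if it appears corrupt.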