github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/files.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 
20 21 package fs 22 23 import ( 24 "bufio" 25 "errors" 26 "fmt" 27 "os" 28 "path" 29 "path/filepath" 30 "sort" 31 "strconv" 32 "strings" 33 "time" 34 35 "github.com/m3db/m3/src/dbnode/digest" 36 "github.com/m3db/m3/src/dbnode/generated/proto/index" 37 "github.com/m3db/m3/src/dbnode/persist" 38 "github.com/m3db/m3/src/dbnode/persist/fs/msgpack" 39 "github.com/m3db/m3/src/dbnode/persist/schema" 40 idxpersist "github.com/m3db/m3/src/m3ninx/persist" 41 xerrors "github.com/m3db/m3/src/x/errors" 42 "github.com/m3db/m3/src/x/ident" 43 "github.com/m3db/m3/src/x/instrument" 44 xtime "github.com/m3db/m3/src/x/time" 45 46 "github.com/pborman/uuid" 47 ) 48 49 var ( 50 timeZero time.Time 51 52 errSnapshotTimeAndIDZero = errors.New("tried to read snapshot time and ID of zero value") 53 errNonSnapshotFileset = errors.New("tried to determine snapshot time and id of non-snapshot") 54 ) 55 56 const ( 57 dataDirName = "data" 58 indexDirName = "index" 59 snapshotDirName = "snapshots" 60 commitLogsDirName = "commitlogs" 61 62 // The maximum number of delimeters ('-' or '.') that is expected in a 63 // (base) filename. 64 maxDelimNum = 4 65 66 // The volume index assigned to (legacy) filesets that don't have a volume 67 // number in their filename. 68 // NOTE: Since this index is the same as the index for the first 69 // (non-legacy) fileset, receiving an index of 0 means that we need to 70 // check for both indexed and non-indexed filenames. 
	// check for both indexed and non-indexed filenames.
	unindexedFilesetIndex = 0

	// Positions (delimiter index) of the numeric components within the
	// different filename layouts parsed below.
	timeComponentPosition         = 1
	commitLogComponentPosition    = 2
	indexFileSetComponentPosition = 2
	dataFileSetComponentPosition  = 2

	numComponentsSnapshotMetadataFile           = 4
	numComponentsSnapshotMetadataCheckpointFile = 5
	snapshotMetadataUUIDComponentPosition       = 1
	snapshotMetadataIndexComponentPosition      = 2

	// Format string used for errors about filenames that do not match the
	// expected layout.
	errUnexpectedFilenamePattern = "unexpected filename: %s"
)

// Default buffer size of a bufio.Reader, used when reading fileset info files.
var defaultBufioReaderSize = bufio.NewReader(nil).Size()

// fileOpener opens the file at filePath and returns the open descriptor.
type fileOpener func(filePath string) (*os.File, error)

// LazyEvalBool is a boolean that is lazily evaluated.
type LazyEvalBool uint8

const (
	// EvalNone indicates the boolean has not been evaluated.
	EvalNone LazyEvalBool = iota
	// EvalTrue indicates the boolean has been evaluated to true.
	EvalTrue
	// EvalFalse indicates the boolean has been evaluated to false.
	EvalFalse
)

// FileSetFile represents a set of FileSet files for a given block start
type FileSetFile struct {
	ID                FileSetFileIdentifier
	AbsoluteFilePaths []string

	// CachedSnapshotTime / CachedSnapshotID lazily cache the values read from
	// the snapshot info file (zero/nil until first read).
	CachedSnapshotTime xtime.UnixNano
	CachedSnapshotID   uuid.UUID
	// CachedHasCompleteCheckpointFile lazily caches whether a fully written
	// checkpoint file exists for this fileset.
	CachedHasCompleteCheckpointFile LazyEvalBool
	filePathPrefix                  string
}

// SnapshotTimeAndID returns the snapshot time and id for the given FileSetFile.
// Value is meaningless if the the FileSetFile is a flush instead of a snapshot.
func (f *FileSetFile) SnapshotTimeAndID() (xtime.UnixNano, uuid.UUID, error) {
	if f.IsZero() {
		return 0, nil, errSnapshotTimeAndIDZero
	}
	// Only snapshot filesets carry a snapshot time/ID in their info file.
	if _, ok := f.SnapshotFilepath(); !ok {
		return 0, nil, errNonSnapshotFileset
	}

	if !f.CachedSnapshotTime.IsZero() || f.CachedSnapshotID != nil {
		// Return immediately if we've already cached it.
		return f.CachedSnapshotTime, f.CachedSnapshotID, nil
	}

	// Cache miss: read the values from the snapshot info file on disk.
	snapshotTime, snapshotID, err := SnapshotTimeAndID(f.filePathPrefix, f.ID)
	if err != nil {
		return 0, nil, err
	}

	// Cache for future use and return.
	f.CachedSnapshotTime = snapshotTime
	f.CachedSnapshotID = snapshotID
	return f.CachedSnapshotTime, f.CachedSnapshotID, nil
}

// InfoFilePath returns the info file path of a filesetfile (if found).
func (f *FileSetFile) InfoFilePath() (string, bool) {
	return f.filepath(InfoFileSuffix)
}

// SnapshotFilepath returns the info file path of a filesetfile (if found).
func (f *FileSetFile) SnapshotFilepath() (string, bool) {
	return f.filepath(snapshotDirName)
}

// IsZero returns whether the FileSetFile is a zero value.
func (f FileSetFile) IsZero() bool {
	return len(f.AbsoluteFilePaths) == 0
}

// filepath returns the LAST absolute file path containing pathContains, and
// whether any match was found (the loop deliberately does not break early).
func (f *FileSetFile) filepath(pathContains string) (string, bool) {
	var (
		found    bool
		foundIdx int
	)
	for idx, path := range f.AbsoluteFilePaths {
		if strings.Contains(path, pathContains) {
			found = true
			foundIdx = idx
		}
	}
	if found {
		return f.AbsoluteFilePaths[foundIdx], true
	}
	return "", false
}

// HasCompleteCheckpointFile returns a bool indicating whether the given set of
// fileset files has a checkpoint file.
173 func (f *FileSetFile) HasCompleteCheckpointFile() bool { 174 switch f.CachedHasCompleteCheckpointFile { 175 case EvalNone: 176 f.CachedHasCompleteCheckpointFile = f.evalHasCompleteCheckpointFile() 177 return f.HasCompleteCheckpointFile() 178 case EvalTrue: 179 return true 180 } 181 return false 182 } 183 184 func (f *FileSetFile) evalHasCompleteCheckpointFile() LazyEvalBool { 185 for _, fileName := range f.AbsoluteFilePaths { 186 if strings.Contains(fileName, CheckpointFileSuffix) { 187 exists, err := CompleteCheckpointFileExists(fileName) 188 if err != nil { 189 continue 190 } 191 if exists { 192 return EvalTrue 193 } 194 } 195 } 196 197 return EvalFalse 198 } 199 200 // FileSetFilesSlice is a slice of FileSetFile 201 type FileSetFilesSlice []FileSetFile 202 203 // Filepaths flattens a slice of FileSetFiles to a single slice of filepaths. 204 // All paths returned are absolute. 205 func (f FileSetFilesSlice) Filepaths() []string { 206 flattened := []string{} 207 for _, fileset := range f { 208 flattened = append(flattened, fileset.AbsoluteFilePaths...) 209 } 210 211 return flattened 212 } 213 214 // LatestVolumeForBlock returns the latest (highest index) FileSetFile in the 215 // slice for a given block start that has a complete checkpoint file. 216 func (f FileSetFilesSlice) LatestVolumeForBlock(blockStart xtime.UnixNano) (FileSetFile, bool) { 217 // Make sure we're already sorted. 
218 f.sortByTimeAndVolumeIndexAscending() 219 220 for i, curr := range f { 221 if curr.ID.BlockStart.Equal(blockStart) { 222 var ( 223 bestSoFar FileSetFile 224 bestSoFarExists bool 225 ) 226 227 for j := i; j < len(f); j++ { 228 curr = f[j] 229 230 if !curr.ID.BlockStart.Equal(blockStart) { 231 break 232 } 233 234 if curr.HasCompleteCheckpointFile() && curr.ID.VolumeIndex >= bestSoFar.ID.VolumeIndex { 235 bestSoFar = curr 236 bestSoFarExists = true 237 } 238 239 } 240 241 return bestSoFar, bestSoFarExists 242 } 243 } 244 245 return FileSetFile{}, false 246 } 247 248 // VolumeExistsForBlock returns whether there is a valid FileSetFile for the 249 // given block start and volume index. 250 func (f FileSetFilesSlice) VolumeExistsForBlock(blockStart xtime.UnixNano, volume int) bool { 251 for _, curr := range f { 252 if curr.ID.BlockStart.Equal(blockStart) && curr.ID.VolumeIndex == volume { 253 return curr.HasCompleteCheckpointFile() 254 } 255 } 256 257 return false 258 } 259 260 // ignores the index in the FileSetFileIdentifier because fileset files should 261 // always have index 0. 262 func (f FileSetFilesSlice) sortByTimeAscending() { 263 sort.Slice(f, func(i, j int) bool { 264 return f[i].ID.BlockStart.Before(f[j].ID.BlockStart) 265 }) 266 } 267 268 func (f FileSetFilesSlice) sortByTimeAndVolumeIndexAscending() { 269 sort.Slice(f, func(i, j int) bool { 270 if f[i].ID.BlockStart.Equal(f[j].ID.BlockStart) { 271 return f[i].ID.VolumeIndex < f[j].ID.VolumeIndex 272 } 273 274 return f[i].ID.BlockStart.Before(f[j].ID.BlockStart) 275 }) 276 } 277 278 // SnapshotMetadata represents a SnapshotMetadata file, along with its checkpoint file, 279 // as well as all the information contained within the metadata file and paths to the 280 // physical files on disk. 
type SnapshotMetadata struct {
	// ID uniquely identifies the snapshot metadata file.
	ID SnapshotMetadataIdentifier
	// CommitlogIdentifier is the commit log file associated with the snapshot.
	CommitlogIdentifier persist.CommitLogFile
	// MetadataFilePath is the on-disk path of the metadata file.
	MetadataFilePath string
	// CheckpointFilePath is the on-disk path of the checkpoint file.
	CheckpointFilePath string
}

// AbsoluteFilePaths returns a slice of all the absolute filepaths associated
// with a snapshot metadata.
func (s SnapshotMetadata) AbsoluteFilePaths() []string {
	return []string{s.MetadataFilePath, s.CheckpointFilePath}
}

// SnapshotMetadataErrorWithPaths contains an error that occurred while trying to
// read a snapshot metadata file, as well as paths for the metadata file path and
// the checkpoint file path so that they can be cleaned up. The checkpoint file may
// not exist if only the metadata file was written out (due to sudden node failure)
// or if the metadata file name was structured incorrectly (should never happen.)
type SnapshotMetadataErrorWithPaths struct {
	Error              error
	MetadataFilePath   string
	CheckpointFilePath string
}

// SnapshotMetadataIdentifier is an identifier for a snapshot metadata file
type SnapshotMetadataIdentifier struct {
	// Index is the snapshot index; sorting by it yields the chronological
	// order in which snapshots were taken.
	Index int64
	UUID  uuid.UUID
}

// NewFileSetFileIdentifier creates a new FileSetFileIdentifier.
312 func NewFileSetFileIdentifier( 313 namespace ident.ID, 314 blockStart xtime.UnixNano, 315 shard uint32, 316 volumeIndex int, 317 ) FileSetFileIdentifier { 318 return FileSetFileIdentifier{ 319 Namespace: namespace, 320 Shard: shard, 321 BlockStart: blockStart, 322 VolumeIndex: volumeIndex, 323 } 324 } 325 326 // NewFileSetFile creates a new FileSet file 327 func NewFileSetFile(id FileSetFileIdentifier, filePathPrefix string) FileSetFile { 328 return FileSetFile{ 329 ID: id, 330 AbsoluteFilePaths: []string{}, 331 filePathPrefix: filePathPrefix, 332 } 333 } 334 335 func openFiles(opener fileOpener, fds map[string]**os.File) error { 336 var firstErr error 337 for filePath, fdPtr := range fds { 338 fd, err := opener(filePath) 339 if err != nil { 340 firstErr = err 341 break 342 } 343 *fdPtr = fd 344 } 345 346 if firstErr == nil { 347 return nil 348 } 349 350 // If we have encountered an error when opening the files, 351 // close the ones that have been opened. 352 for _, fdPtr := range fds { 353 if *fdPtr != nil { 354 (*fdPtr).Close() 355 } 356 } 357 358 return firstErr 359 } 360 361 // DeleteFiles delete a set of files, returning all the errors encountered during 362 // the deletion process. 363 func DeleteFiles(filePaths []string) error { 364 multiErr := xerrors.NewMultiError() 365 for _, file := range filePaths { 366 if err := os.Remove(file); err != nil { 367 detailedErr := fmt.Errorf("failed to remove file %s: %v", file, err) 368 multiErr = multiErr.Add(detailedErr) 369 } 370 } 371 return multiErr.FinalError() 372 } 373 374 // DeleteDirectories delets a set of directories and its contents, returning all 375 // of the errors encountered during the deletion process. 
376 func DeleteDirectories(dirPaths []string) error { 377 multiErr := xerrors.NewMultiError() 378 for _, dir := range dirPaths { 379 if err := os.RemoveAll(dir); err != nil { 380 detailedErr := fmt.Errorf("failed to remove dir %s: %v", dir, err) 381 multiErr = multiErr.Add(detailedErr) 382 } 383 } 384 return multiErr.FinalError() 385 } 386 387 // byTimeAscending sorts files by their block start times in ascending order. 388 // If the files do not have block start times in their names, the result is undefined. 389 type byTimeAscending []string 390 391 func (a byTimeAscending) Len() int { return len(a) } 392 func (a byTimeAscending) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 393 func (a byTimeAscending) Less(i, j int) bool { 394 ti, _ := TimeFromFileName(a[i]) 395 tj, _ := TimeFromFileName(a[j]) 396 return ti.Before(tj) 397 } 398 399 // commitlogsByTimeAndIndexAscending sorts commitlogs by their block start times and index in ascending 400 // order. If the files do not have block start times or indexes in their names, the result is undefined. 401 type commitlogsByTimeAndIndexAscending []string 402 403 func (a commitlogsByTimeAndIndexAscending) Len() int { return len(a) } 404 func (a commitlogsByTimeAndIndexAscending) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 405 func (a commitlogsByTimeAndIndexAscending) Less(i, j int) bool { 406 ti, ii, _ := TimeAndIndexFromCommitlogFilename(a[i]) 407 tj, ij, _ := TimeAndIndexFromCommitlogFilename(a[j]) 408 if ti.Before(tj) { 409 return true 410 } 411 return ti.Equal(tj) && ii < ij 412 } 413 414 // Returns the positions of filename delimiters ('-' and '.') and the number of 415 // delimeters found, to be used in conjunction with the intComponentAtIndex 416 // function to extract filename components. 
This function is deliberately 417 // optimized for speed and lack of allocations, since allocation-heavy filename 418 // parsing can quickly become a large source of allocations in the entire 419 // system, especially when namespaces with long retentions are configured. 420 func delimiterPositions(baseFilename string) ([maxDelimNum]int, int) { 421 var ( 422 delimPos [maxDelimNum]int 423 delimsFound int 424 ) 425 426 for i := range baseFilename { 427 if r := baseFilename[i]; r == separatorRune || r == fileSuffixDelimeterRune { 428 delimPos[delimsFound] = i 429 delimsFound++ 430 431 if delimsFound == len(delimPos) { 432 // Found the maximum expected number of separators. 433 break 434 } 435 } 436 } 437 438 return delimPos, delimsFound 439 } 440 441 // Returns the the specified component of a filename, given the positions of 442 // delimeters. Our only use cases for this involve extracting numeric 443 // components, so this function assumes this and returns the component as an 444 // int64. 445 func intComponentAtIndex( 446 baseFilename string, 447 componentPos int, 448 delimPos [maxDelimNum]int, 449 ) (xtime.UnixNano, error) { 450 start := 0 451 if componentPos > 0 { 452 start = delimPos[componentPos-1] + 1 453 } 454 end := delimPos[componentPos] 455 if start > end || end > len(baseFilename)-1 || start < 0 { 456 return 0, fmt.Errorf(errUnexpectedFilenamePattern, baseFilename) 457 } 458 459 num, err := strconv.ParseInt(baseFilename[start:end], 10, 64) 460 if err != nil { 461 return 0, fmt.Errorf(errUnexpectedFilenamePattern, baseFilename) 462 } 463 return xtime.UnixNano(num), nil 464 } 465 466 // TimeFromFileName extracts the block start time from file name. 467 func TimeFromFileName(fname string) (xtime.UnixNano, error) { 468 base := filepath.Base(fname) 469 470 delims, delimsFound := delimiterPositions(base) 471 // There technically only needs to be two delimeters here since the time 472 // component is in index 1. 
However, all DB files have a minimum of three 473 // delimeters, so check for that instead. 474 if delimsFound < 3 { 475 return 0, fmt.Errorf(errUnexpectedFilenamePattern, fname) 476 } 477 nanos, err := intComponentAtIndex(base, timeComponentPosition, delims) 478 if err != nil { 479 return 0, fmt.Errorf(errUnexpectedFilenamePattern, fname) 480 } 481 482 return nanos, nil 483 } 484 485 // TimeAndIndexFromCommitlogFilename extracts the block start and index from 486 // file name for a commitlog. 487 func TimeAndIndexFromCommitlogFilename(fname string) (xtime.UnixNano, int, error) { 488 return timeAndIndexFromFileName(fname, commitLogComponentPosition) 489 } 490 491 // TimeAndVolumeIndexFromDataFileSetFilename extracts the block start and volume 492 // index from a data fileset file name that may or may not have an index. If the 493 // file name does not include an index, unindexedFilesetIndex is returned as the 494 // volume index. 495 func TimeAndVolumeIndexFromDataFileSetFilename(fname string) (xtime.UnixNano, int, error) { 496 base := filepath.Base(fname) 497 498 delims, delimsFound := delimiterPositions(base) 499 if delimsFound < 3 { 500 return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname) 501 } 502 503 nanos, err := intComponentAtIndex(base, timeComponentPosition, delims) 504 if err != nil { 505 return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname) 506 } 507 508 // Legacy filename with no volume index. 509 if delimsFound == 3 { 510 return nanos, unindexedFilesetIndex, nil 511 } 512 513 volume, err := intComponentAtIndex(base, dataFileSetComponentPosition, delims) 514 if err != nil { 515 return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname) 516 } 517 518 return nanos, int(volume), nil 519 } 520 521 // TimeAndVolumeIndexFromFileSetFilename extracts the block start and 522 // volume index from an index file name. 
func TimeAndVolumeIndexFromFileSetFilename(fname string) (xtime.UnixNano, int, error) {
	return timeAndIndexFromFileName(fname, indexFileSetComponentPosition)
}

// timeAndIndexFromFileName extracts the block start time (component 1) and the
// integer component at componentPosition from the given filename.
func timeAndIndexFromFileName(fname string, componentPosition int) (xtime.UnixNano, int, error) {
	base := filepath.Base(fname)

	delims, delimsFound := delimiterPositions(base)
	if componentPosition > delimsFound {
		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
	}

	nanos, err := intComponentAtIndex(base, 1, delims)
	if err != nil {
		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
	}

	index, err := intComponentAtIndex(base, componentPosition, delims)
	if err != nil {
		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
	}

	return nanos, int(index), nil
}

// SnapshotTimeAndID returns the metadata for the snapshot.
func SnapshotTimeAndID(
	filePathPrefix string, id FileSetFileIdentifier) (xtime.UnixNano, uuid.UUID, error) {
	decoder := msgpack.NewDecoder(nil)
	return snapshotTimeAndID(filePathPrefix, id, decoder)
}

// snapshotTimeAndID reads and decodes the snapshot info file for id, returning
// the snapshot time and the parsed snapshot UUID.
func snapshotTimeAndID(
	filePathPrefix string,
	id FileSetFileIdentifier,
	decoder *msgpack.Decoder,
) (xtime.UnixNano, uuid.UUID, error) {
	infoBytes, err := readSnapshotInfoFile(filePathPrefix, id, defaultBufioReaderSize)
	if err != nil {
		return 0, nil, fmt.Errorf("error reading snapshot info file: %w", err)
	}

	decoder.Reset(msgpack.NewByteDecoderStream(infoBytes))
	info, err := decoder.DecodeIndexInfo()
	if err != nil {
		return 0, nil, fmt.Errorf("error decoding snapshot info file: %w", err)
	}

	var parsedSnapshotID uuid.UUID
	err = parsedSnapshotID.UnmarshalBinary(info.SnapshotID)
	if err != nil {
		return 0, nil, fmt.Errorf("error parsing snapshot ID from snapshot info file: %w", err)
	}

	return xtime.UnixNano(info.SnapshotTime), parsedSnapshotID, nil
}

// readSnapshotInfoFile reads the info file of a snapshot fileset, validating
// it against the digests recorded in the checkpoint and digest files.
func readSnapshotInfoFile(
	filePathPrefix string, id FileSetFileIdentifier, readerBufferSize int,
) ([]byte, error) {
	var (
		shardDir           = ShardSnapshotsDirPath(filePathPrefix, id.Namespace, id.Shard)
		checkpointFilePath = FilesetPathFromTimeAndIndex(
			shardDir, id.BlockStart, id.VolumeIndex, CheckpointFileSuffix,
		)
		digestFilePath = FilesetPathFromTimeAndIndex(
			shardDir, id.BlockStart, id.VolumeIndex, DigestFileSuffix,
		)
		infoFilePath = FilesetPathFromTimeAndIndex(
			shardDir, id.BlockStart, id.VolumeIndex, InfoFileSuffix,
		)
	)

	checkpointFd, err := os.Open(checkpointFilePath)
	if err != nil {
		return nil, err
	}

	// Read digest of digests from the checkpoint file
	digestBuf := digest.NewBuffer()
	expectedDigestOfDigest, err := digestBuf.ReadDigestFromFile(checkpointFd)
	// Close before inspecting the read error so the fd never leaks.
	closeErr := checkpointFd.Close()
	if err != nil {
		return nil, err
	}
	if closeErr != nil {
		return nil, closeErr
	}

	// Read and validate the digest file
	digestData, err := readAndValidate(
		digestFilePath, readerBufferSize, expectedDigestOfDigest)
	if err != nil {
		return nil, err
	}

	// Read and validate the info file
	expectedInfoDigest := digest.ToBuffer(digestData).ReadDigest()
	return readAndValidate(
		infoFilePath, readerBufferSize, expectedInfoDigest)
}

// readCheckpointFile reads the digest stored in a checkpoint file, returning
// ErrCheckpointFileNotFound if the checkpoint was never completely written.
func readCheckpointFile(filePath string, digestBuf digest.Buffer) (uint32, error) {
	exists, err := CompleteCheckpointFileExists(filePath)
	if err != nil {
		return 0, err
	}
	if !exists {
		return 0, ErrCheckpointFileNotFound
	}
	fd, err := os.Open(filePath)
	if err != nil {
		return 0, err
	}
	defer fd.Close()
	digest, err := digestBuf.ReadDigestFromFile(fd)
	if err != nil {
		return 0, err
	}

	return digest, nil
}

// forEachInfoFileSelector selects which filesets forEachInfoFile visits.
type forEachInfoFileSelector struct {
	fileSetType      persist.FileSetType
	contentType      persist.FileSetContentType
	filePathPrefix   string
	namespace        ident.ID
	shard            uint32 // shard only applicable for data content type
	includeCorrupted bool   // include corrupted filesets (fail validation)
}

// infoFileFn is invoked once per visited fileset; corrupted indicates the
// fileset failed digest validation (infoData may be nil in that case).
type infoFileFn func(file FileSetFile, infoData []byte, corrupted bool)

// forEachInfoFile iterates the matched filesets, validates their checkpoint,
// digest and info files against each other, and invokes fn with each valid
// fileset's info file contents. Filesets that fail validation are passed to
// fn with corrupted=true only when args.includeCorrupted is set.
func forEachInfoFile(
	args forEachInfoFileSelector,
	readerBufferSize int,
	fn infoFileFn,
) {
	matched, err := filesetFiles(filesetFilesSelector{
		fileSetType:    args.fileSetType,
		contentType:    args.contentType,
		filePathPrefix: args.filePathPrefix,
		namespace:      args.namespace,
		shard:          args.shard,
		pattern:        filesetFilePattern,
	})
	if err != nil {
		return
	}

	// Resolve the directory for this fileset type / content type combination.
	var dir string
	switch args.fileSetType {
	case persist.FileSetFlushType:
		switch args.contentType {
		case persist.FileSetDataContentType:
			dir = ShardDataDirPath(args.filePathPrefix, args.namespace, args.shard)
		case persist.FileSetIndexContentType:
			dir = NamespaceIndexDataDirPath(args.filePathPrefix, args.namespace)
		default:
			return
		}
	case persist.FileSetSnapshotType:
		switch args.contentType {
		case persist.FileSetDataContentType:
			dir = ShardSnapshotsDirPath(args.filePathPrefix, args.namespace, args.shard)
		case persist.FileSetIndexContentType:
			dir = NamespaceIndexSnapshotDirPath(args.filePathPrefix, args.namespace)
		default:
			return
		}
	default:
		return
	}

	maybeIncludeCorrupted := func(corrupted FileSetFile) {
		if !args.includeCorrupted {
			return
		}
		// NB: We do not want to give up here on error or else we may not clean up
		// corrupt index filesets.
		infoFilePath, ok := corrupted.InfoFilePath()
		if !ok {
			fn(corrupted, nil, true)
			return
		}
		infoData, err := read(infoFilePath)
		if err != nil {
			// NB: If no info data is supplied, we assume that the
			// info file itself is corrupted. Since this is the
			// first file written to disk, this should be safe to remove.
			fn(corrupted, nil, true)
			return
		}
		// NB: We always write an index info file when we begin writing to an index volume
		// so we are always guaranteed that there's AT LEAST the info file on disk w/ incomplete info.
		fn(corrupted, infoData, true)
	}

	var indexDigests index.IndexDigests
	digestBuf := digest.NewBuffer()
	for i := range matched {
		t := matched[i].ID.BlockStart
		volume := matched[i].ID.VolumeIndex

		var (
			checkpointFilePath string
			digestsFilePath    string
			infoFilePath       string
		)
		switch args.fileSetType {
		case persist.FileSetFlushType:
			switch args.contentType {
			case persist.FileSetDataContentType:
				isLegacy := false
				if volume == 0 {
					// Volume 0 may predate volume-indexed filenames.
					isLegacy, err = isFirstVolumeLegacy(dir, t, CheckpointFileSuffix)
					if err != nil {
						continue
					}
				}
				checkpointFilePath = dataFilesetPathFromTimeAndIndex(dir, t, volume, CheckpointFileSuffix, isLegacy)
				digestsFilePath = dataFilesetPathFromTimeAndIndex(dir, t, volume, DigestFileSuffix, isLegacy)
				infoFilePath = dataFilesetPathFromTimeAndIndex(dir, t, volume, InfoFileSuffix, isLegacy)
			case persist.FileSetIndexContentType:
				checkpointFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, CheckpointFileSuffix)
				digestsFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, DigestFileSuffix)
				infoFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, InfoFileSuffix)
			}
		case persist.FileSetSnapshotType:
			checkpointFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, CheckpointFileSuffix)
			digestsFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, DigestFileSuffix)
			infoFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, InfoFileSuffix)
		}
		// Read digest of digests from the checkpoint file
		expectedDigestOfDigest, err := readCheckpointFile(checkpointFilePath, digestBuf)
		if err != nil {
			maybeIncludeCorrupted(matched[i])
			continue
		}
		// Read and validate the digest file
		digestData, err := readAndValidate(digestsFilePath, readerBufferSize,
			expectedDigestOfDigest)
		if err != nil {
			maybeIncludeCorrupted(matched[i])
			continue
		}

		// Read and validate the info file
		var expectedInfoDigest uint32
		switch args.contentType {
		case persist.FileSetDataContentType:
			expectedInfoDigest = digest.ToBuffer(digestData).ReadDigest()
		case persist.FileSetIndexContentType:
			if err := indexDigests.Unmarshal(digestData); err != nil {
				maybeIncludeCorrupted(matched[i])
				continue
			}
			expectedInfoDigest = indexDigests.GetInfoDigest()
		}

		infoData, err := readAndValidate(infoFilePath, readerBufferSize,
			expectedInfoDigest)
		if err != nil {
			maybeIncludeCorrupted(matched[i])
			continue
		}
		// Guarantee that every matched fileset has an info file.
		if _, ok := matched[i].InfoFilePath(); !ok {
			maybeIncludeCorrupted(matched[i])
			continue
		}

		fn(matched[i], infoData, false)
	}
}

// ReadInfoFileResult is the result of reading an info file
type ReadInfoFileResult struct {
	Info schema.IndexInfo
	Err  ReadInfoFileResultError
}

// ReadInfoFileResultError is the interface for obtaining information about an error
// that occurred trying to read an info file
type ReadInfoFileResultError interface {
	Error() error
	Filepath() string
}

// readInfoFileResultError implements ReadInfoFileResultError.
type readInfoFileResultError struct {
	err      error
	filepath string
}

// Error returns the error that occurred reading the info file
func (r readInfoFileResultError) Error() error {
	return r.err
}

// FilePath returns the filepath for the problematic file
func (r readInfoFileResultError) Filepath() string {
	return r.filepath
}

// ReadInfoFiles reads all the valid info entries.
Even if ReadInfoFiles returns an error, 829 // there may be some valid entries in the returned slice. 830 func ReadInfoFiles( 831 filePathPrefix string, 832 namespace ident.ID, 833 shard uint32, 834 readerBufferSize int, 835 decodingOpts msgpack.DecodingOptions, 836 fileSetType persist.FileSetType, 837 ) []ReadInfoFileResult { 838 var infoFileResults []ReadInfoFileResult 839 decoder := msgpack.NewDecoder(decodingOpts) 840 forEachInfoFile( 841 forEachInfoFileSelector{ 842 fileSetType: fileSetType, 843 contentType: persist.FileSetDataContentType, 844 filePathPrefix: filePathPrefix, 845 namespace: namespace, 846 shard: shard, 847 }, 848 readerBufferSize, 849 func(file FileSetFile, data []byte, _ bool) { 850 filePath, _ := file.InfoFilePath() 851 decoder.Reset(msgpack.NewByteDecoderStream(data)) 852 info, err := decoder.DecodeIndexInfo() 853 infoFileResults = append(infoFileResults, ReadInfoFileResult{ 854 Info: info, 855 Err: readInfoFileResultError{ 856 err: err, 857 filepath: filePath, 858 }, 859 }) 860 }) 861 return infoFileResults 862 } 863 864 // ReadIndexInfoFilesOptions specifies options for reading index info files. 865 type ReadIndexInfoFilesOptions struct { 866 FilePathPrefix string 867 Namespace ident.ID 868 ReaderBufferSize int 869 IncludeCorrupted bool 870 } 871 872 // ReadIndexInfoFileResult is the result of reading an info file 873 type ReadIndexInfoFileResult struct { 874 ID FileSetFileIdentifier 875 Info index.IndexVolumeInfo 876 AbsoluteFilePaths []string 877 Err ReadInfoFileResultError 878 Corrupted bool 879 } 880 881 // ReadIndexInfoFiles reads all the valid index info entries. Even if ReadIndexInfoFiles returns an error, 882 // there may be some valid entries in the returned slice. 
func ReadIndexInfoFiles(opts ReadIndexInfoFilesOptions) []ReadIndexInfoFileResult {
	var infoFileResults []ReadIndexInfoFileResult
	forEachInfoFile(
		forEachInfoFileSelector{
			fileSetType:      persist.FileSetFlushType,
			contentType:      persist.FileSetIndexContentType,
			filePathPrefix:   opts.FilePathPrefix,
			namespace:        opts.Namespace,
			includeCorrupted: opts.IncludeCorrupted,
		},
		opts.ReaderBufferSize,
		func(file FileSetFile, data []byte, corrupted bool) {
			filepath, _ := file.InfoFilePath()
			id := file.ID
			var info index.IndexVolumeInfo
			// Unmarshal errors are recorded in the per-file result rather
			// than aborting the iteration.
			err := info.Unmarshal(data)
			infoFileResults = append(infoFileResults, ReadIndexInfoFileResult{
				ID:                id,
				Info:              info,
				AbsoluteFilePaths: file.AbsoluteFilePaths,
				Err: readInfoFileResultError{
					err:      err,
					filepath: filepath,
				},
				Corrupted: corrupted,
			})
		})
	return infoFileResults
}

// SortedSnapshotMetadataFiles returns a slice of all the SnapshotMetadata files that are on disk, as well
// as any files that it encountered errors for (corrupt, missing checkpoints, etc) which facilitates
// cleanup of corrupt files. []SnapshotMetadata will be sorted by index (i.e the chronological order
// in which the snapshots were taken), but []SnapshotMetadataErrorWithPaths will not be in any particular
// order.
func SortedSnapshotMetadataFiles(opts Options) (
	[]SnapshotMetadata, []SnapshotMetadataErrorWithPaths, error) {
	var (
		prefix           = opts.FilePathPrefix()
		snapshotsDirPath = SnapshotDirPath(prefix)
	)

	// Glob for metadata files directly instead of their checkpoint files.
	// In the happy case this makes no difference, but in situations where
	// the metadata file exists but the checkpoint file does not (due to sudden
	// node failure) this strategy allows us to still cleanup the metadata file
	// whereas if we looked for checkpoint files directly the dangling metadata
	// file would hang around forever.
	metadataFilePaths, err := filepath.Glob(
		path.Join(
			snapshotsDirPath,
			fmt.Sprintf("*%s%s%s", separator, metadataFileSuffix, fileSuffix)))
	if err != nil {
		return nil, nil, err
	}

	var (
		reader          = NewSnapshotMetadataReader(opts)
		metadatas       = []SnapshotMetadata{}
		errorsWithPaths = []SnapshotMetadataErrorWithPaths{}
	)
	for _, file := range metadataFilePaths {
		id, err := snapshotMetadataIdentifierFromFilePath(file)
		if err != nil {
			errorsWithPaths = append(errorsWithPaths, SnapshotMetadataErrorWithPaths{
				Error:            err,
				MetadataFilePath: file,
				// Can't construct checkpoint file path without ID
			})
			continue
		}

		// Sanity check: re-deriving the path from the parsed identifier must
		// round-trip to the globbed path.
		if file != snapshotMetadataFilePathFromIdentifier(prefix, id) {
			// Should never happen
			errorsWithPaths = append(errorsWithPaths, SnapshotMetadataErrorWithPaths{
				Error: instrument.InvariantErrorf(
					"actual snapshot metadata filepath: %s and generated filepath: %s do not match",
					file, snapshotMetadataFilePathFromIdentifier(prefix, id)),
				MetadataFilePath:   file,
				CheckpointFilePath: snapshotMetadataCheckpointFilePathFromIdentifier(prefix, id),
			})
			continue
		}

		metadata, err := reader.Read(id)
		if err != nil {
			errorsWithPaths = append(errorsWithPaths, SnapshotMetadataErrorWithPaths{
				Error:              err,
				MetadataFilePath:   file,
				CheckpointFilePath: snapshotMetadataCheckpointFilePathFromIdentifier(prefix, id),
			})
			continue
		}

		metadatas = append(metadatas, metadata)
	}

	// Sort by index, i.e. the chronological order the snapshots were taken in.
	sort.Slice(metadatas, func(i, j int) bool {
		return metadatas[i].ID.Index < metadatas[j].ID.Index
	})
	return metadatas, errorsWithPaths, nil
}

// DataFiles returns a slice of all the names for all the fileset files
// for a given namespace and shard combination.
func DataFiles(filePathPrefix string, namespace ident.ID, shard uint32) (FileSetFilesSlice, error) {
	return filesetFiles(filesetFilesSelector{
		fileSetType:    persist.FileSetFlushType,
		contentType:    persist.FileSetDataContentType,
		filePathPrefix: filePathPrefix,
		namespace:      namespace,
		shard:          shard,
		pattern:        filesetFilePattern,
	})
}

// SnapshotFiles returns a slice of all the names for all the snapshot files
// for a given namespace and shard combination.
func SnapshotFiles(filePathPrefix string, namespace ident.ID, shard uint32) (FileSetFilesSlice, error) {
	return filesetFiles(filesetFilesSelector{
		fileSetType:    persist.FileSetSnapshotType,
		contentType:    persist.FileSetDataContentType,
		filePathPrefix: filePathPrefix,
		namespace:      namespace,
		shard:          shard,
		pattern:        filesetFilePattern,
	})
}

// IndexSnapshotFiles returns a slice of all the names for all the index fileset files
// for a given namespace.
func IndexSnapshotFiles(filePathPrefix string, namespace ident.ID) (FileSetFilesSlice, error) {
	return filesetFiles(filesetFilesSelector{
		fileSetType:    persist.FileSetSnapshotType,
		contentType:    persist.FileSetIndexContentType,
		filePathPrefix: filePathPrefix,
		namespace:      namespace,
		pattern:        filesetFilePattern,
	})
}

// FileSetAt returns a FileSetFile for the given namespace/shard/blockStart/volume combination if it exists.
1025 func FileSetAt( 1026 filePathPrefix string, 1027 namespace ident.ID, 1028 shard uint32, 1029 blockStart xtime.UnixNano, 1030 volume int, 1031 ) (FileSetFile, bool, error) { 1032 var pattern string 1033 // If this is the initial volume, then we need to check if files were written with the legacy file naming (i.e. 1034 // without the volume index) so that we can properly locate the fileset. 1035 if volume == 0 { 1036 dir := ShardDataDirPath(filePathPrefix, namespace, shard) 1037 isLegacy, err := isFirstVolumeLegacy(dir, blockStart, CheckpointFileSuffix) 1038 // NB(nate): don't propagate ErrCheckpointFileNotFound here as expectation is to simply return an 1039 // empty FileSetFile if files do not exist. 1040 if err == ErrCheckpointFileNotFound { 1041 return FileSetFile{}, false, nil 1042 } else if err != nil && err != ErrCheckpointFileNotFound { 1043 return FileSetFile{}, false, err 1044 } 1045 1046 if isLegacy { 1047 pattern = filesetFileForTime(blockStart, anyLowerCaseCharsPattern) 1048 } 1049 } 1050 1051 if len(pattern) == 0 { 1052 pattern = filesetFileForTimeAndVolumeIndex(blockStart, volume, anyLowerCaseCharsPattern) 1053 } 1054 1055 matched, err := filesetFiles(filesetFilesSelector{ 1056 fileSetType: persist.FileSetFlushType, 1057 contentType: persist.FileSetDataContentType, 1058 filePathPrefix: filePathPrefix, 1059 namespace: namespace, 1060 shard: shard, 1061 pattern: pattern, 1062 }) 1063 if err != nil { 1064 return FileSetFile{}, false, err 1065 } 1066 1067 matched.sortByTimeAndVolumeIndexAscending() 1068 for i, fileset := range matched { 1069 if fileset.ID.BlockStart.Equal(blockStart) && fileset.ID.VolumeIndex == volume { 1070 nextIdx := i + 1 1071 if nextIdx < len(matched) && matched[nextIdx].ID.BlockStart.Equal(blockStart) { 1072 // Should never happen. 
1073 return FileSetFile{}, false, fmt.Errorf( 1074 "found multiple fileset files for blockStart: %d", blockStart.Seconds(), 1075 ) 1076 } 1077 1078 if !fileset.HasCompleteCheckpointFile() { 1079 continue 1080 } 1081 1082 return fileset, true, nil 1083 } 1084 } 1085 1086 return FileSetFile{}, false, nil 1087 } 1088 1089 // IndexFileSetsAt returns all FileSetFile(s) for the given 1090 // namespace/blockStart combination. 1091 // NB: It returns all complete Volumes found on disk. 1092 func IndexFileSetsAt( 1093 filePathPrefix string, namespace ident.ID, blockStart xtime.UnixNano, 1094 ) (FileSetFilesSlice, error) { 1095 matches, err := filesetFiles(filesetFilesSelector{ 1096 fileSetType: persist.FileSetFlushType, 1097 contentType: persist.FileSetIndexContentType, 1098 filePathPrefix: filePathPrefix, 1099 namespace: namespace, 1100 pattern: filesetFileForTime(blockStart, anyLowerCaseCharsNumbersPattern), 1101 }) 1102 if err != nil { 1103 return nil, err 1104 } 1105 1106 filesets := make(FileSetFilesSlice, 0, len(matches)) 1107 matches.sortByTimeAscending() 1108 for _, fileset := range matches { 1109 if fileset.ID.BlockStart.Equal(blockStart) { 1110 if !fileset.HasCompleteCheckpointFile() { 1111 continue 1112 } 1113 filesets = append(filesets, fileset) 1114 } 1115 } 1116 1117 return filesets, nil 1118 } 1119 1120 // DeleteFileSetAt deletes a FileSetFile for a given 1121 // namespace/shard/blockStart/volume combination if it exists. 
1122 func DeleteFileSetAt( 1123 filePathPrefix string, 1124 namespace ident.ID, 1125 shard uint32, 1126 blockStart xtime.UnixNano, 1127 volume int, 1128 ) error { 1129 fileset, ok, err := FileSetAt(filePathPrefix, namespace, shard, blockStart, volume) 1130 if err != nil { 1131 return err 1132 } 1133 if !ok { 1134 return fmt.Errorf("fileset for blockStart: %d does not exist", blockStart.Seconds()) 1135 } 1136 1137 return DeleteFiles(fileset.AbsoluteFilePaths) 1138 } 1139 1140 // DataFileSetsBefore returns all the flush data fileset paths whose 1141 // timestamps are earlier than a given time. 1142 func DataFileSetsBefore( 1143 filePathPrefix string, namespace ident.ID, shard uint32, t xtime.UnixNano, 1144 ) ([]string, error) { 1145 matched, err := filesetFiles(filesetFilesSelector{ 1146 fileSetType: persist.FileSetFlushType, 1147 contentType: persist.FileSetDataContentType, 1148 filePathPrefix: filePathPrefix, 1149 namespace: namespace, 1150 shard: shard, 1151 pattern: filesetFilePattern, 1152 }) 1153 if err != nil { 1154 return nil, err 1155 } 1156 return FilesBefore(matched.Filepaths(), t) 1157 } 1158 1159 // IndexFileSetsBefore returns all the flush index fileset paths whose timestamps are earlier than a given time. 
1160 func IndexFileSetsBefore(filePathPrefix string, namespace ident.ID, t xtime.UnixNano) ([]string, error) { 1161 matched, err := filesetFiles(filesetFilesSelector{ 1162 fileSetType: persist.FileSetFlushType, 1163 contentType: persist.FileSetIndexContentType, 1164 filePathPrefix: filePathPrefix, 1165 namespace: namespace, 1166 pattern: filesetFilePattern, 1167 }) 1168 if err != nil { 1169 return nil, err 1170 } 1171 return FilesBefore(matched.Filepaths(), t) 1172 } 1173 1174 // DeleteInactiveDirectories deletes any directories that are not currently active, as defined by the 1175 // inputed active directories within the parent directory 1176 func DeleteInactiveDirectories(parentDirectoryPath string, activeDirectories []string) error { 1177 var toDelete []string 1178 activeDirNames := make(map[string]struct{}) 1179 allSubDirs, err := findSubDirectoriesAndPaths(parentDirectoryPath) 1180 if err != nil { 1181 return nil 1182 } 1183 1184 // Create shard set, might also be useful to just send in as strings? 1185 for _, dir := range activeDirectories { 1186 activeDirNames[dir] = struct{}{} 1187 } 1188 1189 for dirName, dirPath := range allSubDirs { 1190 if _, ok := activeDirNames[dirName]; !ok { 1191 toDelete = append(toDelete, dirPath) 1192 } 1193 } 1194 return DeleteDirectories(toDelete) 1195 } 1196 1197 // SortedCommitLogFiles returns all the commit log files in the commit logs directory. 
1198 func SortedCommitLogFiles(commitLogsDir string) ([]string, error) { 1199 return sortedCommitLogFiles(commitLogsDir, commitLogFilePattern) 1200 } 1201 1202 type filesetFile struct { 1203 volumeIndex int 1204 blockStart xtime.UnixNano 1205 fileName string 1206 } 1207 1208 type toSortableFn func(files []string) sort.Interface 1209 type toBlockStartAndVolumeIndexFn func(file string) (xtime.UnixNano, int, error) 1210 type sortedFilesetFiles []filesetFile 1211 1212 func (s sortedFilesetFiles) Len() int { 1213 return len(s) 1214 } 1215 1216 func (s sortedFilesetFiles) Less(i, j int) bool { 1217 iStart := s[i].blockStart 1218 jStart := s[j].blockStart 1219 1220 if iStart.Before(jStart) { 1221 return true 1222 } 1223 1224 jVolume := s[j].volumeIndex 1225 iVolume := s[i].volumeIndex 1226 return iStart.Equal(jStart) && iVolume < jVolume 1227 } 1228 1229 func (s sortedFilesetFiles) Swap(i, j int) { 1230 s[i], s[j] = s[j], s[i] 1231 } 1232 1233 func findSortedFilesetFiles( 1234 fileDir string, pattern string, 1235 fn toBlockStartAndVolumeIndexFn, 1236 ) (sortedFilesetFiles, error) { 1237 matched, err := filepath.Glob(path.Join(fileDir, pattern)) 1238 if err != nil { 1239 return nil, err 1240 } 1241 if len(matched) == 0 { 1242 return nil, nil 1243 } 1244 result := make([]filesetFile, len(matched)) 1245 for i, file := range matched { 1246 blockStart, volume, err := fn(file) 1247 if err != nil { 1248 return nil, err 1249 } 1250 1251 result[i] = filesetFile{ 1252 fileName: file, 1253 blockStart: blockStart, 1254 volumeIndex: volume, 1255 } 1256 } 1257 1258 sort.Sort(sortedFilesetFiles(result)) 1259 return result, nil 1260 } 1261 1262 func findFiles(fileDir string, pattern string, fn toSortableFn) ([]string, error) { 1263 matched, err := filepath.Glob(path.Join(fileDir, pattern)) 1264 if err != nil { 1265 return nil, err 1266 } 1267 sort.Sort(fn(matched)) 1268 return matched, nil 1269 } 1270 1271 type directoryNamesToPaths map[string]string 1272 1273 func 
findSubDirectoriesAndPaths(directoryPath string) (directoryNamesToPaths, error) { 1274 parent, err := os.Open(directoryPath) 1275 if err != nil { 1276 return nil, err 1277 } 1278 1279 subDirectoriesToPaths := make(directoryNamesToPaths) 1280 subDirNames, err := parent.Readdirnames(-1) 1281 if err != nil { 1282 return nil, err 1283 } 1284 1285 err = parent.Close() 1286 if err != nil { 1287 return nil, err 1288 } 1289 1290 for _, dirName := range subDirNames { 1291 subDirectoriesToPaths[dirName] = path.Join(directoryPath, dirName) 1292 } 1293 return subDirectoriesToPaths, nil 1294 } 1295 1296 type filesetFilesSelector struct { 1297 fileSetType persist.FileSetType 1298 contentType persist.FileSetContentType 1299 filePathPrefix string 1300 namespace ident.ID 1301 shard uint32 1302 pattern string 1303 } 1304 1305 func filesetFiles(args filesetFilesSelector) (FileSetFilesSlice, error) { 1306 var ( 1307 byTimeAsc sortedFilesetFiles 1308 err error 1309 ) 1310 switch args.fileSetType { 1311 case persist.FileSetFlushType: 1312 switch args.contentType { 1313 case persist.FileSetDataContentType: 1314 dir := ShardDataDirPath(args.filePathPrefix, args.namespace, args.shard) 1315 byTimeAsc, err = findSortedFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromDataFileSetFilename) 1316 case persist.FileSetIndexContentType: 1317 dir := NamespaceIndexDataDirPath(args.filePathPrefix, args.namespace) 1318 byTimeAsc, err = findSortedFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromFileSetFilename) 1319 default: 1320 return nil, fmt.Errorf("unknown content type: %d", args.contentType) 1321 } 1322 case persist.FileSetSnapshotType: 1323 var dir string 1324 switch args.contentType { 1325 case persist.FileSetDataContentType: 1326 dir = ShardSnapshotsDirPath(args.filePathPrefix, args.namespace, args.shard) 1327 case persist.FileSetIndexContentType: 1328 dir = NamespaceIndexSnapshotDirPath(args.filePathPrefix, args.namespace) 1329 default: 1330 return nil, fmt.Errorf("unknown content 
type: %d", args.contentType) 1331 } 1332 byTimeAsc, err = findSortedFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromFileSetFilename) 1333 default: 1334 return nil, fmt.Errorf("unknown type: %d", args.fileSetType) 1335 } 1336 if err != nil { 1337 return nil, err 1338 } 1339 1340 if len(byTimeAsc) == 0 { 1341 return nil, nil 1342 } 1343 1344 var ( 1345 latestBlockStart xtime.UnixNano 1346 latestVolumeIndex int 1347 latestFileSetFile FileSetFile 1348 filesetFiles = []FileSetFile{} 1349 ) 1350 for _, file := range byTimeAsc { 1351 if latestBlockStart == 0 { 1352 latestFileSetFile = NewFileSetFile(FileSetFileIdentifier{ 1353 Namespace: args.namespace, 1354 BlockStart: file.blockStart, 1355 Shard: args.shard, 1356 VolumeIndex: file.volumeIndex, 1357 }, args.filePathPrefix) 1358 } else if !file.blockStart.Equal(latestBlockStart) || latestVolumeIndex != file.volumeIndex { 1359 filesetFiles = append(filesetFiles, latestFileSetFile) 1360 latestFileSetFile = NewFileSetFile(FileSetFileIdentifier{ 1361 Namespace: args.namespace, 1362 BlockStart: file.blockStart, 1363 Shard: args.shard, 1364 VolumeIndex: file.volumeIndex, 1365 }, args.filePathPrefix) 1366 } 1367 1368 latestBlockStart = file.blockStart 1369 latestVolumeIndex = file.volumeIndex 1370 1371 latestFileSetFile.AbsoluteFilePaths = append(latestFileSetFile.AbsoluteFilePaths, file.fileName) 1372 } 1373 1374 filesetFiles = append(filesetFiles, latestFileSetFile) 1375 return filesetFiles, nil 1376 } 1377 1378 func sortedCommitLogFiles(commitLogsDir string, pattern string) ([]string, error) { 1379 return findFiles(commitLogsDir, pattern, func(files []string) sort.Interface { 1380 return commitlogsByTimeAndIndexAscending(files) 1381 }) 1382 } 1383 1384 // FilesBefore filters the list of files down to those whose name indicate they are 1385 // before a given time period. Mutates the provided slice. 
1386 func FilesBefore(files []string, t xtime.UnixNano) ([]string, error) { 1387 var ( 1388 j int 1389 multiErr xerrors.MultiError 1390 ) 1391 // Matched files are sorted by their timestamps in ascending order. 1392 for i := range files { 1393 ft, err := TimeFromFileName(files[i]) 1394 if err != nil { 1395 multiErr = multiErr.Add(err) 1396 continue 1397 } 1398 if !ft.Before(t) { 1399 break 1400 } 1401 files[j] = files[i] 1402 j++ 1403 } 1404 return files[:j], multiErr.FinalError() 1405 } 1406 1407 func readAndValidate( 1408 filePath string, 1409 readerBufferSize int, 1410 expectedDigest uint32, 1411 ) ([]byte, error) { 1412 fd, err := os.Open(filePath) 1413 if err != nil { 1414 return nil, err 1415 } 1416 defer fd.Close() 1417 1418 buf, err := bufferForEntireFile(filePath) 1419 if err != nil { 1420 return nil, err 1421 } 1422 1423 fwd := digest.NewFdWithDigestReader(readerBufferSize) 1424 fwd.Reset(fd) 1425 n, err := fwd.ReadAllAndValidate(buf, expectedDigest) 1426 if err != nil { 1427 return nil, err 1428 } 1429 return buf[:n], nil 1430 } 1431 1432 func read(filePath string) ([]byte, error) { 1433 fd, err := os.Open(filePath) //nolint:gosec 1434 if err != nil { 1435 return nil, err 1436 } 1437 defer fd.Close() //nolint:errcheck,gosec 1438 1439 buf, err := bufferForEntireFile(filePath) 1440 if err != nil { 1441 return nil, err 1442 } 1443 1444 n, err := fd.Read(buf) 1445 if err != nil { 1446 return nil, err 1447 } 1448 return buf[:n], nil 1449 } 1450 1451 func bufferForEntireFile(filePath string) ([]byte, error) { 1452 stat, err := os.Stat(filePath) 1453 if err != nil { 1454 return nil, err 1455 } 1456 1457 size := int(stat.Size()) 1458 buf := make([]byte, size) 1459 return buf, nil 1460 } 1461 1462 // DataDirPath returns the path to the data directory belonging to a db 1463 func DataDirPath(prefix string) string { 1464 return path.Join(prefix, dataDirName) 1465 } 1466 1467 // IndexDataDirPath returns the path to the index data directory belonging to a db 1468 func 
IndexDataDirPath(prefix string) string { 1469 return path.Join(prefix, indexDirName, dataDirName) 1470 } 1471 1472 // SnapshotDirPath returns the path to the snapshot directory belong to a db 1473 func SnapshotDirPath(prefix string) string { 1474 return path.Join(prefix, snapshotDirName) 1475 } 1476 1477 // NamespaceDataDirPath returns the path to the data directory for a given namespace. 1478 func NamespaceDataDirPath(prefix string, namespace ident.ID) string { 1479 return path.Join(prefix, dataDirName, namespace.String()) 1480 } 1481 1482 // NamespaceSnapshotsDirPath returns the path to the snapshots directory for a given namespace. 1483 func NamespaceSnapshotsDirPath(prefix string, namespace ident.ID) string { 1484 return path.Join(SnapshotsDirPath(prefix), namespace.String()) 1485 } 1486 1487 // NamespaceIndexDataDirPath returns the path to the data directory for a given namespace. 1488 func NamespaceIndexDataDirPath(prefix string, namespace ident.ID) string { 1489 return path.Join(prefix, indexDirName, dataDirName, namespace.String()) 1490 } 1491 1492 // NamespaceIndexSnapshotDirPath returns the path to the data directory for a given namespace. 1493 func NamespaceIndexSnapshotDirPath(prefix string, namespace ident.ID) string { 1494 return path.Join(prefix, indexDirName, snapshotDirName, namespace.String()) 1495 } 1496 1497 // SnapshotsDirPath returns the path to the snapshots directory. 1498 func SnapshotsDirPath(prefix string) string { 1499 return path.Join(prefix, snapshotDirName) 1500 } 1501 1502 // ShardDataDirPath returns the path to the data directory for a given shard. 1503 func ShardDataDirPath(prefix string, namespace ident.ID, shard uint32) string { 1504 namespacePath := NamespaceDataDirPath(prefix, namespace) 1505 return path.Join(namespacePath, strconv.Itoa(int(shard))) 1506 } 1507 1508 // ShardSnapshotsDirPath returns the path to the snapshots directory for a given shard. 
1509 func ShardSnapshotsDirPath(prefix string, namespace ident.ID, shard uint32) string { 1510 namespacePath := NamespaceSnapshotsDirPath(prefix, namespace) 1511 return path.Join(namespacePath, strconv.Itoa(int(shard))) 1512 } 1513 1514 // CommitLogsDirPath returns the path to commit logs. 1515 func CommitLogsDirPath(prefix string) string { 1516 return path.Join(prefix, commitLogsDirName) 1517 } 1518 1519 // DataFileSetExists determines whether data fileset files exist for the given 1520 // namespace, shard, block start, and volume. 1521 func DataFileSetExists( 1522 filePathPrefix string, 1523 namespace ident.ID, 1524 shard uint32, 1525 blockStart xtime.UnixNano, 1526 volume int, 1527 ) (bool, error) { 1528 // This function can easily become a performance bottleneck if the 1529 // implementation is slow or requires scanning directories with a large 1530 // number of files in them (as is common if namespaces with long retentions 1531 // are configured). As a result, instead of using existing helper functions, 1532 // it implements an optimized code path that only involves checking if a few 1533 // specific files exist and contain the correct contents. 1534 shardDir := ShardDataDirPath(filePathPrefix, namespace, shard) 1535 1536 // Check fileset with volume first to optimize for non-legacy use case. 1537 checkpointPath := FilesetPathFromTimeAndIndex(shardDir, blockStart, volume, CheckpointFileSuffix) 1538 exists, err := CompleteCheckpointFileExists(checkpointPath) 1539 if err == nil && exists { 1540 return true, nil 1541 } 1542 1543 if volume != 0 { 1544 // Only check for legacy file path if volume is 0. 1545 return false, nil 1546 } 1547 1548 checkpointPath = filesetPathFromTimeLegacy(shardDir, blockStart, CheckpointFileSuffix) 1549 return CompleteCheckpointFileExists(checkpointPath) 1550 } 1551 1552 // SnapshotFileSetExistsAt determines whether snapshot fileset files exist for 1553 // the given namespace, shard, and block start time. 
1554 func SnapshotFileSetExistsAt( 1555 prefix string, 1556 namespace ident.ID, 1557 snapshotID uuid.UUID, 1558 shard uint32, 1559 blockStart xtime.UnixNano, 1560 ) (bool, error) { 1561 snapshotFiles, err := SnapshotFiles(prefix, namespace, shard) 1562 if err != nil { 1563 return false, err 1564 } 1565 1566 latest, ok := snapshotFiles.LatestVolumeForBlock(blockStart) 1567 if !ok { 1568 return false, nil 1569 } 1570 1571 _, latestSnapshotID, err := latest.SnapshotTimeAndID() 1572 if err != nil { 1573 return false, err 1574 } 1575 1576 if !uuid.Equal(latestSnapshotID, snapshotID) { 1577 return false, nil 1578 } 1579 1580 // LatestVolumeForBlock checks for a complete checkpoint file, so we don't 1581 // need to recheck it here. 1582 return true, nil 1583 } 1584 1585 // NextSnapshotMetadataFileIndex returns the next snapshot metadata file index. 1586 func NextSnapshotMetadataFileIndex(opts Options) (int64, error) { 1587 // We can ignore any SnapshotMetadataErrorsWithpaths that are returned because even if a corrupt 1588 // snapshot metadata file exists with the next index that we want to return from this function, 1589 // every snapshot metadata has its own UUID so there will never be a collision with a corrupt file 1590 // anyways and we can ignore them entirely when considering what the next index should be. 1591 snapshotMetadataFiles, _, err := SortedSnapshotMetadataFiles(opts) 1592 if err != nil { 1593 return 0, err 1594 } 1595 1596 if len(snapshotMetadataFiles) == 0 { 1597 return 0, nil 1598 } 1599 1600 lastSnapshotMetadataFile := snapshotMetadataFiles[len(snapshotMetadataFiles)-1] 1601 return lastSnapshotMetadataFile.ID.Index + 1, nil 1602 } 1603 1604 // NextSnapshotFileSetVolumeIndex returns the next snapshot file set index for a given 1605 // namespace/shard/blockStart combination. 
func NextSnapshotFileSetVolumeIndex(
	filePathPrefix string, namespace ident.ID, shard uint32, blockStart xtime.UnixNano,
) (int, error) {
	snapshotFiles, err := SnapshotFiles(filePathPrefix, namespace, shard)
	if err != nil {
		return -1, err
	}

	// No volume exists yet for this block start: numbering starts at zero.
	latestFile, ok := snapshotFiles.LatestVolumeForBlock(blockStart)
	if !ok {
		return 0, nil
	}

	return latestFile.ID.VolumeIndex + 1, nil
}

// NextIndexFileSetVolumeIndex returns the next index file set index for a given
// namespace/blockStart combination.
func NextIndexFileSetVolumeIndex(
	filePathPrefix string, namespace ident.ID, blockStart xtime.UnixNano,
) (int, error) {
	files, err := filesetFiles(filesetFilesSelector{
		fileSetType:    persist.FileSetFlushType,
		contentType:    persist.FileSetIndexContentType,
		filePathPrefix: filePathPrefix,
		namespace:      namespace,
		pattern:        filesetFileForTime(blockStart, anyLowerCaseCharsNumbersPattern),
	})
	if err != nil {
		return -1, err
	}

	// No volume exists yet for this block start: numbering starts at zero.
	latestFile, ok := files.LatestVolumeForBlock(blockStart)
	if !ok {
		return 0, nil
	}

	return latestFile.ID.VolumeIndex + 1, nil
}

// NextIndexSnapshotFileIndex returns the next snapshot file index for a given
// namespace/shard/blockStart combination.
func NextIndexSnapshotFileIndex(
	filePathPrefix string, namespace ident.ID, blockStart xtime.UnixNano,
) (int, error) {
	snapshotFiles, err := IndexSnapshotFiles(filePathPrefix, namespace)
	if err != nil {
		return -1, err
	}

	// NOTE(review): this takes the FIRST fileset whose block start matches,
	// not the latest volume (filesetFiles returns volumes in ascending
	// order). If multiple volumes ever exist for one block start this could
	// return an index that is already in use — confirm index snapshots are
	// single-volume per block start.
	currentSnapshotIndex := -1
	for _, snapshot := range snapshotFiles {
		if snapshot.ID.BlockStart.Equal(blockStart) {
			currentSnapshotIndex = snapshot.ID.VolumeIndex
			break
		}
	}

	return currentSnapshotIndex + 1, nil
}

// CompleteCheckpointFileExists returns whether a checkpoint file exists, and if so,
// is it complete.
func CompleteCheckpointFileExists(filePath string) (bool, error) {
	// Guard against misuse: this helper is only meaningful for checkpoint
	// files, whose completeness is defined by their exact size.
	if !strings.Contains(filePath, CheckpointFileSuffix) {
		return false, instrument.InvariantErrorf(
			"tried to use CompleteCheckpointFileExists to verify existence of non checkpoint file: %s",
			filePath,
		)
	}

	f, err := os.Stat(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			return false, nil
		}
		return false, err
	}

	// Make sure the checkpoint file was completely written out and its
	// not just an empty file.
	return f.Size() == CheckpointFileSizeBytes, nil
}

// FileExists returns whether a file at the given path exists.
func FileExists(filePath string) (bool, error) {
	if strings.Contains(filePath, CheckpointFileSuffix) {
		// Existence of a checkpoint file needs to be verified using the function
		// CompleteCheckpointFileExists instead to ensure that it has been
		// completely written out.
		return false, instrument.InvariantErrorf(
			"tried to use FileExists to verify existence of checkpoint file: %s",
			filePath,
		)
	}

	_, err := os.Stat(filePath)
	if err != nil {
		if os.IsNotExist(err) {
			return false, nil
		}

		return false, err
	}

	return true, nil
}

// OpenWritable opens a file for writing and truncating as necessary.
func OpenWritable(filePath string, perm os.FileMode) (*os.File, error) {
	return os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
}

// CommitLogFilePath returns the path for a commitlog file.
func CommitLogFilePath(prefix string, index int) string {
	var (
		// NOTE(review): the leading 0 component appears to be a fixed
		// placeholder in the commitlog naming scheme — confirm against the
		// commitlog writer before relying on it.
		entry    = fmt.Sprintf("%d%s%d", 0, separator, index)
		fileName = fmt.Sprintf("%s%s%s%s", commitLogFilePrefix, separator, entry, fileSuffix)
		filePath = path.Join(CommitLogsDirPath(prefix), fileName)
	)
	return filePath
}

// filesetFileForTime returns the base fileset file name for a block start;
// suffix may be a concrete suffix or a glob fragment depending on the caller.
func filesetFileForTime(t xtime.UnixNano, suffix string) string {
	return fmt.Sprintf("%s%s%d%s%s%s", filesetFilePrefix, separator, int64(t), separator, suffix, fileSuffix)
}

// filesetFileForTimeAndVolumeIndex is like filesetFileForTime but inserts the
// volume index between the block start and the suffix.
func filesetFileForTimeAndVolumeIndex(t xtime.UnixNano, index int, suffix string) string {
	newSuffix := fmt.Sprintf("%d%s%s", index, separator, suffix)
	return filesetFileForTime(t, newSuffix)
}

// filesetPathFromTimeLegacy builds a legacy (non-volume-indexed) fileset path.
func filesetPathFromTimeLegacy(prefix string, t xtime.UnixNano, suffix string) string {
	return path.Join(prefix, filesetFileForTime(t, suffix))
}

// FilesetPathFromTimeAndIndex builds a path of a fileset file.
func FilesetPathFromTimeAndIndex(prefix string, t xtime.UnixNano, index int, suffix string) string {
	return path.Join(prefix, filesetFileForTimeAndVolumeIndex(t, index, suffix))
}

// isFirstVolumeLegacy returns whether the first volume of the provided type is
// legacy, i.e. does not have a volume index in its filename. Using this
// function, the caller expects there to be a legacy or non-legacy file, and
// thus returns an error if neither exist. Note that this function does not
// check for the volume's complete checkpoint file.
//nolint: unparam
func isFirstVolumeLegacy(prefix string, t xtime.UnixNano, suffix string) (bool, error) {
	// Check non-legacy path first to optimize for newer files.
	path := FilesetPathFromTimeAndIndex(prefix, t, 0, suffix)
	_, err := os.Stat(path)
	if err == nil {
		return false, nil
	}

	legacyPath := filesetPathFromTimeLegacy(prefix, t, suffix)
	_, err = os.Stat(legacyPath)
	if err == nil {
		return true, nil
	}

	// NOTE(review): any stat failure (not just "does not exist") is collapsed
	// into ErrCheckpointFileNotFound here, so e.g. permission errors are
	// indistinguishable from missing files for callers.
	return false, ErrCheckpointFileNotFound
}

// Once we decide that we no longer want to support legacy (non-volume-indexed)
// filesets, we can remove this function and just use
// `FilesetPathFromTimeAndIndex`. Getting code to compile and tests to pass
// after that should be a comprehensive way to remove dead code.
func dataFilesetPathFromTimeAndIndex(
	prefix string,
	t xtime.UnixNano,
	index int,
	suffix string,
	isLegacy bool,
) string {
	if isLegacy {
		return filesetPathFromTimeLegacy(prefix, t, suffix)
	}

	return FilesetPathFromTimeAndIndex(prefix, t, index, suffix)
}

// filesetIndexSegmentFileSuffixFromTime builds the suffix for an index
// segment file within a fileset volume.
// NOTE(review): despite the name, no time component is used here — the block
// start is contributed by the two callers below.
func filesetIndexSegmentFileSuffixFromTime(
	segmentIndex int,
	segmentFileType idxpersist.IndexSegmentFileType,
) string {
	return fmt.Sprintf("%s%s%d%s%s", segmentFileSetFilePrefix, separator, segmentIndex, separator, segmentFileType)
}

// filesetIndexSegmentFilePathFromTime builds the full path of an index
// segment file belonging to a flushed index fileset volume.
func filesetIndexSegmentFilePathFromTime(
	prefix string,
	t xtime.UnixNano,
	volumeIndex int,
	segmentIndex int,
	segmentFileType idxpersist.IndexSegmentFileType,
) string {
	suffix := filesetIndexSegmentFileSuffixFromTime(segmentIndex, segmentFileType)
	return FilesetPathFromTimeAndIndex(prefix, t, volumeIndex, suffix)
}

// snapshotIndexSegmentFilePathFromTimeAndIndex builds the full path of an
// index segment file belonging to an index snapshot volume.
func snapshotIndexSegmentFilePathFromTimeAndIndex(
	prefix string,
	t xtime.UnixNano,
	snapshotIndex int,
	segmentIndex int,
	segmentFileType idxpersist.IndexSegmentFileType,
) string {
	suffix := filesetIndexSegmentFileSuffixFromTime(segmentIndex, segmentFileType)
	return FilesetPathFromTimeAndIndex(prefix, t, snapshotIndex, suffix)
}

// snapshotMetadataFilePathFromIdentifier builds the on-disk path of a
// snapshot metadata file from its identifier. Must stay in sync with
// snapshotMetadataIdentifierFromFilePath, which parses this format back.
func snapshotMetadataFilePathFromIdentifier(prefix string, id SnapshotMetadataIdentifier) string {
	return path.Join(
		prefix,
		snapshotDirName,
		fmt.Sprintf(
			"%s%s%s%s%d%s%s%s",
			snapshotFilePrefix, separator,
			sanitizeUUID(id.UUID), separator,
			id.Index, separator,
			metadataFileSuffix, fileSuffix))
}

// snapshotMetadataCheckpointFilePathFromIdentifier builds the on-disk path of
// a snapshot metadata checkpoint file — the metadata filename with an extra
// separator-delimited checkpoint suffix.
func snapshotMetadataCheckpointFilePathFromIdentifier(prefix string, id SnapshotMetadataIdentifier) string {
	return path.Join(
		prefix,
		snapshotDirName,
		fmt.Sprintf(
			"%s%s%s%s%d%s%s%s%s%s",
			snapshotFilePrefix, separator,
			sanitizeUUID(id.UUID), separator,
			id.Index, separator,
			metadataFileSuffix, separator,
			CheckpointFileSuffix, fileSuffix))
}

// sanitizeUUID strips all instances of separator ("-") in the provided UUID string. This prevents us from
// treating every "piece" of the UUID as a separate fragment of the name when we split filepaths by
// separator. This works because the UUID library can still parse stripped UUID strings.
func sanitizeUUID(u uuid.UUID) string {
	return strings.Replace(u.String(), separator, "", -1)
}

// parseUUID parses a (possibly sanitized, i.e. dash-stripped) UUID string,
// reporting success via the second return value.
func parseUUID(sanitizedUUID string) (uuid.UUID, bool) {
	parsed := uuid.Parse(sanitizedUUID)
	return parsed, parsed != nil
}

// snapshotMetadataIdentifierFromFilePath parses the index and UUID back out
// of a snapshot metadata (or metadata checkpoint) file path produced by the
// builders above.
func snapshotMetadataIdentifierFromFilePath(filePath string) (SnapshotMetadataIdentifier, error) {
	_, fileName := path.Split(filePath)
	if fileName == "" {
		return SnapshotMetadataIdentifier{}, fmt.Errorf(
			"splitting: %s created empty filename", filePath)
	}

	var (
		splitFileName    = strings.Split(fileName, separator)
		isCheckpointFile = strings.Contains(fileName, CheckpointFileSuffix)
	)
	if len(splitFileName) != numComponentsSnapshotMetadataFile &&
		// Snapshot metadata checkpoint files contain one extra separator.
		!(isCheckpointFile && len(splitFileName) == numComponentsSnapshotMetadataCheckpointFile) {
		return SnapshotMetadataIdentifier{}, fmt.Errorf(
			"invalid snapshot metadata file name: %s", filePath)
	}

	index, err := strconv.ParseInt(splitFileName[snapshotMetadataIndexComponentPosition], 10, 64)
	if err != nil {
		return SnapshotMetadataIdentifier{}, fmt.Errorf(
			"invalid snapshot metadata file name, unable to parse index: %s", filePath)
	}

	sanitizedUUID := splitFileName[snapshotMetadataUUIDComponentPosition]
	id, ok := parseUUID(sanitizedUUID)
	if !ok {
		return SnapshotMetadataIdentifier{}, fmt.Errorf(
			"invalid snapshot metadata file name, unable to parse UUID: %s", filePath)
	}

	return SnapshotMetadataIdentifier{
		Index: index,
		UUID:  id,
	}, nil
}