github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/files.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fs
    22  
    23  import (
    24  	"bufio"
    25  	"errors"
    26  	"fmt"
    27  	"os"
    28  	"path"
    29  	"path/filepath"
    30  	"sort"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  
    35  	"github.com/m3db/m3/src/dbnode/digest"
    36  	"github.com/m3db/m3/src/dbnode/generated/proto/index"
    37  	"github.com/m3db/m3/src/dbnode/persist"
    38  	"github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
    39  	"github.com/m3db/m3/src/dbnode/persist/schema"
    40  	idxpersist "github.com/m3db/m3/src/m3ninx/persist"
    41  	xerrors "github.com/m3db/m3/src/x/errors"
    42  	"github.com/m3db/m3/src/x/ident"
    43  	"github.com/m3db/m3/src/x/instrument"
    44  	xtime "github.com/m3db/m3/src/x/time"
    45  
    46  	"github.com/pborman/uuid"
    47  )
    48  
var (
	// timeZero is the zero time.Time, used as a sentinel comparison value.
	timeZero time.Time

	// errSnapshotTimeAndIDZero is returned when snapshot metadata is requested
	// from a zero-value FileSetFile.
	errSnapshotTimeAndIDZero = errors.New("tried to read snapshot time and ID of zero value")
	// errNonSnapshotFileset is returned when snapshot metadata is requested
	// from a fileset that is a flush rather than a snapshot.
	errNonSnapshotFileset    = errors.New("tried to determine snapshot time and id of non-snapshot")
)
    55  
const (
	// Subdirectory names beneath the filesystem path prefix.
	dataDirName       = "data"
	indexDirName      = "index"
	snapshotDirName   = "snapshots"
	commitLogsDirName = "commitlogs"

	// The maximum number of delimiters ('-' or '.') that is expected in a
	// (base) filename.
	maxDelimNum = 4

	// The volume index assigned to (legacy) filesets that don't have a volume
	// number in their filename.
	// NOTE: Since this index is the same as the index for the first
	// (non-legacy) fileset, receiving an index of 0 means that we need to
	// check for both indexed and non-indexed filenames.
	unindexedFilesetIndex = 0

	// Positions (in delimiter-separated filename components) of the block
	// start time and of the volume/index component for each filename scheme.
	timeComponentPosition         = 1
	commitLogComponentPosition    = 2
	indexFileSetComponentPosition = 2
	dataFileSetComponentPosition  = 2

	// Expected component counts and component positions within snapshot
	// metadata (and metadata checkpoint) filenames.
	numComponentsSnapshotMetadataFile           = 4
	numComponentsSnapshotMetadataCheckpointFile = 5
	snapshotMetadataUUIDComponentPosition       = 1
	snapshotMetadataIndexComponentPosition      = 2

	// Format string used by the filename parsing helpers for malformed names.
	errUnexpectedFilenamePattern = "unexpected filename: %s"
)
    85  
    86  var defaultBufioReaderSize = bufio.NewReader(nil).Size()
    87  
    88  type fileOpener func(filePath string) (*os.File, error)
    89  
// LazyEvalBool is a boolean that is lazily evaluated.
// The zero value (EvalNone) means "not yet computed".
type LazyEvalBool uint8

const (
	// EvalNone indicates the boolean has not been evaluated.
	EvalNone LazyEvalBool = iota
	// EvalTrue indicates the boolean has been evaluated to true.
	EvalTrue
	// EvalFalse indicates the boolean has been evaluated to false.
	EvalFalse
)
   101  
// FileSetFile represents a set of FileSet files for a given block start
type FileSetFile struct {
	ID                FileSetFileIdentifier
	AbsoluteFilePaths []string

	// Cached values populated lazily to avoid re-reading files from disk.
	CachedSnapshotTime              xtime.UnixNano
	CachedSnapshotID                uuid.UUID
	CachedHasCompleteCheckpointFile LazyEvalBool
	filePathPrefix                  string
}
   112  
// SnapshotTimeAndID returns the snapshot time and id for the given FileSetFile.
// Value is meaningless if the FileSetFile is a flush instead of a snapshot.
// The values are cached on the receiver so the snapshot info file is read
// from disk at most once per FileSetFile.
func (f *FileSetFile) SnapshotTimeAndID() (xtime.UnixNano, uuid.UUID, error) {
	if f.IsZero() {
		return 0, nil, errSnapshotTimeAndIDZero
	}
	// Only snapshot filesets (paths under the snapshots dir) carry this info.
	if _, ok := f.SnapshotFilepath(); !ok {
		return 0, nil, errNonSnapshotFileset
	}

	if !f.CachedSnapshotTime.IsZero() || f.CachedSnapshotID != nil {
		// Return immediately if we've already cached it.
		return f.CachedSnapshotTime, f.CachedSnapshotID, nil
	}

	snapshotTime, snapshotID, err := SnapshotTimeAndID(f.filePathPrefix, f.ID)
	if err != nil {
		return 0, nil, err
	}

	// Cache for future use and return.
	f.CachedSnapshotTime = snapshotTime
	f.CachedSnapshotID = snapshotID
	return f.CachedSnapshotTime, f.CachedSnapshotID, nil
}
   138  
// InfoFilePath returns the info file path of a filesetfile (if found).
// Matching is by substring against the fileset's absolute file paths.
func (f *FileSetFile) InfoFilePath() (string, bool) {
	return f.filepath(InfoFileSuffix)
}
   143  
// SnapshotFilepath returns the snapshot file path of a filesetfile (if found),
// i.e. a path that lives under the snapshots directory.
func (f *FileSetFile) SnapshotFilepath() (string, bool) {
	return f.filepath(snapshotDirName)
}
   148  
// IsZero returns whether the FileSetFile is a zero value, i.e. it references
// no files on disk.
func (f FileSetFile) IsZero() bool {
	return len(f.AbsoluteFilePaths) == 0
}
   153  
   154  func (f *FileSetFile) filepath(pathContains string) (string, bool) {
   155  	var (
   156  		found    bool
   157  		foundIdx int
   158  	)
   159  	for idx, path := range f.AbsoluteFilePaths {
   160  		if strings.Contains(path, pathContains) {
   161  			found = true
   162  			foundIdx = idx
   163  		}
   164  	}
   165  	if found {
   166  		return f.AbsoluteFilePaths[foundIdx], true
   167  	}
   168  	return "", false
   169  }
   170  
   171  // HasCompleteCheckpointFile returns a bool indicating whether the given set of
   172  // fileset files has a checkpoint file.
   173  func (f *FileSetFile) HasCompleteCheckpointFile() bool {
   174  	switch f.CachedHasCompleteCheckpointFile {
   175  	case EvalNone:
   176  		f.CachedHasCompleteCheckpointFile = f.evalHasCompleteCheckpointFile()
   177  		return f.HasCompleteCheckpointFile()
   178  	case EvalTrue:
   179  		return true
   180  	}
   181  	return false
   182  }
   183  
   184  func (f *FileSetFile) evalHasCompleteCheckpointFile() LazyEvalBool {
   185  	for _, fileName := range f.AbsoluteFilePaths {
   186  		if strings.Contains(fileName, CheckpointFileSuffix) {
   187  			exists, err := CompleteCheckpointFileExists(fileName)
   188  			if err != nil {
   189  				continue
   190  			}
   191  			if exists {
   192  				return EvalTrue
   193  			}
   194  		}
   195  	}
   196  
   197  	return EvalFalse
   198  }
   199  
// FileSetFilesSlice is a slice of FileSetFile
type FileSetFilesSlice []FileSetFile
   202  
   203  // Filepaths flattens a slice of FileSetFiles to a single slice of filepaths.
   204  // All paths returned are absolute.
   205  func (f FileSetFilesSlice) Filepaths() []string {
   206  	flattened := []string{}
   207  	for _, fileset := range f {
   208  		flattened = append(flattened, fileset.AbsoluteFilePaths...)
   209  	}
   210  
   211  	return flattened
   212  }
   213  
   214  // LatestVolumeForBlock returns the latest (highest index) FileSetFile in the
   215  // slice for a given block start that has a complete checkpoint file.
   216  func (f FileSetFilesSlice) LatestVolumeForBlock(blockStart xtime.UnixNano) (FileSetFile, bool) {
   217  	// Make sure we're already sorted.
   218  	f.sortByTimeAndVolumeIndexAscending()
   219  
   220  	for i, curr := range f {
   221  		if curr.ID.BlockStart.Equal(blockStart) {
   222  			var (
   223  				bestSoFar       FileSetFile
   224  				bestSoFarExists bool
   225  			)
   226  
   227  			for j := i; j < len(f); j++ {
   228  				curr = f[j]
   229  
   230  				if !curr.ID.BlockStart.Equal(blockStart) {
   231  					break
   232  				}
   233  
   234  				if curr.HasCompleteCheckpointFile() && curr.ID.VolumeIndex >= bestSoFar.ID.VolumeIndex {
   235  					bestSoFar = curr
   236  					bestSoFarExists = true
   237  				}
   238  
   239  			}
   240  
   241  			return bestSoFar, bestSoFarExists
   242  		}
   243  	}
   244  
   245  	return FileSetFile{}, false
   246  }
   247  
   248  // VolumeExistsForBlock returns whether there is a valid FileSetFile for the
   249  // given block start and volume index.
   250  func (f FileSetFilesSlice) VolumeExistsForBlock(blockStart xtime.UnixNano, volume int) bool {
   251  	for _, curr := range f {
   252  		if curr.ID.BlockStart.Equal(blockStart) && curr.ID.VolumeIndex == volume {
   253  			return curr.HasCompleteCheckpointFile()
   254  		}
   255  	}
   256  
   257  	return false
   258  }
   259  
// sortByTimeAscending sorts the slice in place by block start time.
// It ignores the index in the FileSetFileIdentifier because fileset files
// should always have index 0. Note: sort.Slice is not stable.
func (f FileSetFilesSlice) sortByTimeAscending() {
	sort.Slice(f, func(i, j int) bool {
		return f[i].ID.BlockStart.Before(f[j].ID.BlockStart)
	})
}
   267  
   268  func (f FileSetFilesSlice) sortByTimeAndVolumeIndexAscending() {
   269  	sort.Slice(f, func(i, j int) bool {
   270  		if f[i].ID.BlockStart.Equal(f[j].ID.BlockStart) {
   271  			return f[i].ID.VolumeIndex < f[j].ID.VolumeIndex
   272  		}
   273  
   274  		return f[i].ID.BlockStart.Before(f[j].ID.BlockStart)
   275  	})
   276  }
   277  
// SnapshotMetadata represents a SnapshotMetadata file, along with its checkpoint file,
// as well as all the information contained within the metadata file and paths to the
// physical files on disk.
type SnapshotMetadata struct {
	ID                  SnapshotMetadataIdentifier
	CommitlogIdentifier persist.CommitLogFile
	MetadataFilePath    string // absolute path to the metadata file
	CheckpointFilePath  string // absolute path to the checkpoint file
}
   287  
// AbsoluteFilePaths returns a slice of all the absolute filepaths associated
// with a snapshot metadata: the metadata file followed by its checkpoint file.
func (s SnapshotMetadata) AbsoluteFilePaths() []string {
	return []string{s.MetadataFilePath, s.CheckpointFilePath}
}
   293  
// SnapshotMetadataErrorWithPaths contains an error that occurred while trying to
// read a snapshot metadata file, as well as paths for the metadata file path and
// the checkpoint file path so that they can be cleaned up. The checkpoint file may
// not exist if only the metadata file was written out (due to sudden node failure)
// or if the metadata file name was structured incorrectly (should never happen.)
type SnapshotMetadataErrorWithPaths struct {
	Error              error
	MetadataFilePath   string
	CheckpointFilePath string // may reference a file that does not exist
}
   304  
// SnapshotMetadataIdentifier is an identifier for a snapshot metadata file
type SnapshotMetadataIdentifier struct {
	Index int64     // monotonically increasing snapshot index
	UUID  uuid.UUID // unique ID of the snapshot
}
   310  
// NewFileSetFileIdentifier creates a new FileSetFileIdentifier from the
// namespace, block start, shard and volume index that uniquely identify a
// fileset volume.
func NewFileSetFileIdentifier(
	namespace ident.ID,
	blockStart xtime.UnixNano,
	shard uint32,
	volumeIndex int,
) FileSetFileIdentifier {
	return FileSetFileIdentifier{
		Namespace:   namespace,
		Shard:       shard,
		BlockStart:  blockStart,
		VolumeIndex: volumeIndex,
	}
}
   325  
// NewFileSetFile creates a new FileSet file with an empty (non-nil) list of
// absolute file paths and lazily-evaluated cached fields left at their zero
// values.
func NewFileSetFile(id FileSetFileIdentifier, filePathPrefix string) FileSetFile {
	return FileSetFile{
		ID:                id,
		AbsoluteFilePaths: []string{},
		filePathPrefix:    filePathPrefix,
	}
}
   334  
   335  func openFiles(opener fileOpener, fds map[string]**os.File) error {
   336  	var firstErr error
   337  	for filePath, fdPtr := range fds {
   338  		fd, err := opener(filePath)
   339  		if err != nil {
   340  			firstErr = err
   341  			break
   342  		}
   343  		*fdPtr = fd
   344  	}
   345  
   346  	if firstErr == nil {
   347  		return nil
   348  	}
   349  
   350  	// If we have encountered an error when opening the files,
   351  	// close the ones that have been opened.
   352  	for _, fdPtr := range fds {
   353  		if *fdPtr != nil {
   354  			(*fdPtr).Close()
   355  		}
   356  	}
   357  
   358  	return firstErr
   359  }
   360  
   361  // DeleteFiles delete a set of files, returning all the errors encountered during
   362  // the deletion process.
   363  func DeleteFiles(filePaths []string) error {
   364  	multiErr := xerrors.NewMultiError()
   365  	for _, file := range filePaths {
   366  		if err := os.Remove(file); err != nil {
   367  			detailedErr := fmt.Errorf("failed to remove file %s: %v", file, err)
   368  			multiErr = multiErr.Add(detailedErr)
   369  		}
   370  	}
   371  	return multiErr.FinalError()
   372  }
   373  
   374  // DeleteDirectories delets a set of directories and its contents, returning all
   375  // of the errors encountered during the deletion process.
   376  func DeleteDirectories(dirPaths []string) error {
   377  	multiErr := xerrors.NewMultiError()
   378  	for _, dir := range dirPaths {
   379  		if err := os.RemoveAll(dir); err != nil {
   380  			detailedErr := fmt.Errorf("failed to remove dir %s: %v", dir, err)
   381  			multiErr = multiErr.Add(detailedErr)
   382  		}
   383  	}
   384  	return multiErr.FinalError()
   385  }
   386  
// byTimeAscending sorts files by their block start times in ascending order.
// If the files do not have block start times in their names, the result is undefined.
// Parse errors are deliberately ignored: an unparseable name yields the zero
// time and therefore sorts first.
type byTimeAscending []string

func (a byTimeAscending) Len() int      { return len(a) }
func (a byTimeAscending) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byTimeAscending) Less(i, j int) bool {
	ti, _ := TimeFromFileName(a[i])
	tj, _ := TimeFromFileName(a[j])
	return ti.Before(tj)
}
   398  
   399  // commitlogsByTimeAndIndexAscending sorts commitlogs by their block start times and index in ascending
   400  // order. If the files do not have block start times or indexes in their names, the result is undefined.
   401  type commitlogsByTimeAndIndexAscending []string
   402  
   403  func (a commitlogsByTimeAndIndexAscending) Len() int      { return len(a) }
   404  func (a commitlogsByTimeAndIndexAscending) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
   405  func (a commitlogsByTimeAndIndexAscending) Less(i, j int) bool {
   406  	ti, ii, _ := TimeAndIndexFromCommitlogFilename(a[i])
   407  	tj, ij, _ := TimeAndIndexFromCommitlogFilename(a[j])
   408  	if ti.Before(tj) {
   409  		return true
   410  	}
   411  	return ti.Equal(tj) && ii < ij
   412  }
   413  
   414  // Returns the positions of filename delimiters ('-' and '.') and the number of
   415  // delimeters found, to be used in conjunction with the intComponentAtIndex
   416  // function to extract filename components. This function is deliberately
   417  // optimized for speed and lack of allocations, since allocation-heavy filename
   418  // parsing can quickly become a large source of allocations in the entire
   419  // system, especially when namespaces with long retentions are configured.
   420  func delimiterPositions(baseFilename string) ([maxDelimNum]int, int) {
   421  	var (
   422  		delimPos    [maxDelimNum]int
   423  		delimsFound int
   424  	)
   425  
   426  	for i := range baseFilename {
   427  		if r := baseFilename[i]; r == separatorRune || r == fileSuffixDelimeterRune {
   428  			delimPos[delimsFound] = i
   429  			delimsFound++
   430  
   431  			if delimsFound == len(delimPos) {
   432  				// Found the maximum expected number of separators.
   433  				break
   434  			}
   435  		}
   436  	}
   437  
   438  	return delimPos, delimsFound
   439  }
   440  
   441  // Returns the the specified component of a filename, given the positions of
   442  // delimeters. Our only use cases for this involve extracting numeric
   443  // components, so this function assumes this and returns the component as an
   444  // int64.
   445  func intComponentAtIndex(
   446  	baseFilename string,
   447  	componentPos int,
   448  	delimPos [maxDelimNum]int,
   449  ) (xtime.UnixNano, error) {
   450  	start := 0
   451  	if componentPos > 0 {
   452  		start = delimPos[componentPos-1] + 1
   453  	}
   454  	end := delimPos[componentPos]
   455  	if start > end || end > len(baseFilename)-1 || start < 0 {
   456  		return 0, fmt.Errorf(errUnexpectedFilenamePattern, baseFilename)
   457  	}
   458  
   459  	num, err := strconv.ParseInt(baseFilename[start:end], 10, 64)
   460  	if err != nil {
   461  		return 0, fmt.Errorf(errUnexpectedFilenamePattern, baseFilename)
   462  	}
   463  	return xtime.UnixNano(num), nil
   464  }
   465  
   466  // TimeFromFileName extracts the block start time from file name.
   467  func TimeFromFileName(fname string) (xtime.UnixNano, error) {
   468  	base := filepath.Base(fname)
   469  
   470  	delims, delimsFound := delimiterPositions(base)
   471  	// There technically only needs to be two delimeters here since the time
   472  	// component is in index 1. However, all DB files have a minimum of three
   473  	// delimeters, so check for that instead.
   474  	if delimsFound < 3 {
   475  		return 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   476  	}
   477  	nanos, err := intComponentAtIndex(base, timeComponentPosition, delims)
   478  	if err != nil {
   479  		return 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   480  	}
   481  
   482  	return nanos, nil
   483  }
   484  
// TimeAndIndexFromCommitlogFilename extracts the block start and index from
// file name for a commitlog. The index is the third '-'/'.'-delimited
// component of the base filename.
func TimeAndIndexFromCommitlogFilename(fname string) (xtime.UnixNano, int, error) {
	return timeAndIndexFromFileName(fname, commitLogComponentPosition)
}
   490  
   491  // TimeAndVolumeIndexFromDataFileSetFilename extracts the block start and volume
   492  // index from a data fileset file name that may or may not have an index. If the
   493  // file name does not include an index, unindexedFilesetIndex is returned as the
   494  // volume index.
   495  func TimeAndVolumeIndexFromDataFileSetFilename(fname string) (xtime.UnixNano, int, error) {
   496  	base := filepath.Base(fname)
   497  
   498  	delims, delimsFound := delimiterPositions(base)
   499  	if delimsFound < 3 {
   500  		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   501  	}
   502  
   503  	nanos, err := intComponentAtIndex(base, timeComponentPosition, delims)
   504  	if err != nil {
   505  		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   506  	}
   507  
   508  	// Legacy filename with no volume index.
   509  	if delimsFound == 3 {
   510  		return nanos, unindexedFilesetIndex, nil
   511  	}
   512  
   513  	volume, err := intComponentAtIndex(base, dataFileSetComponentPosition, delims)
   514  	if err != nil {
   515  		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   516  	}
   517  
   518  	return nanos, int(volume), nil
   519  }
   520  
// TimeAndVolumeIndexFromFileSetFilename extracts the block start and
// volume index from an index file name. Unlike data filesets, index
// filesets always carry an explicit volume component.
func TimeAndVolumeIndexFromFileSetFilename(fname string) (xtime.UnixNano, int, error) {
	return timeAndIndexFromFileName(fname, indexFileSetComponentPosition)
}
   526  
   527  func timeAndIndexFromFileName(fname string, componentPosition int) (xtime.UnixNano, int, error) {
   528  	base := filepath.Base(fname)
   529  
   530  	delims, delimsFound := delimiterPositions(base)
   531  	if componentPosition > delimsFound {
   532  		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   533  	}
   534  
   535  	nanos, err := intComponentAtIndex(base, 1, delims)
   536  	if err != nil {
   537  		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   538  	}
   539  
   540  	index, err := intComponentAtIndex(base, componentPosition, delims)
   541  	if err != nil {
   542  		return 0, 0, fmt.Errorf(errUnexpectedFilenamePattern, fname)
   543  	}
   544  
   545  	return nanos, int(index), nil
   546  }
   547  
// SnapshotTimeAndID returns the metadata for the snapshot. It reads and
// validates the snapshot's info file from disk using a freshly-allocated
// msgpack decoder.
func SnapshotTimeAndID(
	filePathPrefix string, id FileSetFileIdentifier) (xtime.UnixNano, uuid.UUID, error) {
	decoder := msgpack.NewDecoder(nil)
	return snapshotTimeAndID(filePathPrefix, id, decoder)
}
   554  
// snapshotTimeAndID reads the (digest-validated) snapshot info file for the
// given fileset and decodes the snapshot time and UUID from it. The caller
// supplies the decoder so it can be reused across calls.
func snapshotTimeAndID(
	filePathPrefix string,
	id FileSetFileIdentifier,
	decoder *msgpack.Decoder,
) (xtime.UnixNano, uuid.UUID, error) {
	infoBytes, err := readSnapshotInfoFile(filePathPrefix, id, defaultBufioReaderSize)
	if err != nil {
		return 0, nil, fmt.Errorf("error reading snapshot info file: %w", err)
	}

	decoder.Reset(msgpack.NewByteDecoderStream(infoBytes))
	info, err := decoder.DecodeIndexInfo()
	if err != nil {
		return 0, nil, fmt.Errorf("error decoding snapshot info file: %w", err)
	}

	// The snapshot ID is stored as binary-marshaled UUID bytes.
	var parsedSnapshotID uuid.UUID
	err = parsedSnapshotID.UnmarshalBinary(info.SnapshotID)
	if err != nil {
		return 0, nil, fmt.Errorf("error parsing snapshot ID from snapshot info file: %w", err)
	}

	return xtime.UnixNano(info.SnapshotTime), parsedSnapshotID, nil
}
   579  
// readSnapshotInfoFile reads the snapshot info file for a fileset, validating
// the full digest chain along the way: the checkpoint file holds the digest
// of the digests file, which in turn holds the digest of the info file.
// Returns the raw (validated) info file bytes.
func readSnapshotInfoFile(
	filePathPrefix string, id FileSetFileIdentifier, readerBufferSize int,
) ([]byte, error) {
	var (
		shardDir           = ShardSnapshotsDirPath(filePathPrefix, id.Namespace, id.Shard)
		checkpointFilePath = FilesetPathFromTimeAndIndex(
			shardDir, id.BlockStart, id.VolumeIndex, CheckpointFileSuffix,
		)
		digestFilePath = FilesetPathFromTimeAndIndex(
			shardDir, id.BlockStart, id.VolumeIndex, DigestFileSuffix,
		)
		infoFilePath = FilesetPathFromTimeAndIndex(
			shardDir, id.BlockStart, id.VolumeIndex, InfoFileSuffix,
		)
	)

	checkpointFd, err := os.Open(checkpointFilePath)
	if err != nil {
		return nil, err
	}

	// Read digest of digests from the checkpoint file
	digestBuf := digest.NewBuffer()
	expectedDigestOfDigest, err := digestBuf.ReadDigestFromFile(checkpointFd)
	// Close before inspecting the read error so the fd is never leaked.
	closeErr := checkpointFd.Close()
	if err != nil {
		return nil, err
	}
	if closeErr != nil {
		return nil, closeErr
	}

	// Read and validate the digest file
	digestData, err := readAndValidate(
		digestFilePath, readerBufferSize, expectedDigestOfDigest)
	if err != nil {
		return nil, err
	}

	// Read and validate the info file
	expectedInfoDigest := digest.ToBuffer(digestData).ReadDigest()
	return readAndValidate(
		infoFilePath, readerBufferSize, expectedInfoDigest)
}
   624  
   625  func readCheckpointFile(filePath string, digestBuf digest.Buffer) (uint32, error) {
   626  	exists, err := CompleteCheckpointFileExists(filePath)
   627  	if err != nil {
   628  		return 0, err
   629  	}
   630  	if !exists {
   631  		return 0, ErrCheckpointFileNotFound
   632  	}
   633  	fd, err := os.Open(filePath)
   634  	if err != nil {
   635  		return 0, err
   636  	}
   637  	defer fd.Close()
   638  	digest, err := digestBuf.ReadDigestFromFile(fd)
   639  	if err != nil {
   640  		return 0, err
   641  	}
   642  
   643  	return digest, nil
   644  }
   645  
// forEachInfoFileSelector describes which fileset volumes forEachInfoFile
// should visit and whether corrupted volumes are surfaced.
type forEachInfoFileSelector struct {
	fileSetType      persist.FileSetType        // flush or snapshot
	contentType      persist.FileSetContentType // data or index
	filePathPrefix   string
	namespace        ident.ID
	shard            uint32 // shard only applicable for data content type
	includeCorrupted bool   // include corrupted filesets (fail validation)
}

// infoFileFn is invoked by forEachInfoFile for each visited fileset volume
// with the volume, its info file contents (nil when unreadable), and whether
// the volume failed validation.
type infoFileFn func(file FileSetFile, infoData []byte, corrupted bool)
   656  
// forEachInfoFile iterates over all fileset volumes matching the selector,
// validates each volume's checkpoint -> digests -> info file chain, and
// invokes fn for each volume. Valid volumes are reported with their raw info
// file bytes and corrupted=false; volumes that fail any validation step are
// only reported (with corrupted=true) when args.includeCorrupted is set.
func forEachInfoFile(
	args forEachInfoFileSelector,
	readerBufferSize int,
	fn infoFileFn,
) {
	matched, err := filesetFiles(filesetFilesSelector{
		fileSetType:    args.fileSetType,
		contentType:    args.contentType,
		filePathPrefix: args.filePathPrefix,
		namespace:      args.namespace,
		shard:          args.shard,
		pattern:        filesetFilePattern,
	})
	if err != nil {
		// NB: listing errors are silently swallowed; callers observe no files.
		return
	}

	// Resolve the directory holding the filesets from the combination of
	// fileset type (flush/snapshot) and content type (data/index). Unknown
	// combinations bail out without invoking fn.
	var dir string
	switch args.fileSetType {
	case persist.FileSetFlushType:
		switch args.contentType {
		case persist.FileSetDataContentType:
			dir = ShardDataDirPath(args.filePathPrefix, args.namespace, args.shard)
		case persist.FileSetIndexContentType:
			dir = NamespaceIndexDataDirPath(args.filePathPrefix, args.namespace)
		default:
			return
		}
	case persist.FileSetSnapshotType:
		switch args.contentType {
		case persist.FileSetDataContentType:
			dir = ShardSnapshotsDirPath(args.filePathPrefix, args.namespace, args.shard)
		case persist.FileSetIndexContentType:
			dir = NamespaceIndexSnapshotDirPath(args.filePathPrefix, args.namespace)
		default:
			return
		}
	default:
		return
	}

	// maybeIncludeCorrupted reports a volume that failed validation to fn
	// (with corrupted=true) when includeCorrupted is set; otherwise a no-op.
	maybeIncludeCorrupted := func(corrupted FileSetFile) {
		if !args.includeCorrupted {
			return
		}
		// NB: We do not want to give up here on error or else we may not clean up
		// corrupt index filesets.
		infoFilePath, ok := corrupted.InfoFilePath()
		if !ok {
			fn(corrupted, nil, true)
			return
		}
		infoData, err := read(infoFilePath)
		if err != nil {
			// NB: If no info data is supplied, we assume that the
			// info file itself is corrupted. Since this is the
			// first file written to disk, this should be safe to remove.
			fn(corrupted, nil, true)
			return
		}
		// NB: We always write an index info file when we begin writing to an index volume
		// so we are always guaranteed that there's AT LEAST the info file on disk w/ incomplete info.
		fn(corrupted, infoData, true)
	}

	var indexDigests index.IndexDigests
	digestBuf := digest.NewBuffer()
	for i := range matched {
		t := matched[i].ID.BlockStart
		volume := matched[i].ID.VolumeIndex

		// Derive the expected checkpoint/digests/info paths for this volume;
		// legacy data filesets at volume 0 may omit the volume component.
		var (
			checkpointFilePath string
			digestsFilePath    string
			infoFilePath       string
		)
		switch args.fileSetType {
		case persist.FileSetFlushType:
			switch args.contentType {
			case persist.FileSetDataContentType:
				isLegacy := false
				if volume == 0 {
					isLegacy, err = isFirstVolumeLegacy(dir, t, CheckpointFileSuffix)
					if err != nil {
						continue
					}
				}
				checkpointFilePath = dataFilesetPathFromTimeAndIndex(dir, t, volume, CheckpointFileSuffix, isLegacy)
				digestsFilePath = dataFilesetPathFromTimeAndIndex(dir, t, volume, DigestFileSuffix, isLegacy)
				infoFilePath = dataFilesetPathFromTimeAndIndex(dir, t, volume, InfoFileSuffix, isLegacy)
			case persist.FileSetIndexContentType:
				checkpointFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, CheckpointFileSuffix)
				digestsFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, DigestFileSuffix)
				infoFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, InfoFileSuffix)
			}
		case persist.FileSetSnapshotType:
			checkpointFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, CheckpointFileSuffix)
			digestsFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, DigestFileSuffix)
			infoFilePath = FilesetPathFromTimeAndIndex(dir, t, volume, InfoFileSuffix)
		}
		// Read digest of digests from the checkpoint file
		expectedDigestOfDigest, err := readCheckpointFile(checkpointFilePath, digestBuf)
		if err != nil {
			maybeIncludeCorrupted(matched[i])
			continue
		}
		// Read and validate the digest file
		digestData, err := readAndValidate(digestsFilePath, readerBufferSize,
			expectedDigestOfDigest)
		if err != nil {
			maybeIncludeCorrupted(matched[i])
			continue
		}

		// Read and validate the info file
		var expectedInfoDigest uint32
		switch args.contentType {
		case persist.FileSetDataContentType:
			expectedInfoDigest = digest.ToBuffer(digestData).ReadDigest()
		case persist.FileSetIndexContentType:
			// Index digests are stored as a protobuf message rather than a
			// raw digest buffer.
			if err := indexDigests.Unmarshal(digestData); err != nil {
				maybeIncludeCorrupted(matched[i])
				continue
			}
			expectedInfoDigest = indexDigests.GetInfoDigest()
		}

		infoData, err := readAndValidate(infoFilePath, readerBufferSize,
			expectedInfoDigest)
		if err != nil {
			maybeIncludeCorrupted(matched[i])
			continue
		}
		// Guarantee that every matched fileset has an info file.
		if _, ok := matched[i].InfoFilePath(); !ok {
			maybeIncludeCorrupted(matched[i])
			continue
		}

		fn(matched[i], infoData, false)
	}
}
   799  
// ReadInfoFileResult is the result of reading an info file
type ReadInfoFileResult struct {
	Info schema.IndexInfo        // decoded info file contents
	Err  ReadInfoFileResultError // error (if any) and the originating filepath
}
   805  
// ReadInfoFileResultError is the interface for obtaining information about an error
// that occurred trying to read an info file
type ReadInfoFileResultError interface {
	Error() error
	Filepath() string
}
   812  
// readInfoFileResultError is the concrete ReadInfoFileResultError
// implementation returned by ReadInfoFiles.
type readInfoFileResultError struct {
	err      error
	filepath string
}

// Error returns the error that occurred reading the info file
func (r readInfoFileResultError) Error() error {
	return r.err
}

// Filepath returns the filepath for the problematic file
func (r readInfoFileResultError) Filepath() string {
	return r.filepath
}
   827  
   828  // ReadInfoFiles reads all the valid info entries. Even if ReadInfoFiles returns an error,
   829  // there may be some valid entries in the returned slice.
   830  func ReadInfoFiles(
   831  	filePathPrefix string,
   832  	namespace ident.ID,
   833  	shard uint32,
   834  	readerBufferSize int,
   835  	decodingOpts msgpack.DecodingOptions,
   836  	fileSetType persist.FileSetType,
   837  ) []ReadInfoFileResult {
   838  	var infoFileResults []ReadInfoFileResult
   839  	decoder := msgpack.NewDecoder(decodingOpts)
   840  	forEachInfoFile(
   841  		forEachInfoFileSelector{
   842  			fileSetType:    fileSetType,
   843  			contentType:    persist.FileSetDataContentType,
   844  			filePathPrefix: filePathPrefix,
   845  			namespace:      namespace,
   846  			shard:          shard,
   847  		},
   848  		readerBufferSize,
   849  		func(file FileSetFile, data []byte, _ bool) {
   850  			filePath, _ := file.InfoFilePath()
   851  			decoder.Reset(msgpack.NewByteDecoderStream(data))
   852  			info, err := decoder.DecodeIndexInfo()
   853  			infoFileResults = append(infoFileResults, ReadInfoFileResult{
   854  				Info: info,
   855  				Err: readInfoFileResultError{
   856  					err:      err,
   857  					filepath: filePath,
   858  				},
   859  			})
   860  		})
   861  	return infoFileResults
   862  }
   863  
// ReadIndexInfoFilesOptions specifies options for reading index info files.
type ReadIndexInfoFilesOptions struct {
	// FilePathPrefix is the root prefix of the database file tree.
	FilePathPrefix string
	// Namespace is the namespace whose index info files should be read.
	Namespace ident.ID
	// ReaderBufferSize is the buffer size used when reading/validating files.
	ReaderBufferSize int
	// IncludeCorrupted, when true, also reports filesets whose digests or
	// info files failed validation (marked via ReadIndexInfoFileResult.Corrupted).
	IncludeCorrupted bool
}
   871  
// ReadIndexInfoFileResult is the result of reading an info file.
type ReadIndexInfoFileResult struct {
	// ID identifies the fileset the info file belongs to.
	ID FileSetFileIdentifier
	// Info is the unmarshaled volume info; only meaningful when Err holds no error.
	Info index.IndexVolumeInfo
	// AbsoluteFilePaths lists every file that makes up the fileset.
	AbsoluteFilePaths []string
	// Err records any unmarshal error together with the info file path.
	Err ReadInfoFileResultError
	// Corrupted is true when the fileset failed digest/info validation and was
	// included only because IncludeCorrupted was set.
	Corrupted bool
}
   880  
   881  // ReadIndexInfoFiles reads all the valid index info entries. Even if ReadIndexInfoFiles returns an error,
   882  // there may be some valid entries in the returned slice.
   883  func ReadIndexInfoFiles(opts ReadIndexInfoFilesOptions) []ReadIndexInfoFileResult {
   884  	var infoFileResults []ReadIndexInfoFileResult
   885  	forEachInfoFile(
   886  		forEachInfoFileSelector{
   887  			fileSetType:      persist.FileSetFlushType,
   888  			contentType:      persist.FileSetIndexContentType,
   889  			filePathPrefix:   opts.FilePathPrefix,
   890  			namespace:        opts.Namespace,
   891  			includeCorrupted: opts.IncludeCorrupted,
   892  		},
   893  		opts.ReaderBufferSize,
   894  		func(file FileSetFile, data []byte, corrupted bool) {
   895  			filepath, _ := file.InfoFilePath()
   896  			id := file.ID
   897  			var info index.IndexVolumeInfo
   898  			err := info.Unmarshal(data)
   899  			infoFileResults = append(infoFileResults, ReadIndexInfoFileResult{
   900  				ID:                id,
   901  				Info:              info,
   902  				AbsoluteFilePaths: file.AbsoluteFilePaths,
   903  				Err: readInfoFileResultError{
   904  					err:      err,
   905  					filepath: filepath,
   906  				},
   907  				Corrupted: corrupted,
   908  			})
   909  		})
   910  	return infoFileResults
   911  }
   912  
// SortedSnapshotMetadataFiles returns a slice of all the SnapshotMetadata files that are on disk, as well
// as any files that it encountered errors for (corrupt, missing checkpoints, etc) which facilitates
// cleanup of corrupt files. []SnapshotMetadata will be sorted by index (i.e the chronological order
// in which the snapshots were taken), but []SnapshotMetadataErrorWithPaths will not be in any particular
// order.
func SortedSnapshotMetadataFiles(opts Options) (
	[]SnapshotMetadata, []SnapshotMetadataErrorWithPaths, error) {
	var (
		prefix           = opts.FilePathPrefix()
		snapshotsDirPath = SnapshotDirPath(prefix)
	)

	// Glob for metadata files directly instead of their checkpoint files.
	// In the happy case this makes no difference, but in situations where
	// the metadata file exists but the checkpoint file does not (due to sudden
	// node failure) this strategy allows us to still cleanup the metadata file
	// whereas if we looked for checkpoint files directly the dangling metadata
	// file would hang around forever.
	metadataFilePaths, err := filepath.Glob(
		path.Join(
			snapshotsDirPath,
			fmt.Sprintf("*%s%s%s", separator, metadataFileSuffix, fileSuffix)))
	if err != nil {
		return nil, nil, err
	}

	var (
		reader          = NewSnapshotMetadataReader(opts)
		metadatas       = []SnapshotMetadata{}
		errorsWithPaths = []SnapshotMetadataErrorWithPaths{}
	)
	for _, file := range metadataFilePaths {
		// Parse the snapshot metadata identifier out of the filename; a file
		// whose name cannot be parsed is reported as an error entry so the
		// caller can clean it up.
		id, err := snapshotMetadataIdentifierFromFilePath(file)
		if err != nil {
			errorsWithPaths = append(errorsWithPaths, SnapshotMetadataErrorWithPaths{
				Error:            err,
				MetadataFilePath: file,
				// Can't construct checkpoint file path without ID
			})
			continue
		}

		// Sanity check: regenerating the path from the parsed identifier must
		// reproduce the globbed path exactly.
		if file != snapshotMetadataFilePathFromIdentifier(prefix, id) {
			// Should never happen
			errorsWithPaths = append(errorsWithPaths, SnapshotMetadataErrorWithPaths{
				Error: instrument.InvariantErrorf(
					"actual snapshot metadata filepath: %s and generated filepath: %s do not match",
					file, snapshotMetadataFilePathFromIdentifier(prefix, id)),
				MetadataFilePath:   file,
				CheckpointFilePath: snapshotMetadataCheckpointFilePathFromIdentifier(prefix, id),
			})
			continue
		}

		// Read and validate the metadata contents; failures (e.g. missing
		// checkpoint, bad digest) are reported with both related paths.
		metadata, err := reader.Read(id)
		if err != nil {
			errorsWithPaths = append(errorsWithPaths, SnapshotMetadataErrorWithPaths{
				Error:              err,
				MetadataFilePath:   file,
				CheckpointFilePath: snapshotMetadataCheckpointFilePathFromIdentifier(prefix, id),
			})
			continue
		}

		metadatas = append(metadatas, metadata)
	}

	// Sort the successfully-read metadatas chronologically by snapshot index.
	sort.Slice(metadatas, func(i, j int) bool {
		return metadatas[i].ID.Index < metadatas[j].ID.Index
	})
	return metadatas, errorsWithPaths, nil
}
   985  
   986  // DataFiles returns a slice of all the names for all the fileset files
   987  // for a given namespace and shard combination.
   988  func DataFiles(filePathPrefix string, namespace ident.ID, shard uint32) (FileSetFilesSlice, error) {
   989  	return filesetFiles(filesetFilesSelector{
   990  		fileSetType:    persist.FileSetFlushType,
   991  		contentType:    persist.FileSetDataContentType,
   992  		filePathPrefix: filePathPrefix,
   993  		namespace:      namespace,
   994  		shard:          shard,
   995  		pattern:        filesetFilePattern,
   996  	})
   997  }
   998  
   999  // SnapshotFiles returns a slice of all the names for all the snapshot files
  1000  // for a given namespace and shard combination.
  1001  func SnapshotFiles(filePathPrefix string, namespace ident.ID, shard uint32) (FileSetFilesSlice, error) {
  1002  	return filesetFiles(filesetFilesSelector{
  1003  		fileSetType:    persist.FileSetSnapshotType,
  1004  		contentType:    persist.FileSetDataContentType,
  1005  		filePathPrefix: filePathPrefix,
  1006  		namespace:      namespace,
  1007  		shard:          shard,
  1008  		pattern:        filesetFilePattern,
  1009  	})
  1010  }
  1011  
  1012  // IndexSnapshotFiles returns a slice of all the names for all the index fileset files
  1013  // for a given namespace.
  1014  func IndexSnapshotFiles(filePathPrefix string, namespace ident.ID) (FileSetFilesSlice, error) {
  1015  	return filesetFiles(filesetFilesSelector{
  1016  		fileSetType:    persist.FileSetSnapshotType,
  1017  		contentType:    persist.FileSetIndexContentType,
  1018  		filePathPrefix: filePathPrefix,
  1019  		namespace:      namespace,
  1020  		pattern:        filesetFilePattern,
  1021  	})
  1022  }
  1023  
  1024  // FileSetAt returns a FileSetFile for the given namespace/shard/blockStart/volume combination if it exists.
  1025  func FileSetAt(
  1026  	filePathPrefix string,
  1027  	namespace ident.ID,
  1028  	shard uint32,
  1029  	blockStart xtime.UnixNano,
  1030  	volume int,
  1031  ) (FileSetFile, bool, error) {
  1032  	var pattern string
  1033  	// If this is the initial volume, then we need to check if files were written with the legacy file naming (i.e.
  1034  	// without the volume index) so that we can properly locate the fileset.
  1035  	if volume == 0 {
  1036  		dir := ShardDataDirPath(filePathPrefix, namespace, shard)
  1037  		isLegacy, err := isFirstVolumeLegacy(dir, blockStart, CheckpointFileSuffix)
  1038  		// NB(nate): don't propagate ErrCheckpointFileNotFound here as expectation is to simply return an
  1039  		// empty FileSetFile if files do not exist.
  1040  		if err == ErrCheckpointFileNotFound {
  1041  			return FileSetFile{}, false, nil
  1042  		} else if err != nil && err != ErrCheckpointFileNotFound {
  1043  			return FileSetFile{}, false, err
  1044  		}
  1045  
  1046  		if isLegacy {
  1047  			pattern = filesetFileForTime(blockStart, anyLowerCaseCharsPattern)
  1048  		}
  1049  	}
  1050  
  1051  	if len(pattern) == 0 {
  1052  		pattern = filesetFileForTimeAndVolumeIndex(blockStart, volume, anyLowerCaseCharsPattern)
  1053  	}
  1054  
  1055  	matched, err := filesetFiles(filesetFilesSelector{
  1056  		fileSetType:    persist.FileSetFlushType,
  1057  		contentType:    persist.FileSetDataContentType,
  1058  		filePathPrefix: filePathPrefix,
  1059  		namespace:      namespace,
  1060  		shard:          shard,
  1061  		pattern:        pattern,
  1062  	})
  1063  	if err != nil {
  1064  		return FileSetFile{}, false, err
  1065  	}
  1066  
  1067  	matched.sortByTimeAndVolumeIndexAscending()
  1068  	for i, fileset := range matched {
  1069  		if fileset.ID.BlockStart.Equal(blockStart) && fileset.ID.VolumeIndex == volume {
  1070  			nextIdx := i + 1
  1071  			if nextIdx < len(matched) && matched[nextIdx].ID.BlockStart.Equal(blockStart) {
  1072  				// Should never happen.
  1073  				return FileSetFile{}, false, fmt.Errorf(
  1074  					"found multiple fileset files for blockStart: %d", blockStart.Seconds(),
  1075  				)
  1076  			}
  1077  
  1078  			if !fileset.HasCompleteCheckpointFile() {
  1079  				continue
  1080  			}
  1081  
  1082  			return fileset, true, nil
  1083  		}
  1084  	}
  1085  
  1086  	return FileSetFile{}, false, nil
  1087  }
  1088  
  1089  // IndexFileSetsAt returns all FileSetFile(s) for the given
  1090  // namespace/blockStart combination.
  1091  // NB: It returns all complete Volumes found on disk.
  1092  func IndexFileSetsAt(
  1093  	filePathPrefix string, namespace ident.ID, blockStart xtime.UnixNano,
  1094  ) (FileSetFilesSlice, error) {
  1095  	matches, err := filesetFiles(filesetFilesSelector{
  1096  		fileSetType:    persist.FileSetFlushType,
  1097  		contentType:    persist.FileSetIndexContentType,
  1098  		filePathPrefix: filePathPrefix,
  1099  		namespace:      namespace,
  1100  		pattern:        filesetFileForTime(blockStart, anyLowerCaseCharsNumbersPattern),
  1101  	})
  1102  	if err != nil {
  1103  		return nil, err
  1104  	}
  1105  
  1106  	filesets := make(FileSetFilesSlice, 0, len(matches))
  1107  	matches.sortByTimeAscending()
  1108  	for _, fileset := range matches {
  1109  		if fileset.ID.BlockStart.Equal(blockStart) {
  1110  			if !fileset.HasCompleteCheckpointFile() {
  1111  				continue
  1112  			}
  1113  			filesets = append(filesets, fileset)
  1114  		}
  1115  	}
  1116  
  1117  	return filesets, nil
  1118  }
  1119  
  1120  // DeleteFileSetAt deletes a FileSetFile for a given
  1121  // namespace/shard/blockStart/volume combination if it exists.
  1122  func DeleteFileSetAt(
  1123  	filePathPrefix string,
  1124  	namespace ident.ID,
  1125  	shard uint32,
  1126  	blockStart xtime.UnixNano,
  1127  	volume int,
  1128  ) error {
  1129  	fileset, ok, err := FileSetAt(filePathPrefix, namespace, shard, blockStart, volume)
  1130  	if err != nil {
  1131  		return err
  1132  	}
  1133  	if !ok {
  1134  		return fmt.Errorf("fileset for blockStart: %d does not exist", blockStart.Seconds())
  1135  	}
  1136  
  1137  	return DeleteFiles(fileset.AbsoluteFilePaths)
  1138  }
  1139  
  1140  // DataFileSetsBefore returns all the flush data fileset paths whose
  1141  // timestamps are earlier than a given time.
  1142  func DataFileSetsBefore(
  1143  	filePathPrefix string, namespace ident.ID, shard uint32, t xtime.UnixNano,
  1144  ) ([]string, error) {
  1145  	matched, err := filesetFiles(filesetFilesSelector{
  1146  		fileSetType:    persist.FileSetFlushType,
  1147  		contentType:    persist.FileSetDataContentType,
  1148  		filePathPrefix: filePathPrefix,
  1149  		namespace:      namespace,
  1150  		shard:          shard,
  1151  		pattern:        filesetFilePattern,
  1152  	})
  1153  	if err != nil {
  1154  		return nil, err
  1155  	}
  1156  	return FilesBefore(matched.Filepaths(), t)
  1157  }
  1158  
  1159  // IndexFileSetsBefore returns all the flush index fileset paths whose timestamps are earlier than a given time.
  1160  func IndexFileSetsBefore(filePathPrefix string, namespace ident.ID, t xtime.UnixNano) ([]string, error) {
  1161  	matched, err := filesetFiles(filesetFilesSelector{
  1162  		fileSetType:    persist.FileSetFlushType,
  1163  		contentType:    persist.FileSetIndexContentType,
  1164  		filePathPrefix: filePathPrefix,
  1165  		namespace:      namespace,
  1166  		pattern:        filesetFilePattern,
  1167  	})
  1168  	if err != nil {
  1169  		return nil, err
  1170  	}
  1171  	return FilesBefore(matched.Filepaths(), t)
  1172  }
  1173  
  1174  // DeleteInactiveDirectories deletes any directories that are not currently active, as defined by the
  1175  // inputed active directories within the parent directory
  1176  func DeleteInactiveDirectories(parentDirectoryPath string, activeDirectories []string) error {
  1177  	var toDelete []string
  1178  	activeDirNames := make(map[string]struct{})
  1179  	allSubDirs, err := findSubDirectoriesAndPaths(parentDirectoryPath)
  1180  	if err != nil {
  1181  		return nil
  1182  	}
  1183  
  1184  	// Create shard set, might also be useful to just send in as strings?
  1185  	for _, dir := range activeDirectories {
  1186  		activeDirNames[dir] = struct{}{}
  1187  	}
  1188  
  1189  	for dirName, dirPath := range allSubDirs {
  1190  		if _, ok := activeDirNames[dirName]; !ok {
  1191  			toDelete = append(toDelete, dirPath)
  1192  		}
  1193  	}
  1194  	return DeleteDirectories(toDelete)
  1195  }
  1196  
// SortedCommitLogFiles returns all the commit log files in the commit logs directory,
// sorted by time and index ascending (see commitlogsByTimeAndIndexAscending).
func SortedCommitLogFiles(commitLogsDir string) ([]string, error) {
	return sortedCommitLogFiles(commitLogsDir, commitLogFilePattern)
}
  1201  
// filesetFile pairs a single on-disk fileset file path with the block start
// time and volume index parsed from its name.
type filesetFile struct {
	// volumeIndex is the volume number parsed from the filename.
	volumeIndex int
	// blockStart is the block start time parsed from the filename.
	blockStart xtime.UnixNano
	// fileName is the absolute path of the file.
	fileName string
}
  1207  
// toSortableFn adapts a list of file paths to a sort.Interface.
type toSortableFn func(files []string) sort.Interface

// toBlockStartAndVolumeIndexFn extracts the block start time and volume index
// encoded in a fileset file name.
type toBlockStartAndVolumeIndexFn func(file string) (xtime.UnixNano, int, error)

// sortedFilesetFiles orders fileset files by (blockStart, volumeIndex)
// ascending via the sort.Interface implementation below.
type sortedFilesetFiles []filesetFile
  1211  
// Len implements sort.Interface.
func (s sortedFilesetFiles) Len() int {
	return len(s)
}
  1215  
  1216  func (s sortedFilesetFiles) Less(i, j int) bool {
  1217  	iStart := s[i].blockStart
  1218  	jStart := s[j].blockStart
  1219  
  1220  	if iStart.Before(jStart) {
  1221  		return true
  1222  	}
  1223  
  1224  	jVolume := s[j].volumeIndex
  1225  	iVolume := s[i].volumeIndex
  1226  	return iStart.Equal(jStart) && iVolume < jVolume
  1227  }
  1228  
// Swap implements sort.Interface.
func (s sortedFilesetFiles) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}
  1232  
  1233  func findSortedFilesetFiles(
  1234  	fileDir string, pattern string,
  1235  	fn toBlockStartAndVolumeIndexFn,
  1236  ) (sortedFilesetFiles, error) {
  1237  	matched, err := filepath.Glob(path.Join(fileDir, pattern))
  1238  	if err != nil {
  1239  		return nil, err
  1240  	}
  1241  	if len(matched) == 0 {
  1242  		return nil, nil
  1243  	}
  1244  	result := make([]filesetFile, len(matched))
  1245  	for i, file := range matched {
  1246  		blockStart, volume, err := fn(file)
  1247  		if err != nil {
  1248  			return nil, err
  1249  		}
  1250  
  1251  		result[i] = filesetFile{
  1252  			fileName:    file,
  1253  			blockStart:  blockStart,
  1254  			volumeIndex: volume,
  1255  		}
  1256  	}
  1257  
  1258  	sort.Sort(sortedFilesetFiles(result))
  1259  	return result, nil
  1260  }
  1261  
  1262  func findFiles(fileDir string, pattern string, fn toSortableFn) ([]string, error) {
  1263  	matched, err := filepath.Glob(path.Join(fileDir, pattern))
  1264  	if err != nil {
  1265  		return nil, err
  1266  	}
  1267  	sort.Sort(fn(matched))
  1268  	return matched, nil
  1269  }
  1270  
// directoryNamesToPaths maps a directory entry's base name to its full path.
type directoryNamesToPaths map[string]string
  1272  
  1273  func findSubDirectoriesAndPaths(directoryPath string) (directoryNamesToPaths, error) {
  1274  	parent, err := os.Open(directoryPath)
  1275  	if err != nil {
  1276  		return nil, err
  1277  	}
  1278  
  1279  	subDirectoriesToPaths := make(directoryNamesToPaths)
  1280  	subDirNames, err := parent.Readdirnames(-1)
  1281  	if err != nil {
  1282  		return nil, err
  1283  	}
  1284  
  1285  	err = parent.Close()
  1286  	if err != nil {
  1287  		return nil, err
  1288  	}
  1289  
  1290  	for _, dirName := range subDirNames {
  1291  		subDirectoriesToPaths[dirName] = path.Join(directoryPath, dirName)
  1292  	}
  1293  	return subDirectoriesToPaths, nil
  1294  }
  1295  
// filesetFilesSelector describes which fileset files to look up: the fileset
// type (flush/snapshot), content type (data/index), filesystem prefix,
// namespace, shard (only used for data content — index paths are per-namespace),
// and the filename glob pattern to match.
type filesetFilesSelector struct {
	fileSetType    persist.FileSetType
	contentType    persist.FileSetContentType
	filePathPrefix string
	namespace      ident.ID
	shard          uint32
	pattern        string
}
  1304  
// filesetFiles locates all on-disk files matching the selector and groups the
// individual files (info, data, digest, checkpoint, ...) into one FileSetFile
// per (blockStart, volumeIndex) pair. Returns (nil, nil) when nothing matches.
func filesetFiles(args filesetFilesSelector) (FileSetFilesSlice, error) {
	var (
		byTimeAsc sortedFilesetFiles
		err       error
	)
	// Choose the directory to scan and the filename-parsing function based on
	// the fileset type / content type combination.
	switch args.fileSetType {
	case persist.FileSetFlushType:
		switch args.contentType {
		case persist.FileSetDataContentType:
			dir := ShardDataDirPath(args.filePathPrefix, args.namespace, args.shard)
			byTimeAsc, err = findSortedFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromDataFileSetFilename)
		case persist.FileSetIndexContentType:
			dir := NamespaceIndexDataDirPath(args.filePathPrefix, args.namespace)
			byTimeAsc, err = findSortedFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromFileSetFilename)
		default:
			return nil, fmt.Errorf("unknown content type: %d", args.contentType)
		}
	case persist.FileSetSnapshotType:
		var dir string
		switch args.contentType {
		case persist.FileSetDataContentType:
			dir = ShardSnapshotsDirPath(args.filePathPrefix, args.namespace, args.shard)
		case persist.FileSetIndexContentType:
			dir = NamespaceIndexSnapshotDirPath(args.filePathPrefix, args.namespace)
		default:
			return nil, fmt.Errorf("unknown content type: %d", args.contentType)
		}
		byTimeAsc, err = findSortedFilesetFiles(dir, args.pattern, TimeAndVolumeIndexFromFileSetFilename)
	default:
		return nil, fmt.Errorf("unknown type: %d", args.fileSetType)
	}
	if err != nil {
		return nil, err
	}

	if len(byTimeAsc) == 0 {
		return nil, nil
	}

	// Files arrive sorted by (blockStart, volumeIndex); consecutive entries
	// with the same pair belong to the same fileset, so accumulate their paths
	// into a single FileSetFile and start a new one whenever the pair changes.
	var (
		latestBlockStart  xtime.UnixNano
		latestVolumeIndex int
		latestFileSetFile FileSetFile
		filesetFiles      = []FileSetFile{}
	)
	for _, file := range byTimeAsc {
		// NOTE(review): latestBlockStart == 0 is the "no fileset started yet"
		// sentinel, which assumes no real block starts exactly at the Unix
		// epoch — confirm with callers.
		if latestBlockStart == 0 {
			latestFileSetFile = NewFileSetFile(FileSetFileIdentifier{
				Namespace:   args.namespace,
				BlockStart:  file.blockStart,
				Shard:       args.shard,
				VolumeIndex: file.volumeIndex,
			}, args.filePathPrefix)
		} else if !file.blockStart.Equal(latestBlockStart) || latestVolumeIndex != file.volumeIndex {
			// Pair changed: flush the previous fileset and begin a new one.
			filesetFiles = append(filesetFiles, latestFileSetFile)
			latestFileSetFile = NewFileSetFile(FileSetFileIdentifier{
				Namespace:   args.namespace,
				BlockStart:  file.blockStart,
				Shard:       args.shard,
				VolumeIndex: file.volumeIndex,
			}, args.filePathPrefix)
		}

		latestBlockStart = file.blockStart
		latestVolumeIndex = file.volumeIndex

		latestFileSetFile.AbsoluteFilePaths = append(latestFileSetFile.AbsoluteFilePaths, file.fileName)
	}

	// Flush the final in-progress fileset (byTimeAsc is non-empty here).
	filesetFiles = append(filesetFiles, latestFileSetFile)
	return filesetFiles, nil
}
  1377  
  1378  func sortedCommitLogFiles(commitLogsDir string, pattern string) ([]string, error) {
  1379  	return findFiles(commitLogsDir, pattern, func(files []string) sort.Interface {
  1380  		return commitlogsByTimeAndIndexAscending(files)
  1381  	})
  1382  }
  1383  
  1384  // FilesBefore filters the list of files down to those whose name indicate they are
  1385  // before a given time period. Mutates the provided slice.
  1386  func FilesBefore(files []string, t xtime.UnixNano) ([]string, error) {
  1387  	var (
  1388  		j        int
  1389  		multiErr xerrors.MultiError
  1390  	)
  1391  	// Matched files are sorted by their timestamps in ascending order.
  1392  	for i := range files {
  1393  		ft, err := TimeFromFileName(files[i])
  1394  		if err != nil {
  1395  			multiErr = multiErr.Add(err)
  1396  			continue
  1397  		}
  1398  		if !ft.Before(t) {
  1399  			break
  1400  		}
  1401  		files[j] = files[i]
  1402  		j++
  1403  	}
  1404  	return files[:j], multiErr.FinalError()
  1405  }
  1406  
  1407  func readAndValidate(
  1408  	filePath string,
  1409  	readerBufferSize int,
  1410  	expectedDigest uint32,
  1411  ) ([]byte, error) {
  1412  	fd, err := os.Open(filePath)
  1413  	if err != nil {
  1414  		return nil, err
  1415  	}
  1416  	defer fd.Close()
  1417  
  1418  	buf, err := bufferForEntireFile(filePath)
  1419  	if err != nil {
  1420  		return nil, err
  1421  	}
  1422  
  1423  	fwd := digest.NewFdWithDigestReader(readerBufferSize)
  1424  	fwd.Reset(fd)
  1425  	n, err := fwd.ReadAllAndValidate(buf, expectedDigest)
  1426  	if err != nil {
  1427  		return nil, err
  1428  	}
  1429  	return buf[:n], nil
  1430  }
  1431  
  1432  func read(filePath string) ([]byte, error) {
  1433  	fd, err := os.Open(filePath) //nolint:gosec
  1434  	if err != nil {
  1435  		return nil, err
  1436  	}
  1437  	defer fd.Close() //nolint:errcheck,gosec
  1438  
  1439  	buf, err := bufferForEntireFile(filePath)
  1440  	if err != nil {
  1441  		return nil, err
  1442  	}
  1443  
  1444  	n, err := fd.Read(buf)
  1445  	if err != nil {
  1446  		return nil, err
  1447  	}
  1448  	return buf[:n], nil
  1449  }
  1450  
  1451  func bufferForEntireFile(filePath string) ([]byte, error) {
  1452  	stat, err := os.Stat(filePath)
  1453  	if err != nil {
  1454  		return nil, err
  1455  	}
  1456  
  1457  	size := int(stat.Size())
  1458  	buf := make([]byte, size)
  1459  	return buf, nil
  1460  }
  1461  
// DataDirPath returns the path to the data directory belonging to a db.
func DataDirPath(prefix string) string {
	return path.Join(prefix, dataDirName)
}
  1466  
// IndexDataDirPath returns the path to the index data directory belonging to a db.
func IndexDataDirPath(prefix string) string {
	return path.Join(prefix, indexDirName, dataDirName)
}
  1471  
// SnapshotDirPath returns the path to the snapshot directory belonging to a db.
// It is identical to SnapshotsDirPath below.
func SnapshotDirPath(prefix string) string {
	return path.Join(prefix, snapshotDirName)
}
  1476  
// NamespaceDataDirPath returns the path to the data directory for a given namespace.
func NamespaceDataDirPath(prefix string, namespace ident.ID) string {
	return path.Join(prefix, dataDirName, namespace.String())
}
  1481  
// NamespaceSnapshotsDirPath returns the path to the snapshots directory for a given namespace.
func NamespaceSnapshotsDirPath(prefix string, namespace ident.ID) string {
	return path.Join(SnapshotsDirPath(prefix), namespace.String())
}
  1486  
// NamespaceIndexDataDirPath returns the path to the index data directory for a given namespace.
func NamespaceIndexDataDirPath(prefix string, namespace ident.ID) string {
	return path.Join(prefix, indexDirName, dataDirName, namespace.String())
}
  1491  
// NamespaceIndexSnapshotDirPath returns the path to the index snapshots directory
// for a given namespace. (The previous comment incorrectly said "data directory".)
func NamespaceIndexSnapshotDirPath(prefix string, namespace ident.ID) string {
	return path.Join(prefix, indexDirName, snapshotDirName, namespace.String())
}
  1496  
// SnapshotsDirPath returns the path to the snapshots directory.
// It is identical to SnapshotDirPath above.
func SnapshotsDirPath(prefix string) string {
	return path.Join(prefix, snapshotDirName)
}
  1501  
// ShardDataDirPath returns the path to the data directory for a given shard,
// i.e. the namespace data directory with the shard number appended.
func ShardDataDirPath(prefix string, namespace ident.ID, shard uint32) string {
	namespacePath := NamespaceDataDirPath(prefix, namespace)
	return path.Join(namespacePath, strconv.Itoa(int(shard)))
}
  1507  
// ShardSnapshotsDirPath returns the path to the snapshots directory for a given shard,
// i.e. the namespace snapshots directory with the shard number appended.
func ShardSnapshotsDirPath(prefix string, namespace ident.ID, shard uint32) string {
	namespacePath := NamespaceSnapshotsDirPath(prefix, namespace)
	return path.Join(namespacePath, strconv.Itoa(int(shard)))
}
  1513  
// CommitLogsDirPath returns the path to the commit logs directory.
func CommitLogsDirPath(prefix string) string {
	return path.Join(prefix, commitLogsDirName)
}
  1518  
// DataFileSetExists determines whether data fileset files exist for the given
// namespace, shard, block start, and volume.
func DataFileSetExists(
	filePathPrefix string,
	namespace ident.ID,
	shard uint32,
	blockStart xtime.UnixNano,
	volume int,
) (bool, error) {
	// This function can easily become a performance bottleneck if the
	// implementation is slow or requires scanning directories with a large
	// number of files in them (as is common if namespaces with long retentions
	// are configured). As a result, instead of using existing helper functions,
	// it implements an optimized code path that only involves checking if a few
	// specific files exist and contain the correct contents.
	shardDir := ShardDataDirPath(filePathPrefix, namespace, shard)

	// Check fileset with volume first to optimize for non-legacy use case.
	checkpointPath := FilesetPathFromTimeAndIndex(shardDir, blockStart, volume, CheckpointFileSuffix)
	exists, err := CompleteCheckpointFileExists(checkpointPath)
	// NOTE(review): an error from this first check is deliberately not
	// returned; control falls through to the legacy check (or returns false
	// below), treating stat failures as "not found" — confirm this
	// best-effort behavior is intended.
	if err == nil && exists {
		return true, nil
	}

	if volume != 0 {
		// Only check for legacy file path if volume is 0.
		return false, nil
	}

	// Legacy filesets (written before volume indices existed) use a filename
	// without the volume component.
	checkpointPath = filesetPathFromTimeLegacy(shardDir, blockStart, CheckpointFileSuffix)
	return CompleteCheckpointFileExists(checkpointPath)
}
  1551  
  1552  // SnapshotFileSetExistsAt determines whether snapshot fileset files exist for
  1553  // the given namespace, shard, and block start time.
  1554  func SnapshotFileSetExistsAt(
  1555  	prefix string,
  1556  	namespace ident.ID,
  1557  	snapshotID uuid.UUID,
  1558  	shard uint32,
  1559  	blockStart xtime.UnixNano,
  1560  ) (bool, error) {
  1561  	snapshotFiles, err := SnapshotFiles(prefix, namespace, shard)
  1562  	if err != nil {
  1563  		return false, err
  1564  	}
  1565  
  1566  	latest, ok := snapshotFiles.LatestVolumeForBlock(blockStart)
  1567  	if !ok {
  1568  		return false, nil
  1569  	}
  1570  
  1571  	_, latestSnapshotID, err := latest.SnapshotTimeAndID()
  1572  	if err != nil {
  1573  		return false, err
  1574  	}
  1575  
  1576  	if !uuid.Equal(latestSnapshotID, snapshotID) {
  1577  		return false, nil
  1578  	}
  1579  
  1580  	// LatestVolumeForBlock checks for a complete checkpoint file, so we don't
  1581  	// need to recheck it here.
  1582  	return true, nil
  1583  }
  1584  
  1585  // NextSnapshotMetadataFileIndex returns the next snapshot metadata file index.
  1586  func NextSnapshotMetadataFileIndex(opts Options) (int64, error) {
  1587  	// We can ignore any SnapshotMetadataErrorsWithpaths that are returned because even if a corrupt
  1588  	// snapshot metadata file exists with the next index that we want to return from this function,
  1589  	// every snapshot metadata has its own UUID so there will never be a collision with a corrupt file
  1590  	// anyways and we can ignore them entirely when considering what the next index should be.
  1591  	snapshotMetadataFiles, _, err := SortedSnapshotMetadataFiles(opts)
  1592  	if err != nil {
  1593  		return 0, err
  1594  	}
  1595  
  1596  	if len(snapshotMetadataFiles) == 0 {
  1597  		return 0, nil
  1598  	}
  1599  
  1600  	lastSnapshotMetadataFile := snapshotMetadataFiles[len(snapshotMetadataFiles)-1]
  1601  	return lastSnapshotMetadataFile.ID.Index + 1, nil
  1602  }
  1603  
  1604  // NextSnapshotFileSetVolumeIndex returns the next snapshot file set index for a given
  1605  // namespace/shard/blockStart combination.
  1606  func NextSnapshotFileSetVolumeIndex(
  1607  	filePathPrefix string, namespace ident.ID, shard uint32, blockStart xtime.UnixNano,
  1608  ) (int, error) {
  1609  	snapshotFiles, err := SnapshotFiles(filePathPrefix, namespace, shard)
  1610  	if err != nil {
  1611  		return -1, err
  1612  	}
  1613  
  1614  	latestFile, ok := snapshotFiles.LatestVolumeForBlock(blockStart)
  1615  	if !ok {
  1616  		return 0, nil
  1617  	}
  1618  
  1619  	return latestFile.ID.VolumeIndex + 1, nil
  1620  }
  1621  
  1622  // NextIndexFileSetVolumeIndex returns the next index file set index for a given
  1623  // namespace/blockStart combination.
  1624  func NextIndexFileSetVolumeIndex(
  1625  	filePathPrefix string, namespace ident.ID, blockStart xtime.UnixNano,
  1626  ) (int, error) {
  1627  	files, err := filesetFiles(filesetFilesSelector{
  1628  		fileSetType:    persist.FileSetFlushType,
  1629  		contentType:    persist.FileSetIndexContentType,
  1630  		filePathPrefix: filePathPrefix,
  1631  		namespace:      namespace,
  1632  		pattern:        filesetFileForTime(blockStart, anyLowerCaseCharsNumbersPattern),
  1633  	})
  1634  	if err != nil {
  1635  		return -1, err
  1636  	}
  1637  
  1638  	latestFile, ok := files.LatestVolumeForBlock(blockStart)
  1639  	if !ok {
  1640  		return 0, nil
  1641  	}
  1642  
  1643  	return latestFile.ID.VolumeIndex + 1, nil
  1644  }
  1645  
  1646  // NextIndexSnapshotFileIndex returns the next snapshot file index for a given
  1647  // namespace/shard/blockStart combination.
  1648  func NextIndexSnapshotFileIndex(
  1649  	filePathPrefix string, namespace ident.ID, blockStart xtime.UnixNano,
  1650  ) (int, error) {
  1651  	snapshotFiles, err := IndexSnapshotFiles(filePathPrefix, namespace)
  1652  	if err != nil {
  1653  		return -1, err
  1654  	}
  1655  
  1656  	currentSnapshotIndex := -1
  1657  	for _, snapshot := range snapshotFiles {
  1658  		if snapshot.ID.BlockStart.Equal(blockStart) {
  1659  			currentSnapshotIndex = snapshot.ID.VolumeIndex
  1660  			break
  1661  		}
  1662  	}
  1663  
  1664  	return currentSnapshotIndex + 1, nil
  1665  }
  1666  
  1667  // CompleteCheckpointFileExists returns whether a checkpoint file exists, and if so,
  1668  // is it complete.
  1669  func CompleteCheckpointFileExists(filePath string) (bool, error) {
  1670  	if !strings.Contains(filePath, CheckpointFileSuffix) {
  1671  		return false, instrument.InvariantErrorf(
  1672  			"tried to use CompleteCheckpointFileExists to verify existence of non checkpoint file: %s",
  1673  			filePath,
  1674  		)
  1675  	}
  1676  
  1677  	f, err := os.Stat(filePath)
  1678  	if err != nil {
  1679  		if os.IsNotExist(err) {
  1680  			return false, nil
  1681  		}
  1682  		return false, err
  1683  	}
  1684  
  1685  	// Make sure the checkpoint file was completely written out and its
  1686  	// not just an empty file.
  1687  	return f.Size() == CheckpointFileSizeBytes, nil
  1688  }
  1689  
  1690  // FileExists returns whether a file at the given path exists.
  1691  func FileExists(filePath string) (bool, error) {
  1692  	if strings.Contains(filePath, CheckpointFileSuffix) {
  1693  		// Existence of a checkpoint file needs to be verified using the function
  1694  		// CompleteCheckpointFileExists instead to ensure that it has been
  1695  		// completely written out.
  1696  		return false, instrument.InvariantErrorf(
  1697  			"tried to use FileExists to verify existence of checkpoint file: %s",
  1698  			filePath,
  1699  		)
  1700  	}
  1701  
  1702  	_, err := os.Stat(filePath)
  1703  	if err != nil {
  1704  		if os.IsNotExist(err) {
  1705  			return false, nil
  1706  		}
  1707  
  1708  		return false, err
  1709  	}
  1710  
  1711  	return true, nil
  1712  }
  1713  
  1714  // OpenWritable opens a file for writing and truncating as necessary.
  1715  func OpenWritable(filePath string, perm os.FileMode) (*os.File, error) {
  1716  	return os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
  1717  }
  1718  
  1719  // CommitLogFilePath returns the path for a commitlog file.
  1720  func CommitLogFilePath(prefix string, index int) string {
  1721  	var (
  1722  		entry    = fmt.Sprintf("%d%s%d", 0, separator, index)
  1723  		fileName = fmt.Sprintf("%s%s%s%s", commitLogFilePrefix, separator, entry, fileSuffix)
  1724  		filePath = path.Join(CommitLogsDirPath(prefix), fileName)
  1725  	)
  1726  	return filePath
  1727  }
  1728  
  1729  func filesetFileForTime(t xtime.UnixNano, suffix string) string {
  1730  	return fmt.Sprintf("%s%s%d%s%s%s", filesetFilePrefix, separator, int64(t), separator, suffix, fileSuffix)
  1731  }
  1732  
  1733  func filesetFileForTimeAndVolumeIndex(t xtime.UnixNano, index int, suffix string) string {
  1734  	newSuffix := fmt.Sprintf("%d%s%s", index, separator, suffix)
  1735  	return filesetFileForTime(t, newSuffix)
  1736  }
  1737  
  1738  func filesetPathFromTimeLegacy(prefix string, t xtime.UnixNano, suffix string) string {
  1739  	return path.Join(prefix, filesetFileForTime(t, suffix))
  1740  }
  1741  
  1742  // FilesetPathFromTimeAndIndex builds a path of a fileset file.
  1743  func FilesetPathFromTimeAndIndex(prefix string, t xtime.UnixNano, index int, suffix string) string {
  1744  	return path.Join(prefix, filesetFileForTimeAndVolumeIndex(t, index, suffix))
  1745  }
  1746  
  1747  // isFirstVolumeLegacy returns whether the first volume of the provided type is
  1748  // legacy, i.e. does not have a volume index in its filename. Using this
  1749  // function, the caller expects there to be a legacy or non-legacy file, and
  1750  // thus returns an error if neither exist. Note that this function does not
  1751  // check for the volume's complete checkpoint file.
  1752  //nolint: unparam
  1753  func isFirstVolumeLegacy(prefix string, t xtime.UnixNano, suffix string) (bool, error) {
  1754  	// Check non-legacy path first to optimize for newer files.
  1755  	path := FilesetPathFromTimeAndIndex(prefix, t, 0, suffix)
  1756  	_, err := os.Stat(path)
  1757  	if err == nil {
  1758  		return false, nil
  1759  	}
  1760  
  1761  	legacyPath := filesetPathFromTimeLegacy(prefix, t, suffix)
  1762  	_, err = os.Stat(legacyPath)
  1763  	if err == nil {
  1764  		return true, nil
  1765  	}
  1766  
  1767  	return false, ErrCheckpointFileNotFound
  1768  }
  1769  
  1770  // Once we decide that we no longer want to support legacy (non-volume-indexed)
  1771  // filesets, we can remove this function and just use
  1772  // `FilesetPathFromTimeAndIndex`. Getting code to compile and tests to pass
  1773  // after that should be a comprehensive way to remove dead code.
  1774  func dataFilesetPathFromTimeAndIndex(
  1775  	prefix string,
  1776  	t xtime.UnixNano,
  1777  	index int,
  1778  	suffix string,
  1779  	isLegacy bool,
  1780  ) string {
  1781  	if isLegacy {
  1782  		return filesetPathFromTimeLegacy(prefix, t, suffix)
  1783  	}
  1784  
  1785  	return FilesetPathFromTimeAndIndex(prefix, t, index, suffix)
  1786  }
  1787  
  1788  func filesetIndexSegmentFileSuffixFromTime(
  1789  	segmentIndex int,
  1790  	segmentFileType idxpersist.IndexSegmentFileType,
  1791  ) string {
  1792  	return fmt.Sprintf("%s%s%d%s%s", segmentFileSetFilePrefix, separator, segmentIndex, separator, segmentFileType)
  1793  }
  1794  
  1795  func filesetIndexSegmentFilePathFromTime(
  1796  	prefix string,
  1797  	t xtime.UnixNano,
  1798  	volumeIndex int,
  1799  	segmentIndex int,
  1800  	segmentFileType idxpersist.IndexSegmentFileType,
  1801  ) string {
  1802  	suffix := filesetIndexSegmentFileSuffixFromTime(segmentIndex, segmentFileType)
  1803  	return FilesetPathFromTimeAndIndex(prefix, t, volumeIndex, suffix)
  1804  }
  1805  
  1806  func snapshotIndexSegmentFilePathFromTimeAndIndex(
  1807  	prefix string,
  1808  	t xtime.UnixNano,
  1809  	snapshotIndex int,
  1810  	segmentIndex int,
  1811  	segmentFileType idxpersist.IndexSegmentFileType,
  1812  ) string {
  1813  	suffix := filesetIndexSegmentFileSuffixFromTime(segmentIndex, segmentFileType)
  1814  	return FilesetPathFromTimeAndIndex(prefix, t, snapshotIndex, suffix)
  1815  }
  1816  
  1817  func snapshotMetadataFilePathFromIdentifier(prefix string, id SnapshotMetadataIdentifier) string {
  1818  	return path.Join(
  1819  		prefix,
  1820  		snapshotDirName,
  1821  		fmt.Sprintf(
  1822  			"%s%s%s%s%d%s%s%s",
  1823  			snapshotFilePrefix, separator,
  1824  			sanitizeUUID(id.UUID), separator,
  1825  			id.Index, separator,
  1826  			metadataFileSuffix, fileSuffix))
  1827  }
  1828  
  1829  func snapshotMetadataCheckpointFilePathFromIdentifier(prefix string, id SnapshotMetadataIdentifier) string {
  1830  	return path.Join(
  1831  		prefix,
  1832  		snapshotDirName,
  1833  		fmt.Sprintf(
  1834  			"%s%s%s%s%d%s%s%s%s%s",
  1835  			snapshotFilePrefix, separator,
  1836  			sanitizeUUID(id.UUID), separator,
  1837  			id.Index, separator,
  1838  			metadataFileSuffix, separator,
  1839  			CheckpointFileSuffix, fileSuffix))
  1840  }
  1841  
  1842  // sanitizeUUID strips all instances of separator ("-") in the provided UUID string. This prevents us from
  1843  // treating every "piece" of the UUID as a separate fragment of the name when we split filepaths by
  1844  // separator. This works because the UUID library can still parse stripped UUID strings.
  1845  func sanitizeUUID(u uuid.UUID) string {
  1846  	return strings.Replace(u.String(), separator, "", -1)
  1847  }
  1848  
  1849  func parseUUID(sanitizedUUID string) (uuid.UUID, bool) {
  1850  	parsed := uuid.Parse(sanitizedUUID)
  1851  	return parsed, parsed != nil
  1852  }
  1853  
  1854  func snapshotMetadataIdentifierFromFilePath(filePath string) (SnapshotMetadataIdentifier, error) {
  1855  	_, fileName := path.Split(filePath)
  1856  	if fileName == "" {
  1857  		return SnapshotMetadataIdentifier{}, fmt.Errorf(
  1858  			"splitting: %s created empty filename", filePath)
  1859  	}
  1860  
  1861  	var (
  1862  		splitFileName    = strings.Split(fileName, separator)
  1863  		isCheckpointFile = strings.Contains(fileName, CheckpointFileSuffix)
  1864  	)
  1865  	if len(splitFileName) != numComponentsSnapshotMetadataFile &&
  1866  		// Snapshot metadata checkpoint files contain one extra separator.
  1867  		!(isCheckpointFile && len(splitFileName) == numComponentsSnapshotMetadataCheckpointFile) {
  1868  		return SnapshotMetadataIdentifier{}, fmt.Errorf(
  1869  			"invalid snapshot metadata file name: %s", filePath)
  1870  	}
  1871  
  1872  	index, err := strconv.ParseInt(splitFileName[snapshotMetadataIndexComponentPosition], 10, 64)
  1873  	if err != nil {
  1874  		return SnapshotMetadataIdentifier{}, fmt.Errorf(
  1875  			"invalid snapshot metadata file name, unable to parse index: %s", filePath)
  1876  	}
  1877  
  1878  	sanitizedUUID := splitFileName[snapshotMetadataUUIDComponentPosition]
  1879  	id, ok := parseUUID(sanitizedUUID)
  1880  	if !ok {
  1881  		return SnapshotMetadataIdentifier{}, fmt.Errorf(
  1882  			"invalid snapshot metadata file name, unable to parse UUID: %s", filePath)
  1883  	}
  1884  
  1885  	return SnapshotMetadataIdentifier{
  1886  		Index: index,
  1887  		UUID:  id,
  1888  	}, nil
  1889  }