github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/persist_manager.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fs
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/m3db/m3/src/dbnode/persist"
    30  	"github.com/m3db/m3/src/dbnode/ratelimit"
    31  	"github.com/m3db/m3/src/dbnode/runtime"
    32  	"github.com/m3db/m3/src/dbnode/ts"
    33  	"github.com/m3db/m3/src/m3ninx/index/segment"
    34  	m3ninxfs "github.com/m3db/m3/src/m3ninx/index/segment/fst"
    35  	m3ninxpersist "github.com/m3db/m3/src/m3ninx/persist"
    36  	"github.com/m3db/m3/src/x/checked"
    37  	"github.com/m3db/m3/src/x/clock"
    38  	"github.com/m3db/m3/src/x/instrument"
    39  	xresource "github.com/m3db/m3/src/x/resource"
    40  
    41  	"github.com/pborman/uuid"
    42  	"github.com/uber-go/tally"
    43  	"go.uber.org/zap"
    44  )
    45  
const (
	// bytesPerMegabit converts a rate limit expressed in megabits/sec into
	// bytes/sec (1024 * 1024 bits per megabit, 8 bits per byte).
	bytesPerMegabit = 1024 * 1024 / 8
)

// persistManagerStatus tracks which phase of persistence, if any, the
// persist manager is currently in.
type persistManagerStatus int

const (
	// persistManagerIdle means no persist is currently in progress.
	persistManagerIdle persistManagerStatus = iota
	// persistManagerPersistingData means a data (flush or snapshot) persist is in progress.
	persistManagerPersistingData
	// persistManagerPersistingIndex means an index persist is in progress.
	persistManagerPersistingIndex
)
    57  
// Errors returned when persist lifecycle methods are invoked out of order
// (e.g. preparing before starting, or finishing the wrong fileset type).
var (
	errPersistManagerNotIdle                         = errors.New("persist manager cannot start persist, not idle")
	errPersistManagerNotPersisting                   = errors.New("persist manager cannot finish persisting, not persisting")
	errPersistManagerCannotPrepareDataNotPersisting  = errors.New("persist manager cannot prepare data, not persisting")
	errPersistManagerCannotPrepareIndexNotPersisting = errors.New("persist manager cannot prepare index, not persisting")
	errPersistManagerFileSetAlreadyExists            = errors.New("persist manager cannot prepare, fileset already exists")
	errPersistManagerCannotDoneSnapshotNotSnapshot   = errors.New("persist manager cannot done snapshot, file set type is not snapshot")
	errPersistManagerCannotDoneFlushNotFlush         = errors.New("persist manager cannot done flush, file set type is not flush")
)
    67  
// sleepFn matches the signature of time.Sleep; injectable so tests can
// exercise rate limiting without actually sleeping.
type sleepFn func(time.Duration)

// nextSnapshotMetadataFileIndexFn determines the index to use for the next
// snapshot metadata file; injectable for testing.
type nextSnapshotMetadataFileIndexFn func(opts Options) (index int64, err error)
    71  
// persistManager is responsible for persisting series segments onto local filesystem.
// It is not thread-safe.
type persistManager struct {
	sync.RWMutex

	opts           Options
	filePathPrefix string
	nowFn          clock.NowFn
	// sleepFn is time.Sleep in production; injectable for testing throttling.
	sleepFn sleepFn

	// Sub-state for data and index persistence respectively.
	dataPM  dataPersistManager
	indexPM indexPersistManager

	status            persistManagerStatus
	currRateLimitOpts ratelimit.Options

	// Throttling/metrics bookkeeping for the current persist cycle,
	// reset by resetWithLock:
	// start is when the first rate-limited write of the cycle occurred,
	// count is the number of writes since the last rate-limit check,
	// bytesWritten is the total bytes written this cycle,
	// worked is time spent writing and slept is time spent throttled.
	start        time.Time
	count        int
	bytesWritten int64
	worked       time.Duration
	slept        time.Duration

	metrics persistManagerMetrics

	// runtimeOptsListener deregisters the runtime options listener on Close.
	runtimeOptsListener xresource.SimpleCloser
}
    98  
// dataPersistManager holds the state specific to persisting data (flush and
// snapshot) filesets.
type dataPersistManager struct {
	// Injected types.
	writer                        DataFileSetWriter
	nextSnapshotMetadataFileIndex nextSnapshotMetadataFileIndexFn
	snapshotMetadataWriter        SnapshotMetadataFileWriter

	// segmentHolder is a two-item slice that's reused to hold pointers to the
	// head and the tail of each segment so we don't need to allocate memory
	// and gc it shortly after.
	segmentHolder []checked.Bytes

	// The type of files that are being persisted. Assists with decision making
	// in the "done" phase.
	fileSetType persist.FileSetType

	// The ID of the snapshot being prepared. Only used when writing out snapshots.
	snapshotID uuid.UUID
}
   117  
// singleUseIndexWriterState captures the per-fileset state of a
// singleUseIndexWriter.
type singleUseIndexWriterState struct {
	// identifiers required to know which file to open
	// after persistence is over
	fileSetIdentifier FileSetFileIdentifier
	fileSetType       persist.FileSetType

	// track state of writer; once set, further persist attempts are refused.
	writeErr error
}

// Support writing to multiple index blocks/filesets during index persist.
// This allows us to prepare an index fileset writer per block start.
type singleUseIndexWriter struct {
	// back-ref to the index persist manager so we can share resources there
	manager *indexPersistManager
	writer  IndexFileSetWriter

	state singleUseIndexWriterState
}
   137  
   138  func (s *singleUseIndexWriter) persistIndex(builder segment.Builder) error {
   139  	// Lock the index persist manager as we're sharing the segment builder as a resource.
   140  	s.manager.Lock()
   141  	defer s.manager.Unlock()
   142  
   143  	markError := func(err error) {
   144  		s.state.writeErr = err
   145  	}
   146  	if err := s.state.writeErr; err != nil {
   147  		return fmt.Errorf("encountered error: %w, skipping further attempts to persist data", err)
   148  	}
   149  
   150  	if err := s.manager.segmentWriter.Reset(builder); err != nil {
   151  		markError(err)
   152  		return err
   153  	}
   154  
   155  	if err := s.writer.WriteSegmentFileSet(s.manager.segmentWriter); err != nil {
   156  		markError(err)
   157  		return err
   158  	}
   159  
   160  	return nil
   161  }
   162  
// closeIndex finishes this single-use index persist: it closes the fileset
// writer and, provided all writes succeeded, re-opens the just-written files
// as mmap'd persistent segments for the caller.
func (s *singleUseIndexWriter) closeIndex() ([]segment.Segment, error) {
	s.manager.Lock()
	defer s.manager.Unlock()

	// This writer will be thrown away after we're done persisting; clear all
	// references so accidental reuse fails fast rather than touching stale state.
	defer func() {
		s.state = singleUseIndexWriterState{fileSetType: -1}
		s.manager = nil
		s.writer = nil
	}()

	// i.e. we're done writing all segments for PreparedIndexPersist,
	// so we can close the writer.
	if err := s.writer.Close(); err != nil {
		return nil, err
	}

	// only attempt to retrieve data if we have not encountered errors during
	// any writes.
	if err := s.state.writeErr; err != nil {
		return nil, err
	}

	// and then we get persistent segments backed by mmap'd data so the index
	// can safely evict the segment's we have just persisted.
	result, err := ReadIndexSegments(ReadIndexSegmentsOptions{
		ReaderOptions: IndexReaderOpenOptions{
			Identifier:  s.state.fileSetIdentifier,
			FileSetType: s.state.fileSetType,
		},
		FilesystemOptions:      s.manager.opts,
		newReaderFn:            s.manager.newReaderFn,
		newPersistentSegmentFn: s.manager.newPersistentSegmentFn,
	})
	if err != nil {
		return nil, err
	}

	return result.Segments, nil
}
   203  
// indexPersistManager holds the state and shared resources used when
// persisting index filesets. Its mutex guards the shared segment writer
// across the singleUseIndexWriters that borrow it.
type indexPersistManager struct {
	sync.Mutex

	// segmentWriter holds the bulk of the re-usable in-mem resources so
	// we want to share this across writers.
	segmentWriter m3ninxpersist.MutableSegmentFileSetWriter

	// hooks used for testing
	newReaderFn            newIndexReaderFn
	newPersistentSegmentFn newPersistentSegmentFn
	newIndexWriterFn       newIndexWriterFn

	// options used by index writers
	opts Options
}

// newIndexReaderFn constructs an index fileset reader; injectable for testing.
type newIndexReaderFn func(Options) (IndexFileSetReader, error)

// newPersistentSegmentFn opens a persisted segment fileset as an mmap-backed
// segment; injectable for testing.
type newPersistentSegmentFn func(
	m3ninxpersist.IndexSegmentFileSet,
	m3ninxfs.Options,
) (m3ninxfs.Segment, error)

// newIndexWriterFn constructs an index fileset writer; injectable for testing.
type newIndexWriterFn func(Options) (IndexFileSetWriter, error)
   228  
// persistManagerMetrics are the gauges emitted when a persist completes:
// time spent writing and time spent sleeping due to rate limiting.
type persistManagerMetrics struct {
	writeDurationMs    tally.Gauge
	throttleDurationMs tally.Gauge
}
   233  
   234  func newPersistManagerMetrics(scope tally.Scope) persistManagerMetrics {
   235  	return persistManagerMetrics{
   236  		writeDurationMs:    scope.Gauge("write-duration-ms"),
   237  		throttleDurationMs: scope.Gauge("throttle-duration-ms"),
   238  	}
   239  }
   240  
   241  // NewPersistManager creates a new filesystem persist manager
   242  func NewPersistManager(opts Options) (persist.Manager, error) {
   243  	var (
   244  		filePathPrefix = opts.FilePathPrefix()
   245  		scope          = opts.InstrumentOptions().MetricsScope().SubScope("persist")
   246  	)
   247  	dataWriter, err := NewWriter(opts)
   248  	if err != nil {
   249  		return nil, err
   250  	}
   251  
   252  	segmentWriter, err := m3ninxpersist.NewMutableSegmentFileSetWriter(
   253  		opts.FSTWriterOptions())
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  
   258  	pm := &persistManager{
   259  		opts:           opts,
   260  		filePathPrefix: filePathPrefix,
   261  		nowFn:          opts.ClockOptions().NowFn(),
   262  		sleepFn:        time.Sleep,
   263  		dataPM: dataPersistManager{
   264  			writer:                        dataWriter,
   265  			segmentHolder:                 make([]checked.Bytes, 2),
   266  			nextSnapshotMetadataFileIndex: NextSnapshotMetadataFileIndex,
   267  			snapshotMetadataWriter:        NewSnapshotMetadataWriter(opts),
   268  		},
   269  		indexPM: indexPersistManager{
   270  			segmentWriter: segmentWriter,
   271  			// fs opts are used by underlying index writers
   272  			opts: opts,
   273  		},
   274  		status:  persistManagerIdle,
   275  		metrics: newPersistManagerMetrics(scope),
   276  	}
   277  	pm.indexPM.newReaderFn = NewIndexReader
   278  	pm.indexPM.newPersistentSegmentFn = m3ninxpersist.NewSegment
   279  	pm.indexPM.newIndexWriterFn = NewIndexWriter
   280  	pm.runtimeOptsListener = opts.RuntimeOptionsManager().RegisterListener(pm)
   281  
   282  	return pm, nil
   283  }
   284  
   285  func (pm *persistManager) resetWithLock() error {
   286  	pm.status = persistManagerIdle
   287  	pm.start = timeZero
   288  	pm.count = 0
   289  	pm.bytesWritten = 0
   290  	pm.worked = 0
   291  	pm.slept = 0
   292  	pm.dataPM.snapshotID = nil
   293  
   294  	return pm.indexPM.segmentWriter.Reset(nil)
   295  }
   296  
   297  // StartIndexPersist is called by the databaseFlushManager to begin the persist process for
   298  // index data.
   299  func (pm *persistManager) StartIndexPersist() (persist.IndexFlush, error) {
   300  	pm.Lock()
   301  	defer pm.Unlock()
   302  
   303  	if pm.status != persistManagerIdle {
   304  		return nil, errPersistManagerNotIdle
   305  	}
   306  	pm.status = persistManagerPersistingIndex
   307  
   308  	return pm, nil
   309  }
   310  
// PrepareIndex returns a prepared persist object which can be used to persist index data.
func (pm *persistManager) PrepareIndex(opts persist.IndexPrepareOptions) (persist.PreparedIndexPersist, error) {
	var (
		nsMetadata = opts.NamespaceMetadata
		blockStart = opts.BlockStart
		nsID       = opts.NamespaceMetadata.ID()
		prepared   persist.PreparedIndexPersist
	)

	// only support persistence of index flush files for now
	if opts.FileSetType != persist.FileSetFlushType {
		return prepared, fmt.Errorf("unable to PrepareIndex, unsupported file set type: %v", opts.FileSetType)
	}

	// ensure namespace has indexing enabled
	if !nsMetadata.Options().IndexOptions().Enabled() {
		return prepared, fmt.Errorf("unable to PrepareIndex, namespace %s does not have indexing enabled", nsID.String())
	}

	// snapshot the current status under the read lock
	pm.RLock()
	status := pm.status
	pm.RUnlock()

	// ensure StartIndexPersist has been called
	if status != persistManagerPersistingIndex {
		return prepared, errPersistManagerCannotPrepareIndexNotPersisting
	}

	// we now have all the identifiers needed to uniquely specify a single Index FileSetFile on disk.
	fileSetID := FileSetFileIdentifier{
		FileSetContentType: persist.FileSetIndexContentType,
		Namespace:          nsID,
		BlockStart:         blockStart,
		VolumeIndex:        opts.VolumeIndex,
	}
	blockSize := nsMetadata.Options().IndexOptions().BlockSize()
	idxWriterOpts := IndexWriterOpenOptions{
		BlockSize:       blockSize,
		FileSetType:     opts.FileSetType,
		Identifier:      fileSetID,
		Shards:          opts.Shards,
		IndexVolumeType: opts.IndexVolumeType,
	}

	// a fresh single-use writer is created per prepared persist so multiple
	// index blocks/volumes can be persisted during one index persist cycle.
	writer, err := pm.indexPM.newIndexWriterFn(pm.opts)
	if err != nil {
		return prepared, err
	}
	idxWriter := &singleUseIndexWriter{
		manager: &pm.indexPM,
		writer:  writer,
		state: singleUseIndexWriterState{
			// track which file we are writing in the persist manager, so we
			// know which file to read back on `closeIndex` being called.
			fileSetIdentifier: fileSetID,
			fileSetType:       opts.FileSetType,
		},
	}
	// create writer for required fileset file.
	if err := idxWriter.writer.Open(idxWriterOpts); err != nil {
		return prepared, err
	}

	// provide persistManager hooks into PreparedIndexPersist object
	prepared.Persist = idxWriter.persistIndex
	prepared.Close = idxWriter.closeIndex

	return prepared, nil
}
   381  
   382  // DoneIndex is called by the databaseFlushManager to finish the index persist process.
   383  func (pm *persistManager) DoneIndex() error {
   384  	pm.Lock()
   385  	defer pm.Unlock()
   386  
   387  	if pm.status != persistManagerPersistingIndex {
   388  		return errPersistManagerNotPersisting
   389  	}
   390  
   391  	// Emit timing metrics
   392  	pm.metrics.writeDurationMs.Update(float64(pm.worked / time.Millisecond))
   393  	pm.metrics.throttleDurationMs.Update(float64(pm.slept / time.Millisecond))
   394  
   395  	// Reset state
   396  	return pm.resetWithLock()
   397  }
   398  
   399  // StartFlushPersist is called by the databaseFlushManager to begin the persist process.
   400  func (pm *persistManager) StartFlushPersist() (persist.FlushPreparer, error) {
   401  	pm.Lock()
   402  	defer pm.Unlock()
   403  
   404  	if pm.status != persistManagerIdle {
   405  		return nil, errPersistManagerNotIdle
   406  	}
   407  	pm.status = persistManagerPersistingData
   408  	pm.dataPM.fileSetType = persist.FileSetFlushType
   409  
   410  	return pm, nil
   411  }
   412  
   413  // StartSnapshotPersist is called by the databaseFlushManager to begin the snapshot process.
   414  func (pm *persistManager) StartSnapshotPersist(snapshotID uuid.UUID) (persist.SnapshotPreparer, error) {
   415  	pm.Lock()
   416  	defer pm.Unlock()
   417  
   418  	if pm.status != persistManagerIdle {
   419  		return nil, errPersistManagerNotIdle
   420  	}
   421  	pm.status = persistManagerPersistingData
   422  	pm.dataPM.fileSetType = persist.FileSetSnapshotType
   423  	pm.dataPM.snapshotID = snapshotID
   424  
   425  	return pm, nil
   426  }
   427  
// PrepareData returns a prepared persist object which can be used to persist data.
func (pm *persistManager) PrepareData(opts persist.DataPrepareOptions) (persist.PreparedDataPersist, error) {
	var (
		nsMetadata   = opts.NamespaceMetadata
		shard        = opts.Shard
		blockStart   = opts.BlockStart
		snapshotTime = opts.Snapshot.SnapshotTime
		// NOTE(review): snapshotID is read without holding the lock; this
		// relies on the persist manager not being used concurrently (the
		// type is documented as not thread-safe) — confirm callers honor that.
		snapshotID = pm.dataPM.snapshotID
		nsID       = opts.NamespaceMetadata.ID()
		prepared   persist.PreparedDataPersist
	)

	// ensure StartDataPersist has been called
	pm.RLock()
	status := pm.status
	pm.RUnlock()

	if status != persistManagerPersistingData {
		return prepared, errPersistManagerCannotPrepareDataNotPersisting
	}

	exists, err := pm.dataFilesetExists(opts)
	if err != nil {
		return prepared, err
	}

	// Determine which volume index to write; how depends on the fileset type.
	var volumeIndex int
	switch opts.FileSetType {
	case persist.FileSetFlushType:
		// Use the volume index passed in. This ensures that the volume index is
		// the same as the cold flush version.
		volumeIndex = opts.VolumeIndex
	case persist.FileSetSnapshotType:
		// Need to work out the volume index for the next snapshot.
		volumeIndex, err = NextSnapshotFileSetVolumeIndex(pm.opts.FilePathPrefix(),
			nsMetadata.ID(), shard, blockStart)
		if err != nil {
			return prepared, err
		}
	}

	if exists && !opts.DeleteIfExists {
		// This should never happen in practice since we always track which times
		// are flushed in the shard when we bootstrap (so we should never
		// duplicately write out one of those files) and for snapshotting we append
		// a monotonically increasing number to avoid collisions.
		// instrument.
		iopts := pm.opts.InstrumentOptions()
		instrument.EmitAndLogInvariantViolation(iopts, func(l *zap.Logger) {
			l.With(
				zap.Time("blockStart", blockStart.ToTime()),
				zap.String("fileSetType", opts.FileSetType.String()),
				zap.Int("volumeIndex", volumeIndex),
				zap.Time("snapshotStart", snapshotTime.ToTime()),
				zap.String("namespace", nsID.String()),
				zap.Uint32("shard", shard),
			).Error("prepared writing fileset volume that already exists")
		})

		return prepared, errPersistManagerFileSetAlreadyExists
	}

	if exists && opts.DeleteIfExists {
		// Caller explicitly asked for the existing fileset to be replaced.
		err := DeleteFileSetAt(pm.opts.FilePathPrefix(), nsID, shard, blockStart, volumeIndex)
		if err != nil {
			return prepared, err
		}
	}

	blockSize := nsMetadata.Options().RetentionOptions().BlockSize()
	dataWriterOpts := DataWriterOpenOptions{
		BlockSize: blockSize,
		Snapshot: DataWriterSnapshotOptions{
			SnapshotTime: snapshotTime,
			SnapshotID:   snapshotID,
		},
		FileSetType: opts.FileSetType,
		Identifier: FileSetFileIdentifier{
			Namespace:   nsID,
			Shard:       shard,
			BlockStart:  blockStart,
			VolumeIndex: volumeIndex,
		},
	}
	if err := pm.dataPM.writer.Open(dataWriterOpts); err != nil {
		return prepared, err
	}

	// provide persistManager hooks into the PreparedDataPersist object
	prepared.Persist = pm.persist
	prepared.Close = pm.closeData
	prepared.DeferClose = pm.deferCloseData

	return prepared, nil
}
   522  
// persist writes a single series' segment (head + tail) to the currently
// open data fileset, applying the dynamic rate limit between writes.
// Note: the segment parameter shadows the imported m3ninx `segment` package;
// that package is not referenced here so this is safe, if unfortunate.
func (pm *persistManager) persist(
	metadata persist.Metadata,
	segment ts.Segment,
	checksum uint32,
) error {
	pm.RLock()
	// Rate limit options can change dynamically
	opts := pm.currRateLimitOpts
	pm.RUnlock()

	var (
		start = pm.nowFn()
		slept time.Duration
	)
	rateLimitMbps := opts.LimitMbps()
	if opts.LimitEnabled() && rateLimitMbps > 0.0 {
		if pm.start.IsZero() {
			// First rate-limited write of this persist cycle.
			pm.start = start
		} else if pm.count >= opts.LimitCheckEvery() {
			// Only check every LimitCheckEvery writes to keep bookkeeping cheap.
			// target is how long writing bytesWritten should have taken at the
			// configured rate; sleep off any surplus speed.
			target := time.Duration(float64(time.Second) * float64(pm.bytesWritten) / (rateLimitMbps * bytesPerMegabit))
			if elapsed := start.Sub(pm.start); elapsed < target {
				pm.sleepFn(target - elapsed)
				// Recapture start for precise timing, might take some time to "wakeup"
				now := pm.nowFn()
				slept = now.Sub(start)
				start = now
			}
			pm.count = 0
		}
	}

	// Reuse the two-element holder to avoid allocating per write.
	pm.dataPM.segmentHolder[0] = segment.Head
	pm.dataPM.segmentHolder[1] = segment.Tail
	err := pm.dataPM.writer.WriteAll(metadata, pm.dataPM.segmentHolder, checksum)
	pm.count++
	pm.bytesWritten += int64(segment.Len())

	// Accumulate worked/slept durations for the completion metrics.
	pm.worked += pm.nowFn().Sub(start)
	if slept > 0 {
		pm.slept += slept
	}

	return err
}
   567  
// closeData finalizes the currently open data fileset via the underlying
// writer.
func (pm *persistManager) closeData() error {
	return pm.dataPM.writer.Close()
}

// deferCloseData delegates to the underlying writer's DeferClose, returning
// a closer that completes the close at a later point chosen by the caller.
func (pm *persistManager) deferCloseData() (persist.DataCloser, error) {
	return pm.dataPM.writer.DeferClose()
}
   575  
   576  // DoneFlush is called by the databaseFlushManager to finish the data persist process.
   577  func (pm *persistManager) DoneFlush() error {
   578  	pm.Lock()
   579  	defer pm.Unlock()
   580  
   581  	if pm.status != persistManagerPersistingData {
   582  		return errPersistManagerNotPersisting
   583  	}
   584  
   585  	if pm.dataPM.fileSetType != persist.FileSetFlushType {
   586  		// Should never happen since interface returned by StartSnapshotPersist does not allow it.
   587  		return errPersistManagerCannotDoneFlushNotFlush
   588  	}
   589  
   590  	return pm.doneSharedWithLock()
   591  }
   592  
   593  // DoneSnapshot is called by the databaseFlushManager to finish the snapshot persist process.
   594  func (pm *persistManager) DoneSnapshot(
   595  	snapshotUUID uuid.UUID, commitLogIdentifier persist.CommitLogFile) error {
   596  	pm.Lock()
   597  	defer pm.Unlock()
   598  
   599  	if pm.status != persistManagerPersistingData {
   600  		return errPersistManagerNotPersisting
   601  	}
   602  
   603  	if pm.dataPM.fileSetType != persist.FileSetSnapshotType {
   604  		// Should never happen since interface returned by StartFlushPersist does not allow it.
   605  		return errPersistManagerCannotDoneSnapshotNotSnapshot
   606  	}
   607  
   608  	// Need to write out a snapshot metadata and checkpoint file in the snapshot case.
   609  	nextIndex, err := pm.dataPM.nextSnapshotMetadataFileIndex(pm.opts)
   610  	if err != nil {
   611  		return fmt.Errorf(
   612  			"error determining next snapshot metadata file index: %v", err)
   613  	}
   614  
   615  	err = pm.dataPM.snapshotMetadataWriter.Write(SnapshotMetadataWriteArgs{
   616  		ID: SnapshotMetadataIdentifier{
   617  			Index: nextIndex,
   618  			UUID:  snapshotUUID,
   619  		},
   620  		CommitlogIdentifier: commitLogIdentifier,
   621  	})
   622  	if err != nil {
   623  		return fmt.Errorf("error writing out snapshot metadata file: %v", err)
   624  	}
   625  
   626  	return pm.doneSharedWithLock()
   627  }
   628  
// Close all resources.
func (pm *persistManager) Close() {
	// Deregister the runtime options listener registered in NewPersistManager.
	pm.runtimeOptsListener.Close()
}
   633  
   634  func (pm *persistManager) doneSharedWithLock() error {
   635  	// Emit timing metrics
   636  	pm.metrics.writeDurationMs.Update(float64(pm.worked / time.Millisecond))
   637  	pm.metrics.throttleDurationMs.Update(float64(pm.slept / time.Millisecond))
   638  
   639  	// Reset state
   640  	return pm.resetWithLock()
   641  }
   642  
   643  func (pm *persistManager) dataFilesetExists(prepareOpts persist.DataPrepareOptions) (bool, error) {
   644  	var (
   645  		nsID       = prepareOpts.NamespaceMetadata.ID()
   646  		shard      = prepareOpts.Shard
   647  		blockStart = prepareOpts.BlockStart
   648  		volume     = prepareOpts.VolumeIndex
   649  	)
   650  
   651  	switch prepareOpts.FileSetType {
   652  	case persist.FileSetSnapshotType:
   653  		// Checking if a snapshot file exists for a block start doesn't make
   654  		// sense in this context because the logic for creating new snapshot
   655  		// files does not use the volume index provided in the prepareOpts.
   656  		// Instead, the new volume index is determined by looking at what files
   657  		// exist on disk. This means that there can never be a conflict when
   658  		// trying to write new snapshot files.
   659  		return false, nil
   660  	case persist.FileSetFlushType:
   661  		return DataFileSetExists(pm.filePathPrefix, nsID, shard, blockStart, volume)
   662  	default:
   663  		return false, fmt.Errorf(
   664  			"unable to determine if fileset exists in persist manager for fileset type: %s",
   665  			prepareOpts.FileSetType)
   666  	}
   667  }
   668  
   669  func (pm *persistManager) SetRuntimeOptions(value runtime.Options) {
   670  	pm.Lock()
   671  	pm.currRateLimitOpts = value.PersistRateLimitOptions()
   672  	pm.Unlock()
   673  }