github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/format_major_version.go (about)

     1  // Copyright 2021 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitalostable
     6  
     7  import (
     8  	"fmt"
     9  	"strconv"
    10  
    11  	"github.com/cockroachdb/errors"
    12  	"github.com/zuoyebang/bitalostable/internal/base"
    13  	"github.com/zuoyebang/bitalostable/internal/manifest"
    14  	"github.com/zuoyebang/bitalostable/sstable"
    15  	"github.com/zuoyebang/bitalostable/vfs"
    16  	"github.com/zuoyebang/bitalostable/vfs/atomicfs"
    17  )
    18  
    19  // FormatMajorVersion is a constant controlling the format of persisted
    20  // data. Backwards incompatible changes to durable formats are gated
    21  // behind new format major versions.
    22  //
    23  // At any point, a database's format major version may be bumped.
    24  // However, once a database's format major version is increased,
    25  // previous versions of Pebble will refuse to open the database.
    26  //
    27  // The zero value format is the FormatDefault constant. The exact
    28  // FormatVersion that the default corresponds to may change with time.
    29  type FormatMajorVersion uint64
    30  
    31  // String implements fmt.Stringer.
    32  func (v FormatMajorVersion) String() string {
    33  	// NB: This must not change. It's used as the value for the the
    34  	// on-disk version marker file.
    35  	//
    36  	// Specifically, this value must always parse as a base 10 integer
    37  	// that fits in a uint64. We format it as zero-padded, 3-digit
    38  	// number today, but the padding may change.
    39  	return fmt.Sprintf("%03d", v)
    40  }
    41  
// The format major versions, listed in the order in which a database is
// ratcheted through them. Each constant's numeric value is assigned by iota
// and is what FormatMajorVersion.String renders into the on-disk version
// marker, so existing constants must never be reordered or removed; new
// versions are added immediately before FormatNewest.
const (
	// 21.2 versions.

	// FormatDefault leaves the format version unspecified. The
	// FormatDefault constant may be ratcheted upwards over time.
	FormatDefault FormatMajorVersion = iota
	// FormatMostCompatible maintains the most backwards compatibility,
	// maintaining bi-directional compatibility with RocksDB 6.2.1 in
	// the particular configuration described in the Pebble README.
	FormatMostCompatible
	// formatVersionedManifestMarker is the first
	// backwards-incompatible change made to Pebble, introducing the
	// format-version marker file for handling backwards-incompatible
	// changes more broadly, and replacing the `CURRENT` file with a
	// marker file.
	//
	// This format version is intended as an intermediary version state.
	// It is deliberately unexported to discourage direct use of this
	// format major version. Clients should use FormatVersioned, which
	// also ensures earlier versions of Pebble fail to open a database
	// written in a future format major version.
	formatVersionedManifestMarker
	// FormatVersioned is a new format major version that replaces the
	// old `CURRENT` file with a new 'marker' file scheme. Previous
	// Pebble versions will be unable to open the database unless
	// they're aware of format versions.
	FormatVersioned
	// FormatSetWithDelete is a format major version that introduces a new key
	// kind, base.InternalKeyKindSetWithDelete. Previous Pebble versions will be
	// unable to open this database.
	FormatSetWithDelete

	// 22.1 versions.

	// FormatBlockPropertyCollector is a format major version that introduces
	// BlockPropertyCollectors.
	FormatBlockPropertyCollector
	// FormatSplitUserKeysMarked is a format major version that guarantees that
	// all files that share user keys with neighbors are marked for compaction
	// in the manifest. Ratcheting to FormatSplitUserKeysMarked will block
	// (without holding mutexes) until the scan of the LSM is complete and the
	// manifest has been rotated.
	FormatSplitUserKeysMarked

	// 22.2 versions.

	// FormatSplitUserKeysMarkedCompacted is a format major version that
	// guarantees that all files explicitly marked for compaction in the manifest
	// have been compacted. Combined with the FormatSplitUserKeysMarked format
	// major version, this version guarantees that there are no user keys split
	// across multiple files within a level L1+. Ratcheting to this format version
	// will block (without holding mutexes) until all necessary compactions for
	// files marked for compaction are complete.
	FormatSplitUserKeysMarkedCompacted
	// FormatRangeKeys is a format major version that introduces range keys.
	FormatRangeKeys
	// FormatMinTableFormatPebblev1 is a format major version that guarantees that
	// tables created by or ingested into the DB at or above this format major
	// version will have a table format version of at least Pebblev1 (Block
	// Properties).
	FormatMinTableFormatPebblev1
	// FormatPrePebblev1Marked is a format major version that guarantees that all
	// sstables with a table format version pre-Pebblev1 (i.e. those that are
	// guaranteed to not contain block properties) are marked for compaction in
	// the manifest. Ratcheting to FormatPrePebblev1Marked will block (without
	// holding mutexes) until the scan of the LSM is complete and the manifest has
	// been rotated.
	FormatPrePebblev1Marked

	// 23.1 versions.

	// FormatPrePebblev1MarkedCompacted is a format major version that
	// guarantees that all sstables explicitly marked for compaction in the
	// manifest have been compacted. Ratcheting to this format version will block
	// (without holding mutexes) until all necessary compactions for files marked
	// for compaction are complete.
	FormatPrePebblev1MarkedCompacted

	// FormatNewest always contains the most recent format major version.
	// NB: When adding new versions, the MaxTableFormat and MinTableFormat
	// methods should also be updated to handle the new FormatMajorVersion
	// (both panic on a version they do not recognize), and a migration must
	// be registered in formatMajorVersionMigrations.
	FormatNewest FormatMajorVersion = FormatPrePebblev1MarkedCompacted
)
   126  
   127  // MaxTableFormat returns the maximum sstable.TableFormat that can be used at
   128  // this FormatMajorVersion.
   129  func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat {
   130  	switch v {
   131  	case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker,
   132  		FormatVersioned, FormatSetWithDelete:
   133  		return sstable.TableFormatRocksDBv2
   134  	case FormatBlockPropertyCollector, FormatSplitUserKeysMarked,
   135  		FormatSplitUserKeysMarkedCompacted:
   136  		return sstable.TableFormatPebblev1
   137  	case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked,
   138  		FormatPrePebblev1MarkedCompacted:
   139  		return sstable.TableFormatPebblev2
   140  	default:
   141  		panic(fmt.Sprintf("bitalostable: unsupported format major version: %s", v))
   142  	}
   143  }
   144  
   145  // MinTableFormat returns the minimum sstable.TableFormat that can be used at
   146  // this FormatMajorVersion.
   147  func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat {
   148  	switch v {
   149  	case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker,
   150  		FormatVersioned, FormatSetWithDelete, FormatBlockPropertyCollector,
   151  		FormatSplitUserKeysMarked, FormatSplitUserKeysMarkedCompacted,
   152  		FormatRangeKeys:
   153  		return sstable.TableFormatLevelDB
   154  	case FormatMinTableFormatPebblev1, FormatPrePebblev1Marked,
   155  		FormatPrePebblev1MarkedCompacted:
   156  		return sstable.TableFormatPebblev1
   157  	default:
   158  		panic(fmt.Sprintf("bitalostable: unsupported format major version: %s", v))
   159  	}
   160  }
   161  
// formatMajorVersionMigrations defines the migrations from one format
// major version to the next. The map is keyed by the version being
// migrated TO. Each migration is defined as a closure which will be
// invoked on the database before the new format major version is
// committed. Migrations must be idempotent. Migrations are invoked with
// d.mu locked.
//
// Each migration is responsible for invoking finalizeFormatVersUpgrade
// to set the new format major version. ratchetFormatMajorVersionLocked
// reports a fatal error through the DB's Logger if a migration returns a
// nil error but fails to finalize the new format major version.
var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{
	// NOTE(review): this entry returns nil without finalizing the version.
	// Presumably it is never invoked because a database's version is always
	// at least FormatMostCompatible (see lookupFormatMajorVersion) — confirm.
	FormatMostCompatible: func(d *DB) error { return nil },
	formatVersionedManifestMarker: func(d *DB) error {
		// formatVersionedManifestMarker introduces the use of a marker
		// file for pointing to the current MANIFEST file.

		// Lock the manifest.
		d.mu.versions.logLock()
		defer d.mu.versions.logUnlock()

		// Construct the filename of the currently active manifest and
		// move the manifest marker to that filename. The marker is
		// guaranteed to exist, because we unconditionally locate it
		// during Open.
		manifestFileNum := d.mu.versions.manifestFileNum
		filename := base.MakeFilename(fileTypeManifest, manifestFileNum)
		if err := d.mu.versions.manifestMarker.Move(filename); err != nil {
			return errors.Wrap(err, "moving manifest marker")
		}

		// Now that we have a manifest marker file in place and pointing
		// to the current MANIFEST, finalize the upgrade. If we fail for
		// some reason, a retry of this migration is guaranteed to again
		// move the manifest marker file to the latest manifest. If
		// we're unable to finalize the upgrade, a subsequent call to
		// Open will ignore the manifest marker.
		if err := d.finalizeFormatVersUpgrade(formatVersionedManifestMarker); err != nil {
			return err
		}

		// We've finalized the upgrade. All subsequent Open calls will
		// ignore the CURRENT file and instead read the manifest marker.
		// Before we unlock the manifest, we need to update versionSet
		// to use the manifest marker on future rotations.
		d.mu.versions.setCurrent = setCurrentFuncMarker(
			d.mu.versions.manifestMarker,
			d.mu.versions.fs,
			d.mu.versions.dirname)
		return nil
	},
	// The FormatVersioned version is split into two, each with their
	// own migration to ensure the post-migration cleanup happens even
	// if there's a crash immediately after finalizing the version. Once
	// a new format major version is finalized, its migration will never
	// run again. Post-migration cleanup like the one in the migration
	// below must be performed in a separate migration or every time the
	// database opens.
	FormatVersioned: func(d *DB) error {
		// Replace the `CURRENT` file with one that points to the
		// nonexistent `MANIFEST-000000` file. If an earlier Pebble
		// version that does not know about format major versions
		// attempts to open the database, it will error, avoiding
		// accidental corruption.
		if err := setCurrentFile(d.mu.versions.dirname, d.mu.versions.fs, 0); err != nil {
			return err
		}
		return d.finalizeFormatVersUpgrade(FormatVersioned)
	},
	// As SetWithDelete is a new key kind, there is nothing to migrate. We can
	// simply finalize the format version and we're done.
	FormatSetWithDelete: func(d *DB) error {
		return d.finalizeFormatVersUpgrade(FormatSetWithDelete)
	},
	// No migration work is performed here; the version is only finalized.
	FormatBlockPropertyCollector: func(d *DB) error {
		return d.finalizeFormatVersUpgrade(FormatBlockPropertyCollector)
	},
	FormatSplitUserKeysMarked: func(d *DB) error {
		// Mark any unmarked files with split-user keys. Note all format major
		// versions migrations are invoked with DB.mu locked.
		if err := d.markFilesLocked(markFilesWithSplitUserKeys(d.opts.Comparer.Equal)); err != nil {
			return err
		}
		return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarked)
	},
	FormatSplitUserKeysMarkedCompacted: func(d *DB) error {
		// Before finalizing the format major version, rewrite any sstables
		// still marked for compaction. Note all format major versions
		// migrations are invoked with DB.mu locked.
		if err := d.compactMarkedFilesLocked(); err != nil {
			return err
		}
		return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarkedCompacted)
	},
	// No migration work is performed here; the version is only finalized.
	FormatRangeKeys: func(d *DB) error {
		return d.finalizeFormatVersUpgrade(FormatRangeKeys)
	},
	// No migration work is performed here; the version is only finalized.
	FormatMinTableFormatPebblev1: func(d *DB) error {
		return d.finalizeFormatVersUpgrade(FormatMinTableFormatPebblev1)
	},
	FormatPrePebblev1Marked: func(d *DB) error {
		// Mark any unmarked files that contain only table properties. Note all
		// format major versions migrations are invoked with DB.mu locked.
		if err := d.markFilesLocked(markFilesPrePebblev1(d.tableCache)); err != nil {
			return err
		}
		return d.finalizeFormatVersUpgrade(FormatPrePebblev1Marked)
	},
	FormatPrePebblev1MarkedCompacted: func(d *DB) error {
		// Before finalizing the format major version, rewrite any sstables
		// still marked for compaction. Note all format major versions
		// migrations are invoked with DB.mu locked.
		if err := d.compactMarkedFilesLocked(); err != nil {
			return err
		}
		return d.finalizeFormatVersUpgrade(FormatPrePebblev1MarkedCompacted)
	},
}
   279  
// formatVersionMarkerName is the marker name passed to atomicfs.LocateMarker
// when looking up the marker that records a database's format major version.
const formatVersionMarkerName = `format-version`
   281  
   282  func lookupFormatMajorVersion(
   283  	fs vfs.FS, dirname string,
   284  ) (FormatMajorVersion, *atomicfs.Marker, error) {
   285  	m, versString, err := atomicfs.LocateMarker(fs, dirname, formatVersionMarkerName)
   286  	if err != nil {
   287  		return 0, nil, err
   288  	}
   289  	if versString == "" {
   290  		return FormatMostCompatible, m, nil
   291  	}
   292  	v, err := strconv.ParseUint(versString, 10, 64)
   293  	if err != nil {
   294  		return 0, nil, errors.Wrap(err, "parsing format major version")
   295  	}
   296  	vers := FormatMajorVersion(v)
   297  	if vers == FormatDefault {
   298  		return 0, nil, errors.Newf("bitalostable: default format major version should not persisted", vers)
   299  	}
   300  	if vers > FormatNewest {
   301  		return 0, nil, errors.Newf("bitalostable: database %q written in format major version %d", dirname, vers)
   302  	}
   303  	return vers, m, nil
   304  }
   305  
   306  // FormatMajorVersion returns the database's active format major
   307  // version. The format major version may be higher than the one
   308  // provided in Options when the database was opened if the existing
   309  // database was written with a higher format version.
   310  func (d *DB) FormatMajorVersion() FormatMajorVersion {
   311  	d.mu.Lock()
   312  	defer d.mu.Unlock()
   313  	return d.mu.formatVers.vers
   314  }
   315  
   316  // RatchetFormatMajorVersion ratchets the opened database's format major
   317  // version to the provided version. It errors if the provided format
   318  // major version is below the database's current version. Once a
   319  // database's format major version is upgraded, previous Pebble versions
   320  // that do not know of the format version will be unable to open the
   321  // database.
   322  func (d *DB) RatchetFormatMajorVersion(fmv FormatMajorVersion) error {
   323  	if err := d.closed.Load(); err != nil {
   324  		panic(err)
   325  	}
   326  
   327  	d.mu.Lock()
   328  	defer d.mu.Unlock()
   329  	return d.ratchetFormatMajorVersionLocked(fmv)
   330  }
   331  
   332  func (d *DB) ratchetFormatMajorVersionLocked(formatVers FormatMajorVersion) error {
   333  	if d.opts.ReadOnly {
   334  		return ErrReadOnly
   335  	}
   336  	if formatVers > FormatNewest {
   337  		// Guard against accidentally forgetting to update FormatNewest.
   338  		return errors.Errorf("bitalostable: unknown format version %d", formatVers)
   339  	}
   340  	if d.mu.formatVers.vers > formatVers {
   341  		return errors.Newf("bitalostable: database already at format major version %d; cannot reduce to %d",
   342  			d.mu.formatVers.vers, formatVers)
   343  	}
   344  	if d.mu.formatVers.ratcheting {
   345  		return errors.Newf("bitalostable: database format major version upgrade is in-progress")
   346  	}
   347  	d.mu.formatVers.ratcheting = true
   348  	defer func() { d.mu.formatVers.ratcheting = false }()
   349  
   350  	for nextVers := d.mu.formatVers.vers + 1; nextVers <= formatVers; nextVers++ {
   351  		if err := formatMajorVersionMigrations[nextVers](d); err != nil {
   352  			return errors.Wrapf(err, "migrating to version %d", nextVers)
   353  		}
   354  
   355  		// NB: The migration is responsible for calling
   356  		// finalizeFormatVersUpgrade to finalize the upgrade. This
   357  		// structure is necessary because some migrations may need to
   358  		// update in-memory state (without ever dropping locks) after
   359  		// the upgrade is finalized. Here we assert that the upgrade
   360  		// did occur.
   361  		if d.mu.formatVers.vers != nextVers {
   362  			d.opts.Logger.Fatalf("bitalostable: successful migration to format version %d never finalized the upgrade", nextVers)
   363  		}
   364  	}
   365  	return nil
   366  }
   367  
   368  // finalizeFormatVersUpgrade is typically only be called from within a
   369  // format major version migration.
   370  //
   371  // See formatMajorVersionMigrations.
   372  func (d *DB) finalizeFormatVersUpgrade(formatVers FormatMajorVersion) error {
   373  	// We use the marker to encode the active format version in the
   374  	// marker filename. Unlike other uses of the atomic marker, there is
   375  	// no file with the filename `formatVers.String()` on the
   376  	// filesystem.
   377  	if err := d.mu.formatVers.marker.Move(formatVers.String()); err != nil {
   378  		return err
   379  	}
   380  	d.mu.formatVers.vers = formatVers
   381  	d.opts.EventListener.FormatUpgrade(formatVers)
   382  	return nil
   383  }
   384  
   385  // compactMarkedFilesLocked performs a migration that schedules rewrite
   386  // compactions to compact away any sstables marked for compaction.
   387  // compactMarkedFilesLocked is run while ratcheting the database's format major
   388  // version to FormatSplitUserKeysMarkedCompacted.
   389  //
   390  // Note that while this method is called with the DB.mu held, and will not
   391  // return until all marked files have been compacted, the mutex is dropped while
   392  // waiting for compactions to complete (or for slots to free up).
   393  func (d *DB) compactMarkedFilesLocked() error {
   394  	curr := d.mu.versions.currentVersion()
   395  	for curr.Stats.MarkedForCompaction > 0 {
   396  		// Attempt to schedule a compaction to rewrite a file marked for
   397  		// compaction.
   398  		d.maybeScheduleCompactionPicker(func(picker compactionPicker, env compactionEnv) *pickedCompaction {
   399  			return picker.pickRewriteCompaction(env)
   400  		})
   401  
   402  		// The above attempt might succeed and schedule a rewrite compaction. Or
   403  		// there might not be available compaction concurrency to schedule the
   404  		// compaction.  Or compaction of the file might have already been in
   405  		// progress. In any scenario, wait until there's some change in the
   406  		// state of active compactions.
   407  
   408  		// Before waiting, check that the database hasn't been closed. Trying to
   409  		// schedule the compaction may have dropped d.mu while waiting for a
   410  		// manifest write to complete. In that dropped interim, the database may
   411  		// have been closed.
   412  		if err := d.closed.Load(); err != nil {
   413  			return err.(error)
   414  		}
   415  		// NB: Waiting on this condition variable drops d.mu while blocked.
   416  		d.mu.compact.cond.Wait()
   417  
   418  		// Some flush or compaction was scheduled or completed. Loop again to
   419  		// check again for files that must be compacted. The next iteration may
   420  		// find same file again, but that's okay. It'll eventually succeed in
   421  		// scheduling the compaction and eventually be woken by its completion.
   422  		curr = d.mu.versions.currentVersion()
   423  	}
   424  	return nil
   425  }
   426  
// findFilesFunc scans the LSM version v for files, returning found=true if
// at least one file matched. The returned array holds the matched files, if
// any, indexed by level. A non-nil error reports a failed scan; markFilesLocked
// discards the other results in that case.
type findFilesFunc func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error)
   431  
   432  // markFilesWithSplitUserKeys scans the LSM's levels 1 through 6 for adjacent
   433  // files that contain the same user key. Such arrangements of files were
   434  // permitted in RocksDB and in Pebble up to SHA a860bbad.
   435  var markFilesWithSplitUserKeys = func(equal Equal) findFilesFunc {
   436  	return func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error) {
   437  		// Files with split user keys are expected to be rare and performing key
   438  		// comparisons for every file within the LSM is expensive, so drop the
   439  		// database lock while scanning the file metadata.
   440  		for l := numLevels - 1; l > 0; l-- {
   441  			iter := v.Levels[l].Iter()
   442  			var prevFile *fileMetadata
   443  			var prevUserKey []byte
   444  			for f := iter.First(); f != nil; f = iter.Next() {
   445  				if prevUserKey != nil && equal(prevUserKey, f.Smallest.UserKey) {
   446  					// NB: We may append a file twice, once as prevFile and once
   447  					// as f. That's okay, and handled below.
   448  					files[l] = append(files[l], prevFile, f)
   449  					found = true
   450  				}
   451  				if f.Largest.IsExclusiveSentinel() {
   452  					prevUserKey = nil
   453  					prevFile = nil
   454  				} else {
   455  					prevUserKey = f.Largest.UserKey
   456  					prevFile = f
   457  				}
   458  			}
   459  		}
   460  		return
   461  	}
   462  }
   463  
   464  // markFilesPrePebblev1 scans the LSM for files that do not support block
   465  // properties (i.e. a table format version pre-Pebblev1).
   466  var markFilesPrePebblev1 = func(tc *tableCacheContainer) findFilesFunc {
   467  	return func(v *version) (found bool, files [numLevels][]*fileMetadata, err error) {
   468  		for l := numLevels - 1; l > 0; l-- {
   469  			iter := v.Levels[l].Iter()
   470  			for f := iter.First(); f != nil; f = iter.Next() {
   471  				err = tc.withReader(f, func(r *sstable.Reader) error {
   472  					tf, err := r.TableFormat()
   473  					if err != nil {
   474  						return err
   475  					}
   476  					if tf < sstable.TableFormatPebblev1 {
   477  						found = true
   478  						files[l] = append(files[l], f)
   479  					}
   480  					return nil
   481  				})
   482  				if err != nil {
   483  					return
   484  				}
   485  			}
   486  		}
   487  		return
   488  	}
   489  }
   490  
// markFilesLocked durably marks the files that match the given findFilesFunc
// for compaction.
//
// It must be called with d.mu held. The mutex is released while findFn scans
// the version and re-acquired before any state is modified. If findFn fails,
// its error is returned and nothing is marked. If files are found, they are
// marked in memory and the manifest is force-rotated so that the marks are
// persisted; the result of that manifest write is returned.
func (d *DB) markFilesLocked(findFn findFilesFunc) error {
	// Reserve a job ID for the manifest write performed at the end.
	jobID := d.mu.nextJobID
	d.mu.nextJobID++

	vers := d.mu.versions.currentVersion()
	var (
		found bool
		files [numLevels][]*fileMetadata
		err   error
	)
	func() {
		// Note the unusual locking: unlock, defer Lock(). The scan of the files in
		// the version does not need to block other operations that require the
		// DB.mu. Drop it for the scan, before re-acquiring it.
		d.mu.Unlock()
		defer d.mu.Lock()
		found, files, err = findFn(vers)
	}()
	if err != nil {
		return err
	}

	// The database lock has been acquired again by the defer within the above
	// anonymous function.
	if !found {
		// Nothing to do.
		return nil
	}

	// After scanning, if we found files to mark, we fetch the current state of
	// the LSM (which may have changed) and set MarkedForCompaction on the files,
	// and update the version's Stats.MarkedForCompaction count, which are both
	// protected by d.mu.

	// Lock the manifest for a coherent view of the LSM. The database lock has
	// been re-acquired by the defer within the above anonymous function.
	d.mu.versions.logLock()
	vers = d.mu.versions.currentVersion()
	for l, filesToMark := range files {
		if len(filesToMark) == 0 {
			continue
		}
		for _, f := range filesToMark {
			// Ignore files to be marked that have already been compacted or marked.
			// This also skips duplicates that findFn may have returned.
			if f.CompactionState == manifest.CompactionStateCompacted ||
				f.MarkedForCompaction {
				continue
			}
			// Else, mark the file for compaction in this version.
			vers.Stats.MarkedForCompaction++
			f.MarkedForCompaction = true
		}
		// The compaction picker uses the markedForCompactionAnnotator to
		// quickly find files marked for compaction, or to quickly determine
		// that there are no such files marked for compaction within a level.
		// A b-tree node may be annotated with an annotation recording that
		// there are no files marked for compaction within the node's subtree,
		// based on the assumption that it's static.
		//
		// Since we're marking files for compaction, these b-tree nodes'
		// annotations will be out of date. Clear the compaction-picking
		// annotation, so that it's recomputed the next time the compaction
		// picker looks for a file marked for compaction.
		vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{})
	}

	// The 'marked-for-compaction' bit is persisted in the MANIFEST file
	// metadata. We've already modified the in-memory file metadata, but the
	// manifest hasn't been updated. Force rotation to a new MANIFEST file,
	// which will write every file metadata to the new manifest file and ensure
	// that the now marked-for-compaction file metadata are persisted as marked.
	// NB: This call to logAndApply will unlock the MANIFEST, which we locked
	// above before obtaining `vers`.
	return d.mu.versions.logAndApply(
		jobID,
		&manifest.VersionEdit{},
		map[int]*LevelMetrics{},
		true, /* forceRotation */
		func() []compactionInfo { return d.getInProgressCompactionInfoLocked(nil) })
}