github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/version_set.go (about)

     1  // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"sync"
    12  	"sync/atomic"
    13  
    14  	"github.com/petermattis/pebble/internal/base"
    15  	"github.com/petermattis/pebble/internal/manifest"
    16  	"github.com/petermattis/pebble/internal/record"
    17  	"github.com/petermattis/pebble/vfs"
    18  )
    19  
    20  const numLevels = manifest.NumLevels
    21  
    22  // Provide type aliases for the various manifest structs.
    23  type bulkVersionEdit = manifest.BulkVersionEdit
    24  type deletedFileEntry = manifest.DeletedFileEntry
    25  type fileMetadata = manifest.FileMetadata
    26  type newFileEntry = manifest.NewFileEntry
    27  type version = manifest.Version
    28  type versionEdit = manifest.VersionEdit
    29  type versionList = manifest.VersionList
    30  
    31  // versionSet manages a collection of immutable versions, and manages the
    32  // creation of a new version from the most recent version. A new versions is
    33  // created from an existing version by applying a version edit which is just
    34  // like it sounds: a delta from the previous version. Version edits are logged
    35  // to the manifest file, which is replayed at startup.
    36  type versionSet struct {
    37  	// Immutable fields.
    38  	dirname string
    39  	mu      *sync.Mutex
    40  	opts    *Options
    41  	fs      vfs.FS
    42  	cmp     Compare
    43  	cmpName string
    44  	// Dynamic base level allows the dynamic base level computation to be
    45  	// disabled. Used by tests which want to create specific LSM structures.
    46  	dynamicBaseLevel bool
    47  
    48  	// Mutable fields.
    49  	versions versionList
    50  	picker   *compactionPicker
    51  
    52  	metrics VersionMetrics
    53  
    54  	// A pointer to versionSet.addObsoleteLocked. Avoids allocating a new closure
    55  	// on the creation of every version.
    56  	obsoleteFn        func(obsolete []uint64)
    57  	obsoleteTables    []uint64
    58  	obsoleteManifests []uint64
    59  	obsoleteOptions   []uint64
    60  
    61  	logNum          uint64
    62  	prevLogNum      uint64
    63  	nextFileNum     uint64
    64  	logSeqNum       uint64 // next seqNum to use for WAL writes
    65  	visibleSeqNum   uint64 // visible seqNum (<= logSeqNum)
    66  	manifestFileNum uint64
    67  
    68  	manifestFile vfs.File
    69  	manifest     *record.Writer
    70  
    71  	writing    bool
    72  	writerCond sync.Cond
    73  }
    74  
    75  // load loads the version set from the manifest file.
    76  func (vs *versionSet) load(dirname string, opts *Options, mu *sync.Mutex) error {
    77  	vs.dirname = dirname
    78  	vs.mu = mu
    79  	vs.writerCond.L = mu
    80  	vs.opts = opts
    81  	vs.fs = opts.FS
    82  	vs.cmp = opts.Comparer.Compare
    83  	vs.cmpName = opts.Comparer.Name
    84  	vs.dynamicBaseLevel = true
    85  	vs.versions.Init(mu)
    86  	vs.obsoleteFn = vs.addObsoleteLocked
    87  	// For historical reasons, the next file number is initialized to 2.
    88  	vs.nextFileNum = 2
    89  
    90  	// Read the CURRENT file to find the current manifest file.
    91  	current, err := vs.fs.Open(base.MakeFilename(dirname, fileTypeCurrent, 0))
    92  	if err != nil {
    93  		return fmt.Errorf("pebble: could not open CURRENT file for DB %q: %v", dirname, err)
    94  	}
    95  	defer current.Close()
    96  	stat, err := current.Stat()
    97  	if err != nil {
    98  		return err
    99  	}
   100  	n := stat.Size()
   101  	if n == 0 {
   102  		return fmt.Errorf("pebble: CURRENT file for DB %q is empty", dirname)
   103  	}
   104  	if n > 4096 {
   105  		return fmt.Errorf("pebble: CURRENT file for DB %q is too large", dirname)
   106  	}
   107  	b := make([]byte, n)
   108  	_, err = current.ReadAt(b, 0)
   109  	if err != nil {
   110  		return err
   111  	}
   112  	if b[n-1] != '\n' {
   113  		return fmt.Errorf("pebble: CURRENT file for DB %q is malformed", dirname)
   114  	}
   115  	b = b[:n-1]
   116  
   117  	// Read the versionEdits in the manifest file.
   118  	var bve bulkVersionEdit
   119  	manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b))
   120  	if err != nil {
   121  		return fmt.Errorf("pebble: could not open manifest file %q for DB %q: %v", b, dirname, err)
   122  	}
   123  	defer manifest.Close()
   124  	rr := record.NewReader(manifest, 0 /* logNum */)
   125  	for {
   126  		r, err := rr.Next()
   127  		if err == io.EOF {
   128  			break
   129  		}
   130  		if err != nil {
   131  			return err
   132  		}
   133  		var ve versionEdit
   134  		err = ve.Decode(r)
   135  		if err != nil {
   136  			return err
   137  		}
   138  		if ve.ComparerName != "" {
   139  			if ve.ComparerName != vs.cmpName {
   140  				return fmt.Errorf("pebble: manifest file %q for DB %q: "+
   141  					"comparer name from file %q != comparer name from Options %q",
   142  					b, dirname, ve.ComparerName, vs.cmpName)
   143  			}
   144  		}
   145  		bve.Accumulate(&ve)
   146  		if ve.LogNum != 0 {
   147  			vs.logNum = ve.LogNum
   148  		}
   149  		if ve.PrevLogNum != 0 {
   150  			vs.prevLogNum = ve.PrevLogNum
   151  		}
   152  		if ve.NextFileNum != 0 {
   153  			vs.nextFileNum = ve.NextFileNum
   154  		}
   155  		if ve.LastSeqNum != 0 {
   156  			vs.logSeqNum = ve.LastSeqNum
   157  		}
   158  	}
   159  	if vs.logNum == 0 || vs.nextFileNum == 0 {
   160  		if vs.nextFileNum == 2 {
   161  			// We have a freshly created DB.
   162  		} else {
   163  			return fmt.Errorf("pebble: incomplete manifest file %q for DB %q", b, dirname)
   164  		}
   165  	}
   166  	vs.markFileNumUsed(vs.logNum)
   167  	vs.markFileNumUsed(vs.prevLogNum)
   168  
   169  	newVersion, err := bve.Apply(opts, nil, vs.cmp)
   170  	if err != nil {
   171  		return err
   172  	}
   173  	vs.append(newVersion)
   174  
   175  	for i := range vs.metrics.Levels {
   176  		l := &vs.metrics.Levels[i]
   177  		l.NumFiles = int64(len(newVersion.Files[i]))
   178  		l.Size = uint64(totalSize(newVersion.Files[i]))
   179  	}
   180  	return nil
   181  }
   182  
   183  // logAndApply logs the version edit to the manifest, applies the version edit
   184  // to the current version, and installs the new version. DB.mu must be held
   185  // when calling this method and will be released temporarily while performing
   186  // file I/O.
   187  func (vs *versionSet) logAndApply(
   188  	jobID int,
   189  	ve *versionEdit,
   190  	metrics map[int]*LevelMetrics,
   191  	dir vfs.File,
   192  ) error {
   193  	// Wait for any existing writing to the manifest to complete, then mark the
   194  	// manifest as busy.
   195  	for vs.writing {
   196  		vs.writerCond.Wait()
   197  	}
   198  	vs.writing = true
   199  	defer func() {
   200  		vs.writing = false
   201  		vs.writerCond.Signal()
   202  	}()
   203  
   204  	if ve.LogNum != 0 {
   205  		if ve.LogNum < vs.logNum || vs.nextFileNum <= ve.LogNum {
   206  			panic(fmt.Sprintf("pebble: inconsistent versionEdit logNumber %d", ve.LogNum))
   207  		}
   208  	}
   209  	ve.NextFileNum = vs.nextFileNum
   210  	ve.LastSeqNum = atomic.LoadUint64(&vs.logSeqNum)
   211  	currentVersion := vs.currentVersion()
   212  	var newVersion *version
   213  
   214  	// Generate a new manifest if we don't currently have one, or the current one
   215  	// is too large.
   216  	var newManifestFileNum uint64
   217  	if vs.manifest == nil || vs.manifest.Size() >= vs.opts.MaxManifestFileSize {
   218  		newManifestFileNum = vs.getNextFileNum()
   219  	}
   220  
   221  	var picker *compactionPicker
   222  	if err := func() error {
   223  		vs.mu.Unlock()
   224  		defer vs.mu.Lock()
   225  
   226  		var bve bulkVersionEdit
   227  		bve.Accumulate(ve)
   228  
   229  		var err error
   230  		newVersion, err = bve.Apply(vs.opts, currentVersion, vs.cmp)
   231  		if err != nil {
   232  			return err
   233  		}
   234  
   235  		if newManifestFileNum != 0 {
   236  			if err := vs.createManifest(vs.dirname, newManifestFileNum); err != nil {
   237  				if vs.opts.EventListener.ManifestCreated != nil {
   238  					vs.opts.EventListener.ManifestCreated(ManifestCreateInfo{
   239  						JobID:   jobID,
   240  						Path:    base.MakeFilename(vs.dirname, fileTypeManifest, newManifestFileNum),
   241  						FileNum: newManifestFileNum,
   242  						Err:     err,
   243  					})
   244  				}
   245  				return err
   246  			}
   247  		}
   248  
   249  		w, err := vs.manifest.Next()
   250  		if err != nil {
   251  			return err
   252  		}
   253  		// NB: Any error from this point on is considered fatal as we don't now if
   254  		// the MANIFEST write occurred or not. Trying to determine that is
   255  		// fraught. Instead we rely on the standard recovery mechanism run when a
   256  		// database is open. In particular, that mechanism generates a new MANIFEST
   257  		// and ensures it is synced.
   258  		if err := ve.Encode(w); err != nil {
   259  			vs.opts.Logger.Fatalf("MANIFEST write failed: %v", err)
   260  			return err
   261  		}
   262  		if err := vs.manifest.Flush(); err != nil {
   263  			vs.opts.Logger.Fatalf("MANIFEST flush failed: %v", err)
   264  			return err
   265  		}
   266  		if err := vs.manifestFile.Sync(); err != nil {
   267  			vs.opts.Logger.Fatalf("MANIFEST sync failed: %v", err)
   268  			return err
   269  		}
   270  		if newManifestFileNum != 0 {
   271  			if err := setCurrentFile(vs.dirname, vs.fs, newManifestFileNum); err != nil {
   272  				vs.opts.Logger.Fatalf("MANIFEST set current failed: %v", err)
   273  				return err
   274  			}
   275  			if err := dir.Sync(); err != nil {
   276  				vs.opts.Logger.Fatalf("MANIFEST dirsync failed: %v", err)
   277  				return err
   278  			}
   279  			if vs.opts.EventListener.ManifestCreated != nil {
   280  				vs.opts.EventListener.ManifestCreated(ManifestCreateInfo{
   281  					JobID:   jobID,
   282  					Path:    base.MakeFilename(vs.dirname, fileTypeManifest, newManifestFileNum),
   283  					FileNum: newManifestFileNum,
   284  				})
   285  			}
   286  		}
   287  		picker = newCompactionPicker(newVersion, vs.opts)
   288  		if !vs.dynamicBaseLevel {
   289  			picker.baseLevel = 1
   290  		}
   291  		return nil
   292  	}(); err != nil {
   293  		return err
   294  	}
   295  
   296  	// Install the new version.
   297  	vs.append(newVersion)
   298  	if ve.LogNum != 0 {
   299  		vs.logNum = ve.LogNum
   300  	}
   301  	if ve.PrevLogNum != 0 {
   302  		vs.prevLogNum = ve.PrevLogNum
   303  	}
   304  	if newManifestFileNum != 0 {
   305  		if vs.manifestFileNum != 0 {
   306  			vs.obsoleteManifests = append(vs.obsoleteManifests, vs.manifestFileNum)
   307  		}
   308  		vs.manifestFileNum = newManifestFileNum
   309  	}
   310  	vs.picker = picker
   311  
   312  	if metrics != nil {
   313  		for level, update := range metrics {
   314  			vs.metrics.Levels[level].Add(update)
   315  		}
   316  	}
   317  	for i := range vs.metrics.Levels {
   318  		l := &vs.metrics.Levels[i]
   319  		l.NumFiles = int64(len(newVersion.Files[i]))
   320  		l.Size = uint64(totalSize(newVersion.Files[i]))
   321  	}
   322  	return nil
   323  }
   324  
   325  // createManifest creates a manifest file that contains a snapshot of vs.
   326  func (vs *versionSet) createManifest(dirname string, fileNum uint64) (err error) {
   327  	var (
   328  		filename     = base.MakeFilename(dirname, fileTypeManifest, fileNum)
   329  		manifestFile vfs.File
   330  		manifest     *record.Writer
   331  	)
   332  	defer func() {
   333  		if manifest != nil {
   334  			manifest.Close()
   335  		}
   336  		if manifestFile != nil {
   337  			manifestFile.Close()
   338  		}
   339  		if err != nil {
   340  			vs.fs.Remove(filename)
   341  		}
   342  	}()
   343  	manifestFile, err = vs.fs.Create(filename)
   344  	if err != nil {
   345  		return err
   346  	}
   347  	manifest = record.NewWriter(manifestFile)
   348  
   349  	snapshot := versionEdit{
   350  		ComparerName: vs.cmpName,
   351  	}
   352  	for level, fileMetadata := range vs.currentVersion().Files {
   353  		for _, meta := range fileMetadata {
   354  			snapshot.NewFiles = append(snapshot.NewFiles, newFileEntry{
   355  				Level: level,
   356  				Meta:  meta,
   357  			})
   358  		}
   359  	}
   360  
   361  	w, err1 := manifest.Next()
   362  	if err1 != nil {
   363  		return err1
   364  	}
   365  	if err := snapshot.Encode(w); err != nil {
   366  		return err
   367  	}
   368  
   369  	vs.manifest, manifest = manifest, nil
   370  	vs.manifestFile, manifestFile = manifestFile, nil
   371  	return nil
   372  }
   373  
   374  func (vs *versionSet) markFileNumUsed(fileNum uint64) {
   375  	if vs.nextFileNum <= fileNum {
   376  		vs.nextFileNum = fileNum + 1
   377  	}
   378  }
   379  
   380  func (vs *versionSet) getNextFileNum() uint64 {
   381  	x := vs.nextFileNum
   382  	vs.nextFileNum++
   383  	return x
   384  }
   385  
   386  func (vs *versionSet) append(v *version) {
   387  	if v.Refs() != 0 {
   388  		panic("pebble: version should be unreferenced")
   389  	}
   390  	if !vs.versions.Empty() {
   391  		vs.versions.Back().UnrefLocked()
   392  	}
   393  	v.Deleted = vs.obsoleteFn
   394  	v.Ref()
   395  	vs.versions.PushBack(v)
   396  }
   397  
   398  func (vs *versionSet) currentVersion() *version {
   399  	return vs.versions.Back()
   400  }
   401  
   402  func (vs *versionSet) addLiveFileNums(m map[uint64]struct{}) {
   403  	current := vs.currentVersion()
   404  	for v := vs.versions.Front(); true; v = v.Next() {
   405  		for _, ff := range v.Files {
   406  			for _, f := range ff {
   407  				m[f.FileNum] = struct{}{}
   408  			}
   409  		}
   410  		if v == current {
   411  			break
   412  		}
   413  	}
   414  }
   415  
   416  func (vs *versionSet) addObsoleteLocked(obsolete []uint64) {
   417  	vs.obsoleteTables = append(vs.obsoleteTables, obsolete...)
   418  }