github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/version_set.go (about) 1 // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "fmt" 9 "io" 10 "os" 11 "sync" 12 "sync/atomic" 13 14 "github.com/petermattis/pebble/internal/base" 15 "github.com/petermattis/pebble/internal/manifest" 16 "github.com/petermattis/pebble/internal/record" 17 "github.com/petermattis/pebble/vfs" 18 ) 19 20 const numLevels = manifest.NumLevels 21 22 // Provide type aliases for the various manifest structs. 23 type bulkVersionEdit = manifest.BulkVersionEdit 24 type deletedFileEntry = manifest.DeletedFileEntry 25 type fileMetadata = manifest.FileMetadata 26 type newFileEntry = manifest.NewFileEntry 27 type version = manifest.Version 28 type versionEdit = manifest.VersionEdit 29 type versionList = manifest.VersionList 30 31 // versionSet manages a collection of immutable versions, and manages the 32 // creation of a new version from the most recent version. A new versions is 33 // created from an existing version by applying a version edit which is just 34 // like it sounds: a delta from the previous version. Version edits are logged 35 // to the manifest file, which is replayed at startup. 36 type versionSet struct { 37 // Immutable fields. 38 dirname string 39 mu *sync.Mutex 40 opts *Options 41 fs vfs.FS 42 cmp Compare 43 cmpName string 44 // Dynamic base level allows the dynamic base level computation to be 45 // disabled. Used by tests which want to create specific LSM structures. 46 dynamicBaseLevel bool 47 48 // Mutable fields. 49 versions versionList 50 picker *compactionPicker 51 52 metrics VersionMetrics 53 54 // A pointer to versionSet.addObsoleteLocked. Avoids allocating a new closure 55 // on the creation of every version. 56 obsoleteFn func(obsolete []uint64) 57 obsoleteTables []uint64 58 obsoleteManifests []uint64 59 obsoleteOptions []uint64 60 61 logNum uint64 62 prevLogNum uint64 63 nextFileNum uint64 64 logSeqNum uint64 // next seqNum to use for WAL writes 65 visibleSeqNum uint64 // visible seqNum (<= logSeqNum) 66 manifestFileNum uint64 67 68 manifestFile vfs.File 69 manifest *record.Writer 70 71 writing bool 72 writerCond sync.Cond 73 } 74 75 // load loads the version set from the manifest file. 76 func (vs *versionSet) load(dirname string, opts *Options, mu *sync.Mutex) error { 77 vs.dirname = dirname 78 vs.mu = mu 79 vs.writerCond.L = mu 80 vs.opts = opts 81 vs.fs = opts.FS 82 vs.cmp = opts.Comparer.Compare 83 vs.cmpName = opts.Comparer.Name 84 vs.dynamicBaseLevel = true 85 vs.versions.Init(mu) 86 vs.obsoleteFn = vs.addObsoleteLocked 87 // For historical reasons, the next file number is initialized to 2. 88 vs.nextFileNum = 2 89 90 // Read the CURRENT file to find the current manifest file. 91 current, err := vs.fs.Open(base.MakeFilename(dirname, fileTypeCurrent, 0)) 92 if err != nil { 93 return fmt.Errorf("pebble: could not open CURRENT file for DB %q: %v", dirname, err) 94 } 95 defer current.Close() 96 stat, err := current.Stat() 97 if err != nil { 98 return err 99 } 100 n := stat.Size() 101 if n == 0 { 102 return fmt.Errorf("pebble: CURRENT file for DB %q is empty", dirname) 103 } 104 if n > 4096 { 105 return fmt.Errorf("pebble: CURRENT file for DB %q is too large", dirname) 106 } 107 b := make([]byte, n) 108 _, err = current.ReadAt(b, 0) 109 if err != nil { 110 return err 111 } 112 if b[n-1] != '\n' { 113 return fmt.Errorf("pebble: CURRENT file for DB %q is malformed", dirname) 114 } 115 b = b[:n-1] 116 117 // Read the versionEdits in the manifest file. 118 var bve bulkVersionEdit 119 manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b)) 120 if err != nil { 121 return fmt.Errorf("pebble: could not open manifest file %q for DB %q: %v", b, dirname, err) 122 } 123 defer manifest.Close() 124 rr := record.NewReader(manifest, 0 /* logNum */) 125 for { 126 r, err := rr.Next() 127 if err == io.EOF { 128 break 129 } 130 if err != nil { 131 return err 132 } 133 var ve versionEdit 134 err = ve.Decode(r) 135 if err != nil { 136 return err 137 } 138 if ve.ComparerName != "" { 139 if ve.ComparerName != vs.cmpName { 140 return fmt.Errorf("pebble: manifest file %q for DB %q: "+ 141 "comparer name from file %q != comparer name from Options %q", 142 b, dirname, ve.ComparerName, vs.cmpName) 143 } 144 } 145 bve.Accumulate(&ve) 146 if ve.LogNum != 0 { 147 vs.logNum = ve.LogNum 148 } 149 if ve.PrevLogNum != 0 { 150 vs.prevLogNum = ve.PrevLogNum 151 } 152 if ve.NextFileNum != 0 { 153 vs.nextFileNum = ve.NextFileNum 154 } 155 if ve.LastSeqNum != 0 { 156 vs.logSeqNum = ve.LastSeqNum 157 } 158 } 159 if vs.logNum == 0 || vs.nextFileNum == 0 { 160 if vs.nextFileNum == 2 { 161 // We have a freshly created DB. 162 } else { 163 return fmt.Errorf("pebble: incomplete manifest file %q for DB %q", b, dirname) 164 } 165 } 166 vs.markFileNumUsed(vs.logNum) 167 vs.markFileNumUsed(vs.prevLogNum) 168 169 newVersion, err := bve.Apply(opts, nil, vs.cmp) 170 if err != nil { 171 return err 172 } 173 vs.append(newVersion) 174 175 for i := range vs.metrics.Levels { 176 l := &vs.metrics.Levels[i] 177 l.NumFiles = int64(len(newVersion.Files[i])) 178 l.Size = uint64(totalSize(newVersion.Files[i])) 179 } 180 return nil 181 } 182 183 // logAndApply logs the version edit to the manifest, applies the version edit 184 // to the current version, and installs the new version. DB.mu must be held 185 // when calling this method and will be released temporarily while performing 186 // file I/O. 187 func (vs *versionSet) logAndApply( 188 jobID int, 189 ve *versionEdit, 190 metrics map[int]*LevelMetrics, 191 dir vfs.File, 192 ) error { 193 // Wait for any existing writing to the manifest to complete, then mark the 194 // manifest as busy. 195 for vs.writing { 196 vs.writerCond.Wait() 197 } 198 vs.writing = true 199 defer func() { 200 vs.writing = false 201 vs.writerCond.Signal() 202 }() 203 204 if ve.LogNum != 0 { 205 if ve.LogNum < vs.logNum || vs.nextFileNum <= ve.LogNum { 206 panic(fmt.Sprintf("pebble: inconsistent versionEdit logNumber %d", ve.LogNum)) 207 } 208 } 209 ve.NextFileNum = vs.nextFileNum 210 ve.LastSeqNum = atomic.LoadUint64(&vs.logSeqNum) 211 currentVersion := vs.currentVersion() 212 var newVersion *version 213 214 // Generate a new manifest if we don't currently have one, or the current one 215 // is too large. 216 var newManifestFileNum uint64 217 if vs.manifest == nil || vs.manifest.Size() >= vs.opts.MaxManifestFileSize { 218 newManifestFileNum = vs.getNextFileNum() 219 } 220 221 var picker *compactionPicker 222 if err := func() error { 223 vs.mu.Unlock() 224 defer vs.mu.Lock() 225 226 var bve bulkVersionEdit 227 bve.Accumulate(ve) 228 229 var err error 230 newVersion, err = bve.Apply(vs.opts, currentVersion, vs.cmp) 231 if err != nil { 232 return err 233 } 234 235 if newManifestFileNum != 0 { 236 if err := vs.createManifest(vs.dirname, newManifestFileNum); err != nil { 237 if vs.opts.EventListener.ManifestCreated != nil { 238 vs.opts.EventListener.ManifestCreated(ManifestCreateInfo{ 239 JobID: jobID, 240 Path: base.MakeFilename(vs.dirname, fileTypeManifest, newManifestFileNum), 241 FileNum: newManifestFileNum, 242 Err: err, 243 }) 244 } 245 return err 246 } 247 } 248 249 w, err := vs.manifest.Next() 250 if err != nil { 251 return err 252 } 253 // NB: Any error from this point on is considered fatal as we don't now if 254 // the MANIFEST write occurred or not. Trying to determine that is 255 // fraught. Instead we rely on the standard recovery mechanism run when a 256 // database is open. In particular, that mechanism generates a new MANIFEST 257 // and ensures it is synced. 258 if err := ve.Encode(w); err != nil { 259 vs.opts.Logger.Fatalf("MANIFEST write failed: %v", err) 260 return err 261 } 262 if err := vs.manifest.Flush(); err != nil { 263 vs.opts.Logger.Fatalf("MANIFEST flush failed: %v", err) 264 return err 265 } 266 if err := vs.manifestFile.Sync(); err != nil { 267 vs.opts.Logger.Fatalf("MANIFEST sync failed: %v", err) 268 return err 269 } 270 if newManifestFileNum != 0 { 271 if err := setCurrentFile(vs.dirname, vs.fs, newManifestFileNum); err != nil { 272 vs.opts.Logger.Fatalf("MANIFEST set current failed: %v", err) 273 return err 274 } 275 if err := dir.Sync(); err != nil { 276 vs.opts.Logger.Fatalf("MANIFEST dirsync failed: %v", err) 277 return err 278 } 279 if vs.opts.EventListener.ManifestCreated != nil { 280 vs.opts.EventListener.ManifestCreated(ManifestCreateInfo{ 281 JobID: jobID, 282 Path: base.MakeFilename(vs.dirname, fileTypeManifest, newManifestFileNum), 283 FileNum: newManifestFileNum, 284 }) 285 } 286 } 287 picker = newCompactionPicker(newVersion, vs.opts) 288 if !vs.dynamicBaseLevel { 289 picker.baseLevel = 1 290 } 291 return nil 292 }(); err != nil { 293 return err 294 } 295 296 // Install the new version. 297 vs.append(newVersion) 298 if ve.LogNum != 0 { 299 vs.logNum = ve.LogNum 300 } 301 if ve.PrevLogNum != 0 { 302 vs.prevLogNum = ve.PrevLogNum 303 } 304 if newManifestFileNum != 0 { 305 if vs.manifestFileNum != 0 { 306 vs.obsoleteManifests = append(vs.obsoleteManifests, vs.manifestFileNum) 307 } 308 vs.manifestFileNum = newManifestFileNum 309 } 310 vs.picker = picker 311 312 if metrics != nil { 313 for level, update := range metrics { 314 vs.metrics.Levels[level].Add(update) 315 } 316 } 317 for i := range vs.metrics.Levels { 318 l := &vs.metrics.Levels[i] 319 l.NumFiles = int64(len(newVersion.Files[i])) 320 l.Size = uint64(totalSize(newVersion.Files[i])) 321 } 322 return nil 323 } 324 325 // createManifest creates a manifest file that contains a snapshot of vs. 326 func (vs *versionSet) createManifest(dirname string, fileNum uint64) (err error) { 327 var ( 328 filename = base.MakeFilename(dirname, fileTypeManifest, fileNum) 329 manifestFile vfs.File 330 manifest *record.Writer 331 ) 332 defer func() { 333 if manifest != nil { 334 manifest.Close() 335 } 336 if manifestFile != nil { 337 manifestFile.Close() 338 } 339 if err != nil { 340 vs.fs.Remove(filename) 341 } 342 }() 343 manifestFile, err = vs.fs.Create(filename) 344 if err != nil { 345 return err 346 } 347 manifest = record.NewWriter(manifestFile) 348 349 snapshot := versionEdit{ 350 ComparerName: vs.cmpName, 351 } 352 for level, fileMetadata := range vs.currentVersion().Files { 353 for _, meta := range fileMetadata { 354 snapshot.NewFiles = append(snapshot.NewFiles, newFileEntry{ 355 Level: level, 356 Meta: meta, 357 }) 358 } 359 } 360 361 w, err1 := manifest.Next() 362 if err1 != nil { 363 return err1 364 } 365 if err := snapshot.Encode(w); err != nil { 366 return err 367 } 368 369 vs.manifest, manifest = manifest, nil 370 vs.manifestFile, manifestFile = manifestFile, nil 371 return nil 372 } 373 374 func (vs *versionSet) markFileNumUsed(fileNum uint64) { 375 if vs.nextFileNum <= fileNum { 376 vs.nextFileNum = fileNum + 1 377 } 378 } 379 380 func (vs *versionSet) getNextFileNum() uint64 { 381 x := vs.nextFileNum 382 vs.nextFileNum++ 383 return x 384 } 385 386 func (vs *versionSet) append(v *version) { 387 if v.Refs() != 0 { 388 panic("pebble: version should be unreferenced") 389 } 390 if !vs.versions.Empty() { 391 vs.versions.Back().UnrefLocked() 392 } 393 v.Deleted = vs.obsoleteFn 394 v.Ref() 395 vs.versions.PushBack(v) 396 } 397 398 func (vs *versionSet) currentVersion() *version { 399 return vs.versions.Back() 400 } 401 402 func (vs *versionSet) addLiveFileNums(m map[uint64]struct{}) { 403 current := vs.currentVersion() 404 for v := vs.versions.Front(); true; v = v.Next() { 405 for _, ff := range v.Files { 406 for _, f := range ff { 407 m[f.FileNum] = struct{}{} 408 } 409 } 410 if v == current { 411 break 412 } 413 } 414 } 415 416 func (vs *versionSet) addObsoleteLocked(obsolete []uint64) { 417 vs.obsoleteTables = append(vs.obsoleteTables, obsolete...) 418 }