github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/checkpoint.go (about) 1 // Copyright 2019 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package bitalostable 6 7 import ( 8 "os" 9 10 "github.com/cockroachdb/errors/oserror" 11 "github.com/zuoyebang/bitalostable/internal/base" 12 "github.com/zuoyebang/bitalostable/vfs" 13 "github.com/zuoyebang/bitalostable/vfs/atomicfs" 14 ) 15 16 // checkpointOptions hold the optional parameters to construct checkpoint 17 // snapshots. 18 type checkpointOptions struct { 19 // flushWAL set to true will force a flush and sync of the WAL prior to 20 // checkpointing. 21 flushWAL bool 22 } 23 24 // CheckpointOption set optional parameters used by `DB.Checkpoint`. 25 type CheckpointOption func(*checkpointOptions) 26 27 // WithFlushedWAL enables flushing and syncing the WAL prior to constructing a 28 // checkpoint. This guarantees that any writes committed before calling 29 // DB.Checkpoint will be part of that checkpoint. 30 // 31 // Note that this setting can only be useful in cases when some writes are 32 // performed with Sync = false. Otherwise, the guarantee will already be met. 33 // 34 // Passing this option is functionally equivalent to calling 35 // DB.LogData(nil, Sync) right before DB.Checkpoint. 36 func WithFlushedWAL() CheckpointOption { 37 return func(opt *checkpointOptions) { 38 opt.flushWAL = true 39 } 40 } 41 42 // mkdirAllAndSyncParents creates destDir and any of its missing parents. 43 // Those missing parents, as well as the closest existing ancestor, are synced. 44 // Returns a handle to the directory created at destDir. 45 func mkdirAllAndSyncParents(fs vfs.FS, destDir string) (vfs.File, error) { 46 // Collect paths for all directories between destDir (excluded) and its 47 // closest existing ancestor (included). 48 var parentPaths []string 49 foundExistingAncestor := false 50 for parentPath := fs.PathDir(destDir); parentPath != "."; parentPath = fs.PathDir(parentPath) { 51 parentPaths = append(parentPaths, parentPath) 52 _, err := fs.Stat(parentPath) 53 if err == nil { 54 // Exit loop at the closest existing ancestor. 55 foundExistingAncestor = true 56 break 57 } 58 if !oserror.IsNotExist(err) { 59 return nil, err 60 } 61 } 62 // Handle empty filesystem edge case. 63 if !foundExistingAncestor { 64 parentPaths = append(parentPaths, "") 65 } 66 // Create destDir and any of its missing parents. 67 if err := fs.MkdirAll(destDir, 0755); err != nil { 68 return nil, err 69 } 70 // Sync all the parent directories up to the closest existing ancestor, 71 // included. 72 for _, parentPath := range parentPaths { 73 parentDir, err := fs.OpenDir(parentPath) 74 if err != nil { 75 return nil, err 76 } 77 err = parentDir.Sync() 78 if err != nil { 79 _ = parentDir.Close() 80 return nil, err 81 } 82 err = parentDir.Close() 83 if err != nil { 84 return nil, err 85 } 86 } 87 return fs.OpenDir(destDir) 88 } 89 90 // Checkpoint constructs a snapshot of the DB instance in the specified 91 // directory. The WAL, MANIFEST, OPTIONS, and sstables will be copied into the 92 // snapshot. Hard links will be used when possible. Beware of the significant 93 // space overhead for a checkpoint if hard links are disabled. Also beware that 94 // even if hard links are used, the space overhead for the checkpoint will 95 // increase over time as the DB performs compactions. 96 func (d *DB) Checkpoint( 97 destDir string, opts ...CheckpointOption, 98 ) ( 99 ckErr error, /* used in deferred cleanup */ 100 ) { 101 opt := &checkpointOptions{} 102 for _, fn := range opts { 103 fn(opt) 104 } 105 106 if _, err := d.opts.FS.Stat(destDir); !oserror.IsNotExist(err) { 107 if err == nil { 108 return &os.PathError{ 109 Op: "checkpoint", 110 Path: destDir, 111 Err: oserror.ErrExist, 112 } 113 } 114 return err 115 } 116 117 if opt.flushWAL && !d.opts.DisableWAL { 118 // Write an empty log-data record to flush and sync the WAL. 119 if err := d.LogData(nil /* data */, Sync); err != nil { 120 return err 121 } 122 } 123 124 // Disable file deletions. 125 d.mu.Lock() 126 d.disableFileDeletions() 127 defer func() { 128 d.mu.Lock() 129 defer d.mu.Unlock() 130 d.enableFileDeletions() 131 }() 132 133 // TODO(peter): RocksDB provides the option to roll the manifest if the 134 // MANIFEST size is too large. Should we do this too? 135 136 // Lock the manifest before getting the current version. We need the 137 // length of the manifest that we read to match the current version that 138 // we read, otherwise we might copy a versionEdit not reflected in the 139 // sstables we copy/link. 140 d.mu.versions.logLock() 141 // Get the unflushed log files, the current version, and the current manifest 142 // file number. 143 memQueue := d.mu.mem.queue 144 current := d.mu.versions.currentVersion() 145 formatVers := d.mu.formatVers.vers 146 manifestFileNum := d.mu.versions.manifestFileNum 147 manifestSize := d.mu.versions.manifest.Size() 148 optionsFileNum := d.optionsFileNum 149 150 // Release the manifest and DB.mu so we don't block other operations on 151 // the database. 152 d.mu.versions.logUnlock() 153 d.mu.Unlock() 154 155 // Wrap the normal filesystem with one which wraps newly created files with 156 // vfs.NewSyncingFile. 157 fs := syncingFS{ 158 FS: d.opts.FS, 159 syncOpts: vfs.SyncingFileOptions{ 160 NoSyncOnClose: d.opts.NoSyncOnClose, 161 BytesPerSync: d.opts.BytesPerSync, 162 }, 163 } 164 165 // Create the dir and its parents (if necessary), and sync them. 166 var dir vfs.File 167 defer func() { 168 if dir != nil { 169 _ = dir.Close() 170 } 171 if ckErr != nil { 172 // Attempt to cleanup on error. 173 paths, _ := fs.List(destDir) 174 for _, path := range paths { 175 _ = fs.Remove(path) 176 } 177 _ = fs.Remove(destDir) 178 } 179 }() 180 dir, ckErr = mkdirAllAndSyncParents(fs, destDir) 181 if ckErr != nil { 182 return ckErr 183 } 184 185 { 186 // Link or copy the OPTIONS. 187 srcPath := base.MakeFilepath(fs, d.dirname, fileTypeOptions, optionsFileNum) 188 destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) 189 ckErr = vfs.LinkOrCopy(fs, srcPath, destPath) 190 if ckErr != nil { 191 return ckErr 192 } 193 } 194 195 { 196 // Set the format major version in the destination directory. 197 var versionMarker *atomicfs.Marker 198 versionMarker, _, ckErr = atomicfs.LocateMarker(fs, destDir, formatVersionMarkerName) 199 if ckErr != nil { 200 return ckErr 201 } 202 203 // We use the marker to encode the active format version in the 204 // marker filename. Unlike other uses of the atomic marker, 205 // there is no file with the filename `formatVers.String()` on 206 // the filesystem. 207 ckErr = versionMarker.Move(formatVers.String()) 208 if ckErr != nil { 209 return ckErr 210 } 211 ckErr = versionMarker.Close() 212 if ckErr != nil { 213 return ckErr 214 } 215 } 216 217 { 218 // Copy the MANIFEST, and create a pointer to it. We copy rather 219 // than link because additional version edits added to the 220 // MANIFEST after we took our snapshot of the sstables will 221 // reference sstables that aren't in our checkpoint. For a 222 // similar reason, we need to limit how much of the MANIFEST we 223 // copy. 224 srcPath := base.MakeFilepath(fs, d.dirname, fileTypeManifest, manifestFileNum) 225 destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) 226 ckErr = vfs.LimitedCopy(fs, srcPath, destPath, manifestSize) 227 if ckErr != nil { 228 return ckErr 229 } 230 231 // Recent format versions use an atomic marker for setting the 232 // active manifest. Older versions use the CURRENT file. The 233 // setCurrentFunc function will return a closure that will 234 // take the appropriate action for the database's format 235 // version. 236 var manifestMarker *atomicfs.Marker 237 manifestMarker, _, ckErr = atomicfs.LocateMarker(fs, destDir, manifestMarkerName) 238 if ckErr != nil { 239 return ckErr 240 } 241 ckErr = setCurrentFunc(formatVers, manifestMarker, fs, destDir, dir)(manifestFileNum) 242 if ckErr != nil { 243 return ckErr 244 } 245 ckErr = manifestMarker.Close() 246 if ckErr != nil { 247 return ckErr 248 } 249 } 250 251 // Link or copy the sstables. 252 for l := range current.Levels { 253 iter := current.Levels[l].Iter() 254 for f := iter.First(); f != nil; f = iter.Next() { 255 srcPath := base.MakeFilepath(fs, d.dirname, fileTypeTable, f.FileNum) 256 destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) 257 ckErr = vfs.LinkOrCopy(fs, srcPath, destPath) 258 if ckErr != nil { 259 return ckErr 260 } 261 } 262 } 263 264 // Copy the WAL files. We copy rather than link because WAL file recycling 265 // will cause the WAL files to be reused which would invalidate the 266 // checkpoint. 267 for i := range memQueue { 268 logNum := memQueue[i].logNum 269 if logNum == 0 { 270 continue 271 } 272 srcPath := base.MakeFilepath(fs, d.walDirname, fileTypeLog, logNum) 273 destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) 274 ckErr = vfs.Copy(fs, srcPath, destPath) 275 if ckErr != nil { 276 return ckErr 277 } 278 } 279 280 // Sync and close the checkpoint directory. 281 ckErr = dir.Sync() 282 if ckErr != nil { 283 return ckErr 284 } 285 ckErr = dir.Close() 286 dir = nil 287 return ckErr 288 }