github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/vfs/syncing_file.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package vfs

import (
	"sync/atomic"

	"github.com/cockroachdb/errors"
)

// SyncingFileOptions holds the options for a syncingFile.
type SyncingFileOptions struct {
	// NoSyncOnClose elides the automatic Sync during Close if it's not possible
	// to sync the remainder of the file in a non-blocking way.
	NoSyncOnClose   bool
	BytesPerSync    int
	PreallocateSize int
}

type syncingFile struct {
	File
	// fd can be InvalidFd if the underlying File does not support it.
	fd              uintptr
	noSyncOnClose   bool
	bytesPerSync    int64
	preallocateSize int64
	// The offset at which dirty data has been written.
	offset atomic.Int64
	// The offset at which data has been synced. Note that if SyncFileRange is
	// being used, the periodic syncing of data during writing will only ever
	// sync up to offset-1MB. This is done to avoid rewriting the tail of the
	// file multiple times, but has the side effect of ensuring that Close will
	// sync the file's metadata.
	syncOffset         atomic.Int64
	preallocatedBlocks int64
}

// NewSyncingFile wraps a writable file and ensures that data is synced
// periodically as it is written. The syncing does not provide persistency
// guarantees for these periodic syncs, but is used to avoid latency spikes if
// the OS automatically decides to write out a large chunk of dirty filesystem
// buffers. The underlying file is fully synced upon close.
func NewSyncingFile(f File, opts SyncingFileOptions) File {
	s := &syncingFile{
		File:            f,
		fd:              f.Fd(),
		noSyncOnClose:   bool(opts.NoSyncOnClose),
		bytesPerSync:    int64(opts.BytesPerSync),
		preallocateSize: int64(opts.PreallocateSize),
	}
	// Ensure a file that is opened and then closed will be synced, even if no
	// data has been written to it.
	s.syncOffset.Store(-1)
	return s
}
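// Illustrative usage sketch: wrapping a freshly created file so that roughly
// every BytesPerSync bytes of newly written data are flushed in the
// background. The option values here are arbitrary examples, and Default is
// assumed to be this package's default FS.
//
//	f, err := Default.Create("000001.log")
//	if err != nil {
//		return err
//	}
//	wf := NewSyncingFile(f, SyncingFileOptions{
//		BytesPerSync:    512 << 10, // background-sync every ~512 KB
//		PreallocateSize: 4 << 20,   // preallocate in 4 MB chunks
//	})
//	// Writes go through wf; Close performs a final, durable sync
//	// (unless NoSyncOnClose is set).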
// NB: syncingFile.Write is unsafe for concurrent use!
func (f *syncingFile) Write(p []byte) (n int, err error) {
	_ = f.preallocate(f.offset.Load())

	n, err = f.File.Write(p)
	if err != nil {
		return n, errors.WithStack(err)
	}
	// The offset is updated atomically so that it can be accessed safely from
	// Sync.
	f.offset.Add(int64(n))
	if err := f.maybeSync(); err != nil {
		return 0, err
	}
	return n, nil
}

func (f *syncingFile) preallocate(offset int64) error {
	if f.fd == InvalidFd || f.preallocateSize == 0 {
		return nil
	}

	newPreallocatedBlocks := (offset + f.preallocateSize - 1) / f.preallocateSize
	if newPreallocatedBlocks <= f.preallocatedBlocks {
		return nil
	}

	length := f.preallocateSize * (newPreallocatedBlocks - f.preallocatedBlocks)
	offset = f.preallocateSize * f.preallocatedBlocks
	f.preallocatedBlocks = newPreallocatedBlocks
	return f.Preallocate(offset, length)
}

func (f *syncingFile) ratchetSyncOffset(offset int64) {
	for {
		syncOffset := f.syncOffset.Load()
		if syncOffset >= offset {
			return
		}
		if f.syncOffset.CompareAndSwap(syncOffset, offset) {
			return
		}
	}
}

func (f *syncingFile) Sync() error {
	// We update syncOffset (atomically) in order to avoid spurious syncs in
	// maybeSync. Note that even if syncOffset is larger than the current file
	// offset, we still need to call the underlying file's sync for persistence
	// guarantees which are not provided by SyncTo (or by sync_file_range on
	// Linux).
	f.ratchetSyncOffset(f.offset.Load())
	return f.SyncData()
}

func (f *syncingFile) maybeSync() error {
	if f.bytesPerSync <= 0 {
		return nil
	}

	// From the RocksDB source:
	//
	// We try to avoid sync to the last 1MB of data. For two reasons:
	// (1) avoid rewrite the same page that is modified later.
	// (2) for older version of OS, write can block while writing out
	//     the page.
	// Xfs does neighbor page flushing outside of the specified ranges. We
	// need to make sure sync range is far from the write offset.
	const syncRangeBuffer = 1 << 20 // 1 MB
	offset := f.offset.Load()
	if offset <= syncRangeBuffer {
		return nil
	}

	const syncRangeAlignment = 4 << 10 // 4 KB
	syncToOffset := offset - syncRangeBuffer
	syncToOffset -= syncToOffset % syncRangeAlignment
	syncOffset := f.syncOffset.Load()
	if syncToOffset < 0 || (syncToOffset-syncOffset) < f.bytesPerSync {
		return nil
	}

	if f.fd == InvalidFd {
		return errors.WithStack(f.Sync())
	}

	// Note that SyncTo will always be called with an offset < f.offset. The
	// SyncTo implementation may choose to sync the entire file (i.e. on OSes
	// which do not support syncing a portion of the file).
	fullSync, err := f.SyncTo(syncToOffset)
	if err != nil {
		return errors.WithStack(err)
	}
	if fullSync {
		f.ratchetSyncOffset(offset)
	} else {
		f.ratchetSyncOffset(syncToOffset)
	}
	return nil
}
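// Worked example for maybeSync above (illustrative numbers): with
// bytesPerSync = 512 KB, a current offset of 3 MB, and a previously synced
// offset of 1 MB, syncToOffset = 3 MB - 1 MB = 2 MB (already 4 KB-aligned).
// Since 2 MB - 1 MB >= 512 KB, SyncTo(2 MB) is issued and syncOffset ratchets
// to 2 MB (or to 3 MB if the implementation reports a full-file sync). The
// trailing 1 MB remains unsynced until a later write or Close.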
func (f *syncingFile) Close() error {
	// Sync any data that has been written but not yet synced unless the file
	// has noSyncOnClose option explicitly set.
	//
	// NB: If the file is capable of non-durability-guarantee SyncTos, and the
	// caller has not called Sync since the last write, syncOffset is guaranteed
	// to be less than f.offset. This ensures we fall into the below conditional
	// and perform a full sync to durably persist the file.
	if off := f.offset.Load(); off > f.syncOffset.Load() {
		// There's still remaining dirty data.

		if f.noSyncOnClose {
			// If NoSyncOnClose is set, only perform a SyncTo. On linux, SyncTo
			// translates to a non-blocking `sync_file_range` call which
			// provides no persistence guarantee. Since it's non-blocking,
			// there's no latency hit of a blocking sync call, but we still
			// ensure we're not allowing significant dirty data to accumulate.
			if _, err := f.File.SyncTo(off); err != nil {
				return err
			}
			f.ratchetSyncOffset(off)
		} else if err := f.Sync(); err != nil {
			return errors.WithStack(err)
		}
	}
	return errors.WithStack(f.File.Close())
}

// NewSyncingFS wraps a vfs.FS with one that wraps newly created files with
// vfs.NewSyncingFile.
func NewSyncingFS(fs FS, syncOpts SyncingFileOptions) FS {
	return &syncingFS{
		FS:       fs,
		syncOpts: syncOpts,
	}
}

type syncingFS struct {
	FS
	syncOpts SyncingFileOptions
}

var _ FS = (*syncingFS)(nil)

func (fs *syncingFS) Create(name string) (File, error) {
	f, err := fs.FS.Create(name)
	if err != nil {
		return nil, err
	}
	return NewSyncingFile(f, fs.syncOpts), nil
}

func (fs *syncingFS) ReuseForWrite(oldname, newname string) (File, error) {
	// TODO(radu): implement this if needed.
	panic("unimplemented")
}
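// Illustrative sketch: wrapping an entire FS so that files created through it
// come back already wrapped by NewSyncingFile. The BytesPerSync value is an
// arbitrary example, and Default is assumed to be this package's default FS.
//
//	fs := NewSyncingFS(Default, SyncingFileOptions{
//		BytesPerSync: 512 << 10,
//	})
//	f, err := fs.Create("data.log")
//	if err != nil {
//		return err
//	}
//	defer f.Close()
//	// Note that ReuseForWrite on this FS currently panics (unimplemented).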