// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package vfs

import (
	"sync/atomic"

	"github.com/cockroachdb/errors"
)

// SyncingFileOptions holds the options for a syncingFile.
type SyncingFileOptions struct {
	// NoSyncOnClose, when true, skips the final data sync in Close. Note that
	// Close still syncs if sync_file_range was used during writing; see
	// syncingFile.Close.
	NoSyncOnClose bool
	// BytesPerSync is the number of dirty bytes after which a background sync
	// is triggered during writing; <= 0 disables periodic syncing.
	BytesPerSync int
	// PreallocateSize is the unit, in bytes, by which file space is
	// preallocated ahead of writes; 0 disables preallocation.
	PreallocateSize int
}

// syncingFile wraps a writable File and periodically syncs written data; see
// NewSyncingFile for the full contract.
type syncingFile struct {
	File
	// fd is the underlying file descriptor (0 if the wrapped File does not
	// expose one); required for sync_file_range and preallocation.
	fd              uintptr
	useSyncRange    bool
	closing         bool
	noSyncOnClose   bool
	bytesPerSync    int64
	preallocateSize int64
	// atomic groups the fields accessed with sync/atomic; Write and Sync may
	// run on different goroutines.
	atomic struct {
		// The offset at which dirty data has been written.
		offset int64
		// The offset at which data has been synced. Note that if SyncFileRange is
		// being used, the periodic syncing of data during writing will only ever
		// sync up to offset-1MB. This is done to avoid rewriting the tail of the
		// file multiple times, but has the side effect of ensuring that Close will
		// sync the file's metadata.
		syncOffset int64
	}
	// preallocatedBlocks counts how many preallocateSize-sized units have
	// already been reserved.
	preallocatedBlocks int64
	// syncData and syncTo are installed by the platform-specific init();
	// syncData falls back to File.Sync when init leaves it nil.
	syncData func() error
	syncTo   func(offset int64) error
	// timeDiskOp wraps disk operations for latency instrumentation when the
	// wrapped File supports it; otherwise it invokes the op directly.
	timeDiskOp func(op func())
}

// NewSyncingFile wraps a writable file and ensures that data is synced
// periodically as it is written. The syncing does not provide persistency
// guarantees for these periodic syncs, but is used to avoid latency spikes if
// the OS automatically decides to write out a large chunk of dirty filesystem
// buffers. The underlying file is fully synced upon close.
49 func NewSyncingFile(f File, opts SyncingFileOptions) File { 50 s := &syncingFile{ 51 File: f, 52 noSyncOnClose: bool(opts.NoSyncOnClose), 53 bytesPerSync: int64(opts.BytesPerSync), 54 preallocateSize: int64(opts.PreallocateSize), 55 } 56 // Ensure a file that is opened and then closed will be synced, even if no 57 // data has been written to it. 58 s.atomic.syncOffset = -1 59 60 type fd interface { 61 Fd() uintptr 62 } 63 if d, ok := f.(fd); ok { 64 s.fd = d.Fd() 65 } 66 type dhChecker interface { 67 timeDiskOp(op func()) 68 } 69 if d, ok := f.(dhChecker); ok { 70 s.timeDiskOp = d.timeDiskOp 71 } else { 72 s.timeDiskOp = func(op func()) { 73 op() 74 } 75 } 76 77 s.init() 78 79 if s.syncData == nil { 80 s.syncData = s.File.Sync 81 } 82 return WithFd(f, s) 83 } 84 85 // NB: syncingFile.Write is unsafe for concurrent use! 86 func (f *syncingFile) Write(p []byte) (n int, err error) { 87 _ = f.preallocate(atomic.LoadInt64(&f.atomic.offset)) 88 89 n, err = f.File.Write(p) 90 if err != nil { 91 return n, errors.WithStack(err) 92 } 93 // The offset is updated atomically so that it can be accessed safely from 94 // Sync. 
95 atomic.AddInt64(&f.atomic.offset, int64(n)) 96 if err := f.maybeSync(); err != nil { 97 return 0, err 98 } 99 return n, nil 100 } 101 102 func (f *syncingFile) preallocate(offset int64) error { 103 if f.fd == 0 || f.preallocateSize == 0 { 104 return nil 105 } 106 107 newPreallocatedBlocks := (offset + f.preallocateSize - 1) / f.preallocateSize 108 if newPreallocatedBlocks <= f.preallocatedBlocks { 109 return nil 110 } 111 112 length := f.preallocateSize * (newPreallocatedBlocks - f.preallocatedBlocks) 113 offset = f.preallocateSize * f.preallocatedBlocks 114 f.preallocatedBlocks = newPreallocatedBlocks 115 return preallocExtend(f.fd, offset, length) 116 } 117 118 func (f *syncingFile) ratchetSyncOffset(offset int64) { 119 for { 120 syncOffset := atomic.LoadInt64(&f.atomic.syncOffset) 121 if syncOffset >= offset { 122 return 123 } 124 if atomic.CompareAndSwapInt64(&f.atomic.syncOffset, syncOffset, offset) { 125 return 126 } 127 } 128 } 129 130 func (f *syncingFile) Sync() error { 131 // We update syncOffset (atomically) in order to avoid spurious syncs in 132 // maybeSync. Note that even if syncOffset is larger than the current file 133 // offset, we still need to call the underlying file's sync for persistence 134 // guarantees (which are not provided by sync_file_range). 135 f.ratchetSyncOffset(atomic.LoadInt64(&f.atomic.offset)) 136 return f.syncData() 137 } 138 139 func (f *syncingFile) maybeSync() error { 140 if f.bytesPerSync <= 0 { 141 return nil 142 } 143 144 // From the RocksDB source: 145 // 146 // We try to avoid sync to the last 1MB of data. For two reasons: 147 // (1) avoid rewrite the same page that is modified later. 148 // (2) for older version of OS, write can block while writing out 149 // the page. 150 // Xfs does neighbor page flushing outside of the specified ranges. We 151 // need to make sure sync range is far from the write offset. 
152 const syncRangeBuffer = 1 << 20 // 1 MB 153 offset := atomic.LoadInt64(&f.atomic.offset) 154 if offset <= syncRangeBuffer { 155 return nil 156 } 157 158 const syncRangeAlignment = 4 << 10 // 4 KB 159 syncToOffset := offset - syncRangeBuffer 160 syncToOffset -= syncToOffset % syncRangeAlignment 161 syncOffset := atomic.LoadInt64(&f.atomic.syncOffset) 162 if syncToOffset < 0 || (syncToOffset-syncOffset) < f.bytesPerSync { 163 return nil 164 } 165 166 if f.fd == 0 { 167 return errors.WithStack(f.Sync()) 168 } 169 170 // Note that syncTo will always be called with an offset < atomic.offset. The 171 // syncTo implementation may choose to sync the entire file (i.e. on OSes 172 // which do not support syncing a portion of the file). The syncTo 173 // implementation must call ratchetSyncOffset with as much of the file as it 174 // has synced. 175 return errors.WithStack(f.syncTo(syncToOffset)) 176 } 177 178 func (f *syncingFile) Close() error { 179 // Sync any data that has been written but not yet synced unless the file 180 // has noSyncOnClose option explicitly set. 181 // Note that if SyncFileRange was used, atomic.syncOffset will be less than 182 // atomic.offset. See syncingFile.syncToRange. 183 f.closing = true 184 if !f.noSyncOnClose || f.useSyncRange { 185 if atomic.LoadInt64(&f.atomic.offset) > atomic.LoadInt64(&f.atomic.syncOffset) { 186 if err := f.Sync(); err != nil { 187 return errors.WithStack(err) 188 } 189 } 190 } 191 return errors.WithStack(f.File.Close()) 192 }