github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/vfs/syncing_file_linux.go (about) 1 // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 //go:build linux && !arm 6 // +build linux,!arm 7 8 package vfs 9 10 import "syscall" 11 12 type syncFileRange func(fd int, off int64, n int64, flags int) (err error) 13 14 // sync_file_range depends on both the filesystem, and the broader kernel 15 // support. In particular, Windows Subsystem for Linux does not support 16 // sync_file_range, even when used with ext{2,3,4}. syncRangeSmokeTest performs 17 // a test of of sync_file_range, returning false on ENOSYS, and true otherwise. 18 func syncRangeSmokeTest(fd uintptr, fn syncFileRange) bool { 19 err := fn(int(fd), 0 /* offset */, 0 /* nbytes */, 0 /* flags */) 20 return err != syscall.ENOSYS 21 } 22 23 func isSyncRangeSupported(fd uintptr) bool { 24 var stat syscall.Statfs_t 25 if err := syscall.Fstatfs(int(fd), &stat); err != nil { 26 return false 27 } 28 29 // Allowlist which filesystems we allow using sync_file_range with as some 30 // filesystems treat that syscall as a noop (notably ZFS). A allowlist is 31 // used instead of a denylist in order to have a more graceful failure mode 32 // in case a filesystem we haven't tested is encountered. Currently only 33 // ext2/3/4 are known to work properly. 34 const extMagic = 0xef53 35 switch stat.Type { 36 case extMagic: 37 return syncRangeSmokeTest(fd, syscall.SyncFileRange) 38 } 39 return false 40 } 41 42 func (f *syncingFile) init() { 43 if f.fd == 0 { 44 return 45 } 46 f.timeDiskOp(func() { 47 f.useSyncRange = isSyncRangeSupported(f.fd) 48 }) 49 if f.useSyncRange { 50 f.syncTo = f.syncToRange 51 } else { 52 f.syncTo = f.syncToFdatasync 53 } 54 f.syncData = f.syncFdatasync 55 } 56 57 func (f *syncingFile) syncFdatasync() error { 58 if f.fd == 0 { 59 return f.File.Sync() 60 } 61 var err error 62 f.timeDiskOp(func() { 63 err = syscall.Fdatasync(int(f.fd)) 64 }) 65 return err 66 } 67 68 func (f *syncingFile) syncToFdatasync(_ int64) error { 69 return f.Sync() 70 } 71 72 func (f *syncingFile) syncToRange(offset int64) error { 73 const ( 74 waitBefore = 0x1 75 write = 0x2 76 // waitAfter = 0x4 77 ) 78 79 // The flags for the sync_file_range system call. Unless the file has 80 // noSyncOnClose explicitly set and it is being closed, the waitBefore 81 // flag will be set which may block the call. 82 flags := write 83 if !f.noSyncOnClose || !f.closing { 84 flags |= waitBefore 85 } 86 87 // Note that syncToRange is only called with an offset that is guaranteed to 88 // be less than atomic.offset (i.e. the write offset). This implies the 89 // syncingFile.Close will Sync the rest of the data, as well as the file's 90 // metadata. 91 f.ratchetSyncOffset(offset) 92 93 // By specifying write|waitBefore for the flags, we're instructing 94 // SyncFileRange to a) wait for any outstanding data being written to finish, 95 // and b) to queue any other dirty data blocks in the range [0,offset] for 96 // writing. The actual writing of this data will occur asynchronously. The 97 // use of `waitBefore` is to limit how much dirty data is allowed to 98 // accumulate. Linux sometimes behaves poorly when a large amount of dirty 99 // data accumulates, impacting other I/O operations. 100 var err error 101 f.timeDiskOp(func() { 102 err = syscall.SyncFileRange(int(f.fd), 0, offset, flags) 103 }) 104 return err 105 }