github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/vfs/syncing_file_linux.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build linux && !arm 16 17 package vfs 18 19 import "syscall" 20 21 type syncFileRange func(fd int, off int64, n int64, flags int) (err error) 22 23 // sync_file_range depends on both the filesystem, and the broader kernel 24 // support. In particular, Windows Subsystem for Linux does not support 25 // sync_file_range, even when used with ext{2,3,4}. syncRangeSmokeTest performs 26 // a test of of sync_file_range, returning false on ENOSYS, and true otherwise. 27 func syncRangeSmokeTest(fd uintptr, fn syncFileRange) bool { 28 err := fn(int(fd), 0 /* offset */, 0 /* nbytes */, 0 /* flags */) 29 return err != syscall.ENOSYS 30 } 31 32 func isSyncRangeSupported(fd uintptr) bool { 33 var stat syscall.Statfs_t 34 if err := syscall.Fstatfs(int(fd), &stat); err != nil { 35 return false 36 } 37 38 // Allowlist which filesystems we allow using sync_file_range with as some 39 // filesystems treat that syscall as a noop (notably ZFS). A allowlist is 40 // used instead of a denylist in order to have a more graceful failure mode 41 // in case a filesystem we haven't tested is encountered. Currently only 42 // ext2/3/4 are known to work properly. 43 const extMagic = 0xef53 44 switch stat.Type { 45 case extMagic: 46 return syncRangeSmokeTest(fd, syscall.SyncFileRange) 47 } 48 return false 49 } 50 51 func (f *syncingFile) init() { 52 if f.fd == 0 { 53 return 54 } 55 f.timeDiskOp(func() { 56 f.useSyncRange = isSyncRangeSupported(f.fd) 57 }) 58 if f.useSyncRange { 59 f.syncTo = f.syncToRange 60 } else { 61 f.syncTo = f.syncToFdatasync 62 } 63 f.syncData = f.syncFdatasync 64 } 65 66 func (f *syncingFile) syncFdatasync() error { 67 if f.fd == 0 { 68 return f.File.Sync() 69 } 70 var err error 71 f.timeDiskOp(func() { 72 err = syscall.Fdatasync(int(f.fd)) 73 }) 74 return err 75 } 76 77 func (f *syncingFile) syncToFdatasync(_ int64) error { 78 return f.Sync() 79 } 80 81 func (f *syncingFile) syncToRange(offset int64) error { 82 const ( 83 waitBefore = 0x1 84 write = 0x2 85 // waitAfter = 0x4 86 ) 87 88 // Note that syncToRange is only called with an offset that is guaranteed to 89 // be less than atomic.offset (i.e. the write offset). This implies the 90 // syncingFile.Close will Sync the rest of the data, as well as the file's 91 // metadata. 92 f.ratchetSyncOffset(offset) 93 94 // By specifying write|waitBefore for the flags, we're instructing 95 // SyncFileRange to a) wait for any outstanding data being written to finish, 96 // and b) to queue any other dirty data blocks in the range [0,offset] for 97 // writing. The actual writing of this data will occur asynchronously. The 98 // use of `waitBefore` is to limit how much dirty data is allowed to 99 // accumulate. Linux sometimes behaves poorly when a large amount of dirty 100 // data accumulates, impacting other I/O operations. 101 var err error 102 f.timeDiskOp(func() { 103 err = syscall.SyncFileRange(int(f.fd), 0, offset, write|waitBefore) 104 }) 105 return err 106 }