github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/vfs/syncing_file.go

// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package vfs

import (
	"sync/atomic"

	"github.com/cockroachdb/errors"
)

// SyncingFileOptions holds the options for a syncingFile.
type SyncingFileOptions struct {
	// NoSyncOnClose requests that Close skip the final sync, unless
	// sync_file_range-style syncing was used while writing (see Close).
	NoSyncOnClose bool
	// BytesPerSync, when positive, causes dirty data to be synced periodically
	// as it is written, roughly every BytesPerSync bytes.
	BytesPerSync int
	// PreallocateSize, when positive, is the chunk size used to preallocate
	// disk space ahead of the write offset.
	PreallocateSize int
}

type syncingFile struct {
	File
	// fd is the underlying file's descriptor, if it exposes one. It is needed
	// for preallocation and for sync_file_range-style syncing.
	fd uintptr
	// useSyncRange records that sync_file_range-style syncing is in use (see
	// the platform-specific syncTo implementation and Close).
	useSyncRange bool
	// closing is set once Close has been called.
	closing         bool
	noSyncOnClose   bool
	bytesPerSync    int64
	preallocateSize int64
	atomic          struct {
		// The offset at which dirty data has been written.
		offset int64
		// The offset at which data has been synced. Note that if SyncFileRange is
		// being used, the periodic syncing of data during writing will only ever
		// sync up to offset-1MB. This is done to avoid rewriting the tail of the
		// file multiple times, but has the side effect of ensuring that Close will
		// sync the file's metadata.
		syncOffset int64
	}
	preallocatedBlocks int64
	// syncData fully syncs the file and is installed by the platform-specific
	// init, falling back to File.Sync. syncTo syncs data up to the given offset
	// and must ratchet syncOffset by as much of the file as it has synced (see
	// maybeSync). timeDiskOp wraps disk operations so the underlying file can
	// time them, when it supports doing so.
	syncData   func() error
	syncTo     func(offset int64) error
	timeDiskOp func(op func())
}

// NewSyncingFile wraps a writable file and ensures that data is synced
// periodically as it is written. The syncing does not provide persistence
// guarantees for these periodic syncs, but is used to avoid latency spikes if
// the OS automatically decides to write out a large chunk of dirty filesystem
// buffers. The underlying file is fully synced upon close.
func NewSyncingFile(f File, opts SyncingFileOptions) File {
	s := &syncingFile{
		File:            f,
		noSyncOnClose:   opts.NoSyncOnClose,
		bytesPerSync:    int64(opts.BytesPerSync),
		preallocateSize: int64(opts.PreallocateSize),
	}
	// Ensure a file that is opened and then closed will be synced, even if no
	// data has been written to it.
	s.atomic.syncOffset = -1

	type fd interface {
		Fd() uintptr
	}
	if d, ok := f.(fd); ok {
		s.fd = d.Fd()
	}
	type dhChecker interface {
		timeDiskOp(op func())
	}
	if d, ok := f.(dhChecker); ok {
		s.timeDiskOp = d.timeDiskOp
	} else {
		s.timeDiskOp = func(op func()) {
			op()
		}
	}

	// init is implemented per-platform; where supported it installs syncTo and
	// syncData implementations (e.g. using sync_file_range).
	s.init()

	if s.syncData == nil {
		s.syncData = s.File.Sync
	}
	return WithFd(f, s)
}
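
// A minimal usage sketch from a caller's perspective (illustrative only; it
// assumes this fork keeps Pebble's vfs.Default FS and FS.Create, and the
// option values are arbitrary):
//
//	f, err := vfs.Default.Create("000001.log")
//	if err != nil {
//		// handle the error
//	}
//	sf := vfs.NewSyncingFile(f, vfs.SyncingFileOptions{
//		BytesPerSync:    512 << 10, // sync dirty data roughly every 512 KB
//		PreallocateSize: 4 << 20,   // preallocate disk space in 4 MB chunks
//	})
//	// ... sf.Write(...) ...
//	_ = sf.Sync()  // explicit full sync when durability is required
//	_ = sf.Close() // also fully syncs unless NoSyncOnClose is set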

// NB: syncingFile.Write is unsafe for concurrent use!
func (f *syncingFile) Write(p []byte) (n int, err error) {
	// Preallocation is best-effort: any error is intentionally ignored.
	_ = f.preallocate(atomic.LoadInt64(&f.atomic.offset))

	n, err = f.File.Write(p)
	if err != nil {
		return n, errors.WithStack(err)
	}
	// The offset is updated atomically so that it can be accessed safely from
	// Sync.
	atomic.AddInt64(&f.atomic.offset, int64(n))
	if err := f.maybeSync(); err != nil {
		return 0, err
	}
	return n, nil
}

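// preallocate preallocates disk space ahead of the given write offset, in
// chunks of preallocateSize bytes. It is a no-op when the file descriptor is
// unknown, preallocation is disabled, or the required blocks have already
// been preallocated.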
func (f *syncingFile) preallocate(offset int64) error {
	if f.fd == 0 || f.preallocateSize == 0 {
		return nil
	}

	newPreallocatedBlocks := (offset + f.preallocateSize - 1) / f.preallocateSize
	if newPreallocatedBlocks <= f.preallocatedBlocks {
		return nil
	}

	length := f.preallocateSize * (newPreallocatedBlocks - f.preallocatedBlocks)
	offset = f.preallocateSize * f.preallocatedBlocks
	f.preallocatedBlocks = newPreallocatedBlocks
	return preallocExtend(f.fd, offset, length)
}

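// ratchetSyncOffset atomically advances atomic.syncOffset to offset. It never
// moves the sync offset backwards.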
func (f *syncingFile) ratchetSyncOffset(offset int64) {
	for {
		syncOffset := atomic.LoadInt64(&f.atomic.syncOffset)
		if syncOffset >= offset {
			return
		}
		if atomic.CompareAndSwapInt64(&f.atomic.syncOffset, syncOffset, offset) {
			return
		}
	}
}

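// Sync fully syncs the file. The sync offset is first ratcheted to the
// current write offset so that maybeSync does not schedule a redundant
// periodic sync.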
func (f *syncingFile) Sync() error {
	// We update syncOffset (atomically) in order to avoid spurious syncs in
	// maybeSync. Note that even if syncOffset is larger than the current file
	// offset, we still need to call the underlying file's sync for persistence
	// guarantees (which are not provided by sync_file_range).
	f.ratchetSyncOffset(atomic.LoadInt64(&f.atomic.offset))
	return f.syncData()
}

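// maybeSync performs the periodic, bytesPerSync-driven syncing of dirty data.
// It is a no-op unless bytesPerSync is positive and at least bytesPerSync new
// bytes have accumulated beyond the last synced offset; the trailing 1MB of
// the file is never synced here and the sync boundary is 4KB-aligned.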
func (f *syncingFile) maybeSync() error {
	if f.bytesPerSync <= 0 {
		return nil
	}

	// From the RocksDB source:
	//
	//   We try to avoid sync to the last 1MB of data. For two reasons:
	//   (1) avoid rewrite the same page that is modified later.
	//   (2) for older version of OS, write can block while writing out
	//       the page.
	//   Xfs does neighbor page flushing outside of the specified ranges. We
	//   need to make sure sync range is far from the write offset.
	const syncRangeBuffer = 1 << 20 // 1 MB
	offset := atomic.LoadInt64(&f.atomic.offset)
	if offset <= syncRangeBuffer {
		return nil
	}

	const syncRangeAlignment = 4 << 10 // 4 KB
	syncToOffset := offset - syncRangeBuffer
	syncToOffset -= syncToOffset % syncRangeAlignment
	syncOffset := atomic.LoadInt64(&f.atomic.syncOffset)
	if syncToOffset < 0 || (syncToOffset-syncOffset) < f.bytesPerSync {
		return nil
	}

	if f.fd == 0 {
		return errors.WithStack(f.Sync())
	}

	// Note that syncTo will always be called with an offset < atomic.offset. The
	// syncTo implementation may choose to sync the entire file (i.e. on OSes
	// which do not support syncing a portion of the file). The syncTo
	// implementation must call ratchetSyncOffset with as much of the file as it
	// has synced.
	return errors.WithStack(f.syncTo(syncToOffset))
}

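// Close syncs any remaining dirty data (unless noSyncOnClose is set and
// sync_file_range-style syncing was not used) and then closes the underlying
// file.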
func (f *syncingFile) Close() error {
	// Sync any data that has been written but not yet synced, unless the
	// noSyncOnClose option is explicitly set. Note that if SyncFileRange was
	// used, atomic.syncOffset will be less than atomic.offset. See
	// syncingFile.syncToRange.
	f.closing = true
	if !f.noSyncOnClose || f.useSyncRange {
		if atomic.LoadInt64(&f.atomic.offset) > atomic.LoadInt64(&f.atomic.syncOffset) {
			if err := f.Sync(); err != nil {
				return errors.WithStack(err)
			}
		}
	}
	return errors.WithStack(f.File.Close())
}