github.com/anacrolix/torrent@v1.61.0/storage/file-io-mmap.go (about)

     1  //go:build !wasm
     2  
     3  package storage
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"github.com/anacrolix/sync"
     9  	"io"
    10  	"io/fs"
    11  	"os"
    12  	"sync/atomic"
    13  
    14  	g "github.com/anacrolix/generics"
    15  	"github.com/anacrolix/missinggo/v2/panicif"
    16  	"github.com/edsrzf/mmap-go"
    17  )
    18  
// Lock uses of shared handles, instead of having a lifetime RLock. Because sync.RWMutex is not safe
// for recursive RLocks, you can't have both. When false (the current setting), each handle holds a
// read lock on its fileMmap for its entire lifetime instead of taking it per operation.
const lockHandleOperations = false
    22  
    23  func init() {
    24  	s, ok := os.LookupEnv("TORRENT_STORAGE_DEFAULT_FILE_IO")
    25  	if !ok {
    26  		defaultFileIo = func() fileIo {
    27  			return &mmapFileIo{}
    28  		}
    29  		return
    30  	}
    31  	switch s {
    32  	case "mmap":
    33  		defaultFileIo = func() fileIo {
    34  			return &mmapFileIo{}
    35  		}
    36  	case "classic":
    37  		defaultFileIo = func() fileIo {
    38  			return classicFileIo{}
    39  		}
    40  	default:
    41  		panic(s)
    42  	}
    43  }
    44  
// mmapFileIo is a fileIo implementation that caches one shared memory
// mapping per file path.
type mmapFileIo struct {
	// Guards paths. When operation locks are disabled this also acts as the
	// "IO context lock" that handle creation and forced close rely on.
	mu sync.RWMutex
	// We could automatically expire fileMmaps by using weak.Pointers? Currently the store never
	// relinquishes its extra ref so we never clean up anyway.
	paths map[string]*fileMmap
}
    51  
    52  func (me *mmapFileIo) rename(from, to string) (err error) {
    53  	me.mu.Lock()
    54  	defer me.mu.Unlock()
    55  	me.close(from)
    56  	me.close(to)
    57  	return os.Rename(from, to)
    58  }
    59  
    60  func (me *mmapFileIo) close(name string) {
    61  	v, ok := me.paths[name]
    62  	if ok {
    63  		// We're forcibly closing the handle. Leave the store's ref intact so we're the only one
    64  		// that closes it, then delete it anyway. We must be holding the IO context lock to be doing
    65  		// this if we're not using operation locks.
    66  		panicif.Err(v.close())
    67  		g.MustDelete(me.paths, name)
    68  	}
    69  }
    70  
    71  func (me *mmapFileIo) flush(name string, offset, nbytes int64) error {
    72  	// Since we are only flushing writes that we created, and we don't currently unmap files after
    73  	// we've opened them, then if the mmap doesn't exist yet then there's nothing to flush.
    74  	me.mu.RLock()
    75  	defer me.mu.RUnlock()
    76  	v, ok := me.paths[name]
    77  	if !ok {
    78  		return nil
    79  	}
    80  	if !v.writable {
    81  		return nil
    82  	}
    83  	// Darwin doesn't have sync for file-offsets?!
    84  	return msync(v.m, int(offset), int(nbytes))
    85  }
    86  
// Shared file access: one mapping plus its backing descriptor, reference
// counted across handles.
type fileMmap struct {
	// Read lock held for each handle. Write lock taken for destructive action like close.
	mu       sync.RWMutex
	m        mmap.MMap
	f        *os.File
	refs     atomic.Int32 // outstanding handle refs plus the store's own ref
	writable bool         // mapped RDWR when true, RDONLY when false
}
    96  
    97  func (me *fileMmap) dec() error {
    98  	if me.refs.Add(-1) == 0 {
    99  		return me.close()
   100  	}
   101  	return nil
   102  }
   103  
   104  func (me *fileMmap) close() (err error) {
   105  	// I can't see any way to avoid this. We need to forcibly alter the actual state of the handle
   106  	// underneath other consumers to kick them off. Additionally, we need to exclude users of its raw
   107  	// file descriptor. This is a potential deadlock zone if handles have lifetimes that escape the
   108  	// file storage implementation (like with NewReader, which don't provide for it).
   109  	me.mu.Lock()
   110  	defer me.mu.Unlock()
   111  	// There's no double-close protection here. Not sure if that's an issue. Probably not since we
   112  	// don't evict the store's reference anywhere for now.
   113  	return errors.Join(me.m.Unmap(), me.f.Close())
   114  }
   115  
   116  func (me *fileMmap) inc() {
   117  	panicif.LessThanOrEqual(me.refs.Add(1), 0)
   118  }
   119  
   120  func (me *mmapFileIo) openForSharedRead(name string) (_ sharedFileIf, err error) {
   121  	return me.openReadOnly(name)
   122  }
   123  
   124  func (me *mmapFileIo) openForRead(name string) (_ fileReader, err error) {
   125  	sh, err := me.openReadOnly(name)
   126  	if err != nil {
   127  		return
   128  	}
   129  	return &mmapFileHandle{
   130  		shared: sh,
   131  	}, nil
   132  }
   133  
   134  func (me *mmapFileIo) openReadOnly(name string) (_ *mmapSharedFileHandle, err error) {
   135  	me.mu.Lock()
   136  	defer me.mu.Unlock()
   137  	v, ok := me.paths[name]
   138  	if ok {
   139  		return newMmapFile(v), nil
   140  	}
   141  	f, err := os.Open(name)
   142  	if err != nil {
   143  		return
   144  	}
   145  	mm, err := mmap.Map(f, mmap.RDONLY, 0)
   146  	if err != nil {
   147  		f.Close()
   148  		err = fmt.Errorf("mapping file: %w", err)
   149  		return
   150  	}
   151  	v = me.addNewMmap(name, mm, false, f)
   152  	return newMmapFile(v), nil
   153  }
   154  
   155  func (me *mmapFileIo) openForWrite(name string, size int64) (_ fileWriter, err error) {
   156  	me.mu.Lock()
   157  	defer me.mu.Unlock()
   158  	v, ok := me.paths[name]
   159  	if ok {
   160  		if int64(len(v.m)) == size && v.writable {
   161  			return newMmapFile(v), nil
   162  		} else {
   163  			// Drop the cache ref. We aren't presuming to require it to be closed here, hmm...
   164  			v.dec()
   165  			g.MustDelete(me.paths, name)
   166  		}
   167  	}
   168  	// TODO: A bunch of this can be done without holding the lock.
   169  	f, err := openFileExtra(name, os.O_RDWR)
   170  	if err != nil {
   171  		return
   172  	}
   173  	closeFile := true
   174  	defer func() {
   175  		if closeFile {
   176  			f.Close()
   177  		}
   178  	}()
   179  	err = f.Truncate(size)
   180  	if err != nil {
   181  		err = fmt.Errorf("error truncating file: %w", err)
   182  		return
   183  	}
   184  	mm, err := mmap.Map(f, mmap.RDWR, 0)
   185  	if err != nil {
   186  		return
   187  	}
   188  	// This can happen due to filesystem changes outside our control. Don't be naive.
   189  	if int64(len(mm)) != size {
   190  		err = fmt.Errorf("new mmap has wrong size %v, expected %v", len(mm), size)
   191  		mm.Unmap()
   192  		return
   193  	}
   194  	closeFile = false
   195  	return newMmapFile(me.addNewMmap(name, mm, true, f)), nil
   196  }
   197  
   198  func newMmapFile(f *fileMmap) *mmapSharedFileHandle {
   199  	if !lockHandleOperations {
   200  		// This can't fail because we have to be holding the IO context lock to be here.
   201  		panicif.False(f.mu.TryRLock())
   202  	}
   203  	ret := &mmapSharedFileHandle{
   204  		f: f,
   205  		close: sync.OnceValue[error](func() error {
   206  			if !lockHandleOperations {
   207  				f.mu.RUnlock()
   208  			}
   209  			return f.dec()
   210  		}),
   211  	}
   212  	ret.f.inc()
   213  	return ret
   214  }
   215  
// addNewMmap caches a freshly created mapping for name and returns it. The
// name must not already be present in the cache.
func (me *mmapFileIo) addNewMmap(name string, mm mmap.MMap, writable bool, f *os.File) *fileMmap {
	v := &fileMmap{
		m:        mm,
		f:        f,
		writable: writable,
	}
	// This single ref is the store's own. The caller's ref is taken
	// separately when the *fileMmap is wrapped (newMmapFile incs).
	v.refs.Store(1)
	g.MakeMapIfNil(&me.paths)
	g.MapMustAssignNew(me.paths, name, v)
	return v
}
   228  
   229  var _ fileIo = (*mmapFileIo)(nil)
   230  
// mmapSharedFileHandle is one reference to a shared fileMmap. Closing it
// releases the reference (and, when not using per-operation locks, the read
// lock taken at creation); the close is once-wrapped, see newMmapFile.
type mmapSharedFileHandle struct {
	f     *fileMmap
	close func() error
}
   235  
   236  func (me *mmapSharedFileHandle) WriteAt(p []byte, off int64) (n int, err error) {
   237  	// It's not actually worth the hassle to write using mmap here since the caller provided the
   238  	// buffer already.
   239  	return me.f.f.WriteAt(p, off)
   240  }
   241  
   242  func (me *mmapSharedFileHandle) ReadAt(p []byte, off int64) (n int, err error) {
   243  	n = copy(p, me.f.m[off:])
   244  	if n < len(p) {
   245  		if off < 0 {
   246  			err = fs.ErrInvalid
   247  			return
   248  		}
   249  	}
   250  	if off+int64(n) == int64(len(me.f.m)) {
   251  		err = io.EOF
   252  	}
   253  	return
   254  }
   255  
   256  func (me *mmapSharedFileHandle) Close() error {
   257  	return me.close()
   258  }
   259  
// mmapFileHandle adds a sequential read cursor on top of a shared mmap
// handle, providing io.Reader/io.WriterTo-style access.
type mmapFileHandle struct {
	shared *mmapSharedFileHandle
	pos    int64 // next read offset into the mapping
}
   264  
   265  func (me *mmapFileHandle) WriteTo(w io.Writer) (n int64, err error) {
   266  	b := me.shared.f.m
   267  	if me.pos >= int64(len(b)) {
   268  		return
   269  	}
   270  	n1, err := w.Write(b[me.pos:])
   271  	n = int64(n1)
   272  	me.pos += n
   273  	return
   274  }
   275  
   276  func (me *mmapFileHandle) writeToN(w io.Writer, n int64) (written int64, err error) {
   277  	mu := &me.shared.f.mu
   278  	// If this panics we need a close error.
   279  	if lockHandleOperations {
   280  		mu.RLock()
   281  	}
   282  	b := me.shared.f.m
   283  	panicif.Nil(b) // It's been closed and we need to signal that.
   284  	if me.pos >= int64(len(b)) {
   285  		return
   286  	}
   287  	b = b[me.pos:]
   288  	b = b[:min(int64(len(b)), n)]
   289  	i, err := w.Write(b)
   290  	if lockHandleOperations {
   291  		mu.RUnlock()
   292  	}
   293  	written = int64(i)
   294  	me.pos += written
   295  	return
   296  }
   297  
   298  func (me *mmapFileHandle) Close() error {
   299  	return me.shared.Close()
   300  }
   301  
   302  func (me *mmapFileHandle) Read(p []byte) (n int, err error) {
   303  	if me.pos > int64(len(me.shared.f.m)) {
   304  		err = io.EOF
   305  		return
   306  	}
   307  	n = copy(p, me.shared.f.m[me.pos:])
   308  	me.pos += int64(n)
   309  	if me.pos >= int64(len(me.shared.f.m)) {
   310  		err = io.EOF
   311  	}
   312  	return
   313  }
   314  
   315  func (me *mmapFileHandle) seekDataOrEof(offset int64) (ret int64, err error) {
   316  	// This should be fine as it's an atomic operation, on a shared file handle, so nobody will be
   317  	// relying non-atomic operations on the file. TODO: Does this require msync first so we don't
   318  	// skip our own writes.
   319  
   320  	//  We do need to protect the file descriptor as that's not synchronized outside os.File. If
   321  	//  it's already closed before we call this, that's fine, we'll get EBADF. Don't recursively
   322  	//  RLock here if we're RLocking at the reference level.
   323  	mu := &me.shared.f.mu
   324  	if lockHandleOperations {
   325  		mu.RLock()
   326  	}
   327  	ret, err = seekData(me.shared.f.f, offset)
   328  	if lockHandleOperations {
   329  		mu.RUnlock()
   330  	}
   331  	if err == nil {
   332  		me.pos = ret
   333  	} else if err == io.EOF {
   334  		err = nil
   335  		ret = int64(len(me.shared.f.m))
   336  		me.pos = ret
   337  	} else {
   338  		ret = me.pos
   339  	}
   340  	return
   341  }