github.com/VertebrateResequencing/muxfys@v3.0.5+incompatible/file.go (about)

     1  // Copyright © 2017, 2018 Genome Research Limited
     2  // Author: Sendu Bala <sb10@sanger.ac.uk>.
     3  // Some of the read code in this file is inspired by the work of Ka-Hing Cheung
     4  // in https://github.com/kahing/goofys
     5  //
     6  //  This file is part of muxfys.
     7  //
     8  //  muxfys is free software: you can redistribute it and/or modify
     9  //  it under the terms of the GNU Lesser General Public License as published by
    10  //  the Free Software Foundation, either version 3 of the License, or
    11  //  (at your option) any later version.
    12  //
    13  //  muxfys is distributed in the hope that it will be useful,
    14  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
    15  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16  //  GNU Lesser General Public License for more details.
    17  //
    18  //  You should have received a copy of the GNU Lesser General Public License
    19  //  along with muxfys. If not, see <http://www.gnu.org/licenses/>.
    20  
    21  package muxfys
    22  
    23  // This file implements pathfs.File methods for remote and cached files.
    24  
    25  import (
    26  	"io"
    27  	"os"
    28  	"strings"
    29  	"sync"
    30  	"time"
    31  
    32  	"github.com/hanwen/go-fuse/fuse"
    33  	"github.com/hanwen/go-fuse/fuse/nodefs"
    34  	"github.com/inconshreveable/log15"
    35  )
    36  
// remoteFile struct is muxfys' implementation of pathfs.File for reading data
// directly from a remote file system or object store.
type remoteFile struct {
	nodefs.File
	r             *remote          // remote this file lives on; used for getObject/seek/uploadData
	path          string           // remote path of this file
	mutex         sync.Mutex       // guards all mutable state below
	attr          *fuse.Attr       // cached attributes; Size/Mtime/Atime updated on Write and Truncate
	readOffset    int64            // offset we expect the next serial Read() to ask for
	readWorked    bool             // set once a read succeeds, enabling "reset by peer" retries
	readRetries   int              // consecutive retry count for fillBuffer; capped at 20
	reader        io.ReadCloser    // current download stream; nil when no read is in progress
	rpipe         *io.PipeReader   // read end of the upload pipe, consumed by r.uploadData()
	wpipe         *io.PipeWriter   // write end of the upload pipe; Write() feeds this
	writeOffset   int64            // offset we expect the next serial Write() to supply
	writeComplete chan bool        // reports upload success/failure after wpipe is closed in Flush()
	skips         map[int64][]byte // offset => bytes read past during slightly out-of-order reads
	log15.Logger
}
    56  
    57  // newRemoteFile creates a new RemoteFile. For all the methods not yet
    58  // implemented, fuse will get a not yet implemented error.
    59  func newRemoteFile(r *remote, path string, attr *fuse.Attr, create bool, logger log15.Logger) nodefs.File {
    60  	f := &remoteFile{
    61  		File:   nodefs.NewDefaultFile(),
    62  		r:      r,
    63  		path:   path,
    64  		attr:   attr,
    65  		skips:  make(map[int64][]byte),
    66  		Logger: logger.New("path", path),
    67  	}
    68  
    69  	if create {
    70  		f.rpipe, f.wpipe = io.Pipe()
    71  		ready, finished := r.uploadData(f.rpipe, path)
    72  		<-ready
    73  		f.writeComplete = finished
    74  	}
    75  
    76  	return f
    77  }
    78  
// Read supports random reading of data from the file. This gets called as many
// times as are needed to get through all the desired data len(buf) bytes at a
// time.
func (f *remoteFile) Read(buf []byte, offset int64) (fuse.ReadResult, fuse.Status) {
	f.mutex.Lock()
	defer f.mutex.Unlock()

	if uint64(offset) >= f.attr.Size {
		// nothing to read
		return nil, fuse.OK
	}

	// handle out-of-order reads, which happen even when the user request is a
	// serial read: we get offsets out of order
	if f.readOffset != offset {
		if f.reader != nil {
			// it's really expensive dealing with constant slightly out-of-order
			// requests, so we handle the typical case of the current expected
			// offset being skipped for the next offset, then coming back to the
			// expected one
			if offset > f.readOffset && (offset-f.readOffset) < int64((len(buf)*6)) {
				// read from reader until we get to the correct position,
				// storing what we skipped
				skippedPos := f.readOffset
				skipSize := offset - f.readOffset
				skipped := make([]byte, skipSize)
				status := f.fillBuffer(skipped, f.readOffset)
				if status != fuse.OK {
					return nil, status
				}
				lb := int64(len(buf))
				if skipSize <= lb {
					// the whole skipped region is at most one request in
					// size; store it keyed by its start offset
					f.skips[skippedPos] = skipped
				} else {
					// chop the skipped region into request-sized chunks so
					// later reads at those offsets can be served from memory
					var o int64
					for p := skippedPos; p < offset; p += lb {
						if o+lb > skipSize {
							f.skips[p] = skipped[o:]
							break
						}
						f.skips[p] = skipped[o : o+lb]
						o += lb
					}
				}
			} else if skipped, existed := f.skips[offset]; existed && len(buf) == len(skipped) {
				// service the request from the bytes we previously skipped
				copy(buf, skipped)
				delete(f.skips, offset)
				return fuse.ReadResultData(buf), fuse.OK
			} else {
				// we'll have to seek and wipe our skips
				var status fuse.Status
				f.reader, status = f.r.seek(f.reader, offset, f.path)
				if status != fuse.OK {
					return nil, status
				}
				f.skips = make(map[int64][]byte)
			}
		}
		f.readOffset = offset
	}

	// if opened previously, read from existing reader and return
	if f.reader != nil {
		status := f.fillBuffer(buf, offset)
		return fuse.ReadResultData(buf), status
	}

	// otherwise open remote object (if it doesn't exist, we only get an error
	// when we try to fillBuffer, but that's OK)
	reader, status := f.r.getObject(f.path, offset)
	if status != fuse.OK {
		return fuse.ReadResultData([]byte{}), status
	}

	// store the reader to read from later
	f.reader = reader

	status = f.fillBuffer(buf, offset)
	if status != fuse.OK {
		return fuse.ReadResultData([]byte{}), status
	}
	return fuse.ReadResultData(buf), status
}
   163  
// fillBuffer reads from our remote reader to the Read() buffer. On success it
// advances f.readOffset by the number of bytes read and resets the retry
// count. On any failure it closes and nils f.reader and resets f.readOffset
// to 0; EOF exactly at the end of the file is treated as success. Transient
// "reset by peer" errors are retried (up to 20 times) by re-getting the
// object and recursing, provided a read has previously worked.
func (f *remoteFile) fillBuffer(buf []byte, offset int64) (status fuse.Status) {
	// io.ReadFull throws away errors if enough bytes were read; implement our
	// own just in case weird stuff happens. It's also annoying in converting
	// EOF errors to ErrUnexpectedEOF, which we don't do here
	var bytesRead int
	min := len(buf)
	var err error
	for bytesRead < min && err == nil {
		var nn int
		nn, err = f.reader.Read(buf[bytesRead:])
		bytesRead += nn
	}

	if err != nil {
		errc := f.reader.Close()
		if errc != nil {
			f.Warn("fillBuffer reader close failed", "err", errc)
		}
		f.reader = nil
		// EOF is only OK if we stopped exactly at the known end of the file
		if err == io.EOF && (int64(bytesRead)+f.readOffset == int64(f.attr.Size)) {
			f.Info("fillBuffer read reached eof")
			status = fuse.OK
		} else {
			f.Error("fillBuffer read failed", "err", err, "bytesRead", bytesRead, "readOffset", f.readOffset, "offset", offset, "buffer", len(buf), "atEOF", err == io.EOF)
			if f.readWorked && f.readRetries <= 20 && strings.Contains(err.Error(), "reset by peer") {
				// if connection reset by peer and a read previously worked
				// we try getting a new object before trying again, to cope with
				// temporary networking issues
				reader, goStatus := f.r.getObject(f.path, offset)
				if goStatus == fuse.OK {
					f.Info("fillBuffer retry got the object")
					f.reader = reader
					f.readRetries++
					// brief pause before retrying, to give the network a
					// chance to recover
					<-time.After(1 * time.Second)
					return f.fillBuffer(buf, offset)
				}
				f.Error("fillBuffer retry failed to get the object")
			}
			f.Error("fillBuffer read failed and will no longer retry")
			status = f.r.statusFromErr("Read("+f.path+")", err)
		}
		f.readOffset = 0
		return
	}
	f.readWorked = true
	if f.readRetries > 0 {
		f.Warn("fillBuffer read succeeded after retrying", "retries", f.readRetries)
		f.readRetries = 0
	}
	f.readOffset += int64(bytesRead)
	return fuse.OK
}
   217  
   218  // Write supports serial writes of data directly to a remote file, where
   219  // remoteFile was made with newRemoteFile() with the create boolean set to true.
   220  func (f *remoteFile) Write(data []byte, offset int64) (uint32, fuse.Status) {
   221  	f.mutex.Lock()
   222  	defer f.mutex.Unlock()
   223  
   224  	if len(data) == 0 {
   225  		// ignore zero-length writes that come in for some reason
   226  		return uint32(0), fuse.OK
   227  	}
   228  
   229  	if offset != f.writeOffset {
   230  		// we can't handle non-serial writes
   231  		f.Warn("Write can't handle non-serial writes")
   232  		return uint32(0), fuse.EIO
   233  	}
   234  
   235  	if f.wpipe == nil {
   236  		// shouldn't happen: trying to write after close (Flush()), or without
   237  		// making the remoteFile with create true.
   238  		f.Warn("Write when wipipe nil")
   239  		return uint32(0), fuse.EIO
   240  	}
   241  
   242  	n, err := f.wpipe.Write(data)
   243  
   244  	f.writeOffset += int64(n)
   245  	f.attr.Size += uint64(n)
   246  	mTime := uint64(time.Now().Unix())
   247  	f.attr.Mtime = mTime
   248  	f.attr.Atime = mTime
   249  
   250  	return uint32(n), fuse.ToStatus(err)
   251  }
   252  
   253  // Flush, despite the name, is called for close() calls on file descriptors. It
   254  // may be called more than once at the end, and may be called at the start,
   255  // however.
   256  func (f *remoteFile) Flush() fuse.Status {
   257  	f.mutex.Lock()
   258  	defer f.mutex.Unlock()
   259  
   260  	if f.readOffset > 0 && f.reader != nil {
   261  		errc := f.reader.Close()
   262  		if errc != nil {
   263  			f.Warn("Flush reader close failed", "err", errc)
   264  		}
   265  		f.reader = nil
   266  	}
   267  
   268  	if f.writeOffset > 0 && f.wpipe != nil {
   269  		errc := f.wpipe.Close()
   270  		if errc != nil {
   271  			f.Warn("Flush wpipe close failed", "err", errc)
   272  		}
   273  		f.writeOffset = 0
   274  		worked := <-f.writeComplete
   275  		if worked {
   276  			errc = f.rpipe.Close()
   277  			if errc != nil {
   278  				f.Warn("Flush rpipe close failed", "err", errc)
   279  			}
   280  		}
   281  		f.wpipe = nil
   282  		f.rpipe = nil
   283  	}
   284  
   285  	return fuse.OK
   286  }
   287  
   288  // Release is called before the file handle is forgotten, so we do final
   289  // cleanup not done in Flush().
   290  func (f *remoteFile) Release() {
   291  	f.mutex.Lock()
   292  	defer f.mutex.Unlock()
   293  	f.skips = make(map[int64][]byte)
   294  }
   295  
   296  // Fsync always returns OK as opposed to "not implemented" so that write-sync-
   297  // write works.
   298  func (f *remoteFile) Fsync(flags int) fuse.Status {
   299  	return fuse.OK
   300  }
   301  
   302  // Truncate could be called as a prelude to writing and in alternative to
   303  // making this remoteFile in create mode.
   304  func (f *remoteFile) Truncate(size uint64) fuse.Status {
   305  	f.mutex.Lock()
   306  	defer f.mutex.Unlock()
   307  	f.attr.Size = size
   308  	if f.wpipe == nil {
   309  		f.rpipe, f.wpipe = io.Pipe()
   310  		ready, finished := f.r.uploadData(f.rpipe, f.path)
   311  		<-ready
   312  		f.writeComplete = finished
   313  	}
   314  	return fuse.OK
   315  }
   316  
// cachedFile is used as a wrapper around a nodefs.loopbackFile, the only
// difference being that on Write it updates the given attr's Size, Mtime and
// Atime, and on Read it copies data from remote to local disk if not requested
// before.
type cachedFile struct {
	nodefs.File
	r          *remote     // remote the data originally comes from
	remotePath string      // path of the file on the remote
	localPath  string      // path of the local cache file on disk
	flags      int         // flags used to open localPath; may gain os.O_RDWR during Read()
	attr       *fuse.Attr  // cached attributes, kept up to date by Write() and Utimens()
	remoteFile *remoteFile // used to fetch not-yet-cached bytes from the remote
	openedRW   bool        // whether the current loopback file was opened read-write
	mutex      sync.Mutex  // serialises Read()
	log15.Logger
}
   333  
   334  // newCachedFile makes a CachedFile that reads each byte from remotePath only
   335  // once, returning subsequent reads from and writing to localPath.
   336  func newCachedFile(r *remote, remotePath, localPath string, attr *fuse.Attr, flags uint32, logger log15.Logger) nodefs.File {
   337  	f := &cachedFile{
   338  		r:          r,
   339  		remotePath: remotePath,
   340  		localPath:  localPath,
   341  		flags:      int(flags),
   342  		attr:       attr,
   343  		Logger:     logger.New("rpath", remotePath, "lpath", localPath),
   344  	}
   345  	f.makeLoopback()
   346  	f.remoteFile = newRemoteFile(r, remotePath, attr, false, logger).(*remoteFile)
   347  	return f
   348  }
   349  
   350  func (f *cachedFile) makeLoopback() {
   351  	localFile, err := os.OpenFile(f.localPath, f.flags, os.FileMode(fileMode))
   352  	if err != nil {
   353  		f.Error("Could not open file", "err", err)
   354  	}
   355  
   356  	if f.flags&os.O_RDWR != 0 {
   357  		f.openedRW = true
   358  	} else {
   359  		f.openedRW = false
   360  	}
   361  
   362  	f.File = nodefs.NewLoopbackFile(localFile)
   363  }
   364  
   365  // InnerFile returns the loopbackFile that deals with local files on disk.
   366  func (f *cachedFile) InnerFile() nodefs.File {
   367  	return f.File
   368  }
   369  
   370  // Write passes the real work to our InnerFile(), also updating our cached
   371  // attr.
   372  func (f *cachedFile) Write(data []byte, offset int64) (uint32, fuse.Status) {
   373  	n, s := f.InnerFile().Write(data, offset)
   374  	size := uint64(offset) + uint64(n)
   375  	if size > f.attr.Size {
   376  		f.attr.Size = size // instead of += n, since offsets could come out of order
   377  	}
   378  	mTime := uint64(time.Now().Unix())
   379  	f.attr.Mtime = mTime
   380  	f.attr.Atime = mTime
   381  	f.r.Cached(f.localPath, NewInterval(offset, int64(n)))
   382  	return n, s
   383  }
   384  
   385  // Utimens gets called by things like `touch -d "2006-01-02 15:04:05" filename`,
   386  // and we need to update our cached attr as well as the local file.
   387  func (f *cachedFile) Utimens(Atime *time.Time, Mtime *time.Time) (status fuse.Status) {
   388  	status = f.InnerFile().Utimens(Atime, Mtime)
   389  	if status == fuse.OK {
   390  		f.attr.Atime = uint64(Atime.Unix())
   391  		f.attr.Mtime = uint64(Mtime.Unix())
   392  	}
   393  	return status
   394  }
   395  
// Read checks to see if we've previously stored these bytes in our local
// cached file, and if so just defers to our InnerFile(). If not, gets the data
// from the remote file and stores it in the cache file.
func (f *cachedFile) Read(buf []byte, offset int64) (fuse.ReadResult, fuse.Status) {
	f.mutex.Lock()
	defer f.mutex.Unlock()

	if uint64(offset) >= f.attr.Size {
		// nothing to read
		return nil, fuse.OK
	}

	// find which bytes we haven't previously read: clamp the requested
	// interval to the file's size, then ask the remote which parts of it are
	// not yet in the cache file
	request := NewInterval(offset, int64(len(buf)))
	if request.End >= int64(f.attr.Size-1) {
		request.End = int64(f.attr.Size - 1)
	}
	newIvs := f.r.Uncached(f.localPath, request)

	// *** have tried using a single RemoteFile per remote, and also trying to
	// combine sets of reads on the same file, but performance is best just
	// letting different reads on the same file interleave

	// read remote data and store in cache file for the previously unread parts
	for _, iv := range newIvs {
		ivBuf := make([]byte, iv.Length())
		_, status := f.remoteFile.Read(ivBuf, iv.Start)
		if status != fuse.OK {
			// we warn instead of error because this is a "normal" situation
			// when trying to read from non-existent files
			f.Warn("Read failed", "status", status)
			return nil, status
		}

		// write the data to our cache file; if we opened it read-only we must
		// first reopen it read-write
		if !f.openedRW {
			f.flags = f.flags | os.O_RDWR
			f.makeLoopback()
		}
		n, s := f.InnerFile().Write(ivBuf, iv.Start)
		if s == fuse.OK && int64(n) == iv.Length() {
			// remember that this interval no longer needs remote fetching
			f.r.Cached(f.localPath, iv)
		} else {
			f.Error("Failed to write bytes to cache file", "read", iv.Length(), "wrote", n, "status", s)
			return nil, s
		}
	}

	// read the whole region from the cache file and return
	return f.InnerFile().Read(buf, offset)
}