github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/vfs/read.go (about)

     1  package vfs
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  	"os"
     7  	"sync"
     8  	"sync/atomic"
     9  	"time"
    10  
    11  	"github.com/pkg/errors"
    12  	"github.com/rclone/rclone/fs"
    13  	"github.com/rclone/rclone/fs/accounting"
    14  	"github.com/rclone/rclone/fs/chunkedreader"
    15  	"github.com/rclone/rclone/fs/hash"
    16  )
    17  
    18  // ReadFileHandle is an open for read file handle on a File
    19  type ReadFileHandle struct {
    20  	baseHandle
    21  	done        func(err error)
    22  	mu          sync.Mutex
    23  	cond        *sync.Cond // cond lock for out of sequence reads
    24  	closed      bool       // set if handle has been closed
    25  	r           *accounting.Account
    26  	readCalled  bool  // set if read has been called
    27  	size        int64 // size of the object (0 for unknown length)
    28  	offset      int64 // offset of read of o
    29  	roffset     int64 // offset of Read() calls
    30  	noSeek      bool
    31  	sizeUnknown bool // set if size of source is not known
    32  	file        *File
    33  	hash        *hash.MultiHasher
    34  	opened      bool
    35  	remote      string
    36  }
    37  
    38  // Check interfaces
    39  var (
    40  	_ io.Reader   = (*ReadFileHandle)(nil)
    41  	_ io.ReaderAt = (*ReadFileHandle)(nil)
    42  	_ io.Seeker   = (*ReadFileHandle)(nil)
    43  	_ io.Closer   = (*ReadFileHandle)(nil)
    44  )
    45  
    46  func newReadFileHandle(f *File) (*ReadFileHandle, error) {
    47  	var mhash *hash.MultiHasher
    48  	var err error
    49  	o := f.getObject()
    50  	if !f.d.vfs.Opt.NoChecksum {
    51  		hashes := hash.NewHashSet(o.Fs().Hashes().GetOne()) // just pick one hash
    52  		mhash, err = hash.NewMultiHasherTypes(hashes)
    53  		if err != nil {
    54  			fs.Errorf(o.Fs(), "newReadFileHandle hash error: %v", err)
    55  		}
    56  	}
    57  
    58  	fh := &ReadFileHandle{
    59  		remote:      o.Remote(),
    60  		noSeek:      f.d.vfs.Opt.NoSeek,
    61  		file:        f,
    62  		hash:        mhash,
    63  		size:        nonNegative(o.Size()),
    64  		sizeUnknown: o.Size() < 0,
    65  	}
    66  	fh.cond = sync.NewCond(&fh.mu)
    67  	return fh, nil
    68  }
    69  
    70  // openPending opens the file if there is a pending open
    71  // call with the lock held
    72  func (fh *ReadFileHandle) openPending() (err error) {
    73  	if fh.opened {
    74  		return nil
    75  	}
    76  	o := fh.file.getObject()
    77  	r, err := chunkedreader.New(context.TODO(), o, int64(fh.file.d.vfs.Opt.ChunkSize), int64(fh.file.d.vfs.Opt.ChunkSizeLimit)).Open()
    78  	if err != nil {
    79  		return err
    80  	}
    81  	tr := accounting.GlobalStats().NewTransfer(o)
    82  	fh.done = tr.Done
    83  	fh.r = tr.Account(r).WithBuffer() // account the transfer
    84  	fh.opened = true
    85  
    86  	return nil
    87  }
    88  
    89  // String converts it to printable
    90  func (fh *ReadFileHandle) String() string {
    91  	if fh == nil {
    92  		return "<nil *ReadFileHandle>"
    93  	}
    94  	fh.mu.Lock()
    95  	defer fh.mu.Unlock()
    96  	if fh.file == nil {
    97  		return "<nil *ReadFileHandle.file>"
    98  	}
    99  	return fh.file.String() + " (r)"
   100  }
   101  
   102  // Node returns the Node assocuated with this - satisfies Noder interface
   103  func (fh *ReadFileHandle) Node() Node {
   104  	fh.mu.Lock()
   105  	defer fh.mu.Unlock()
   106  	return fh.file
   107  }
   108  
   109  // seek to a new offset
   110  //
   111  // if reopen is true, then we won't attempt to use an io.Seeker interface
   112  //
   113  // Must be called with fh.mu held
   114  func (fh *ReadFileHandle) seek(offset int64, reopen bool) (err error) {
   115  	if fh.noSeek {
   116  		return ESPIPE
   117  	}
   118  	fh.hash = nil
   119  	if !reopen {
   120  		ar := fh.r.GetAsyncReader()
   121  		// try to fullfill the seek with buffer discard
   122  		if ar != nil && ar.SkipBytes(int(offset-fh.offset)) {
   123  			fh.offset = offset
   124  			return nil
   125  		}
   126  	}
   127  	fh.r.StopBuffering() // stop the background reading first
   128  	oldReader := fh.r.GetReader()
   129  	r, ok := oldReader.(*chunkedreader.ChunkedReader)
   130  	if !ok {
   131  		fs.Logf(fh.remote, "ReadFileHandle.Read expected reader to be a ChunkedReader, got %T", oldReader)
   132  		reopen = true
   133  	}
   134  	if !reopen {
   135  		fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d (fs.RangeSeeker)", fh.offset, offset)
   136  		_, err = r.RangeSeek(context.TODO(), offset, io.SeekStart, -1)
   137  		if err != nil {
   138  			fs.Debugf(fh.remote, "ReadFileHandle.Read fs.RangeSeeker failed: %v", err)
   139  			return err
   140  		}
   141  	} else {
   142  		fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d", fh.offset, offset)
   143  		// close old one
   144  		err = oldReader.Close()
   145  		if err != nil {
   146  			fs.Debugf(fh.remote, "ReadFileHandle.Read seek close old failed: %v", err)
   147  		}
   148  		// re-open with a seek
   149  		o := fh.file.getObject()
   150  		r = chunkedreader.New(context.TODO(), o, int64(fh.file.d.vfs.Opt.ChunkSize), int64(fh.file.d.vfs.Opt.ChunkSizeLimit))
   151  		_, err := r.Seek(offset, 0)
   152  		if err != nil {
   153  			fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err)
   154  			return err
   155  		}
   156  		r, err = r.Open()
   157  		if err != nil {
   158  			fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err)
   159  			return err
   160  		}
   161  	}
   162  	fh.r.UpdateReader(r)
   163  	fh.offset = offset
   164  	return nil
   165  }
   166  
   167  // Seek the file - returns ESPIPE if seeking isn't possible
   168  func (fh *ReadFileHandle) Seek(offset int64, whence int) (n int64, err error) {
   169  	fh.mu.Lock()
   170  	defer fh.mu.Unlock()
   171  	if fh.noSeek {
   172  		return 0, ESPIPE
   173  	}
   174  	size := fh.size
   175  	switch whence {
   176  	case io.SeekStart:
   177  		fh.roffset = 0
   178  	case io.SeekEnd:
   179  		fh.roffset = size
   180  	}
   181  	fh.roffset += offset
   182  	// we don't check the offset - the next Read will
   183  	return fh.roffset, nil
   184  }
   185  
   186  // ReadAt reads len(p) bytes into p starting at offset off in the
   187  // underlying input source. It returns the number of bytes read (0 <=
   188  // n <= len(p)) and any error encountered.
   189  //
   190  // When ReadAt returns n < len(p), it returns a non-nil error
   191  // explaining why more bytes were not returned. In this respect,
   192  // ReadAt is stricter than Read.
   193  //
   194  // Even if ReadAt returns n < len(p), it may use all of p as scratch
   195  // space during the call. If some data is available but not len(p)
   196  // bytes, ReadAt blocks until either all the data is available or an
   197  // error occurs. In this respect ReadAt is different from Read.
   198  //
   199  // If the n = len(p) bytes returned by ReadAt are at the end of the
   200  // input source, ReadAt may return either err == EOF or err == nil.
   201  //
   202  // If ReadAt is reading from an input source with a seek offset,
   203  // ReadAt should not affect nor be affected by the underlying seek
   204  // offset.
   205  //
   206  // Clients of ReadAt can execute parallel ReadAt calls on the same
   207  // input source.
   208  //
   209  // Implementations must not retain p.
   210  func (fh *ReadFileHandle) ReadAt(p []byte, off int64) (n int, err error) {
   211  	fh.mu.Lock()
   212  	defer fh.mu.Unlock()
   213  	return fh.readAt(p, off)
   214  }
   215  
   216  // Implementation of ReadAt - call with lock held
   217  func (fh *ReadFileHandle) readAt(p []byte, off int64) (n int, err error) {
   218  	// defer log.Trace(fh.remote, "p[%d], off=%d", len(p), off)("n=%d, err=%v", &n, &err)
   219  	err = fh.openPending() // FIXME pending open could be more efficient in the presense of seek (and retries)
   220  	if err != nil {
   221  		return 0, err
   222  	}
   223  	// fs.Debugf(fh.remote, "ReadFileHandle.Read size %d offset %d", reqSize, off)
   224  	if fh.closed {
   225  		fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", EBADF)
   226  		return 0, ECLOSED
   227  	}
   228  	maxBuf := 1024 * 1024
   229  	if len(p) < maxBuf {
   230  		maxBuf = len(p)
   231  	}
   232  	if gap := off - fh.offset; gap > 0 && gap < int64(8*maxBuf) {
   233  		// Set a background timer so we don't wait for long
   234  		// Waits here potentially affect all seeks so need to keep them short
   235  		// This time here was made by finding the smallest when mounting a local backend
   236  		// that didn't cause seeks.
   237  		const maxWait = 5 * time.Millisecond
   238  		timeout := time.NewTimer(maxWait)
   239  		done := make(chan struct{})
   240  		abort := int32(0)
   241  		go func() {
   242  			select {
   243  			case <-timeout.C:
   244  				// set abort flag an give all the waiting goroutines a kick on timeout
   245  				atomic.StoreInt32(&abort, 1)
   246  				fs.Debugf(fh.remote, "aborting in-sequence read wait, off=%d", off)
   247  				fh.cond.Broadcast()
   248  			case <-done:
   249  			}
   250  		}()
   251  		for fh.offset != off && atomic.LoadInt32(&abort) == 0 {
   252  			fs.Debugf(fh.remote, "waiting for in-sequence read to %d for %v", off, maxWait)
   253  			fh.cond.Wait()
   254  		}
   255  		// tidy up end timer
   256  		close(done)
   257  		timeout.Stop()
   258  		if fh.offset != off {
   259  			fs.Debugf(fh.remote, "failed to wait for in-sequence read to %d", off)
   260  		}
   261  	}
   262  	doSeek := off != fh.offset
   263  	if doSeek && fh.noSeek {
   264  		return 0, ESPIPE
   265  	}
   266  	var newOffset int64
   267  	retries := 0
   268  	reqSize := len(p)
   269  	doReopen := false
   270  	for {
   271  		if doSeek {
   272  			// Are we attempting to seek beyond the end of the
   273  			// file - if so just return EOF leaving the underlying
   274  			// file in an unchanged state.
   275  			if off >= fh.size {
   276  				fs.Debugf(fh.remote, "ReadFileHandle.Read attempt to read beyond end of file: %d > %d", off, fh.size)
   277  				return 0, io.EOF
   278  			}
   279  			// Otherwise do the seek
   280  			err = fh.seek(off, doReopen)
   281  		} else {
   282  			err = nil
   283  		}
   284  		if err == nil {
   285  			if reqSize > 0 {
   286  				fh.readCalled = true
   287  			}
   288  			n, err = io.ReadFull(fh.r, p)
   289  			newOffset = fh.offset + int64(n)
   290  			// if err == nil && rand.Intn(10) == 0 {
   291  			// 	err = errors.New("random error")
   292  			// }
   293  			if err == nil {
   294  				break
   295  			} else if (err == io.ErrUnexpectedEOF || err == io.EOF) && (newOffset == fh.size || fh.sizeUnknown) {
   296  				if fh.sizeUnknown {
   297  					// size is now known since we have read to the end
   298  					fh.sizeUnknown = false
   299  					fh.size = newOffset
   300  				}
   301  				// Have read to end of file - reset error
   302  				err = nil
   303  				break
   304  			}
   305  		}
   306  		if retries >= fs.Config.LowLevelRetries {
   307  			break
   308  		}
   309  		retries++
   310  		fs.Errorf(fh.remote, "ReadFileHandle.Read error: low level retry %d/%d: %v", retries, fs.Config.LowLevelRetries, err)
   311  		doSeek = true
   312  		doReopen = true
   313  	}
   314  	if err != nil {
   315  		fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", err)
   316  	} else {
   317  		fh.offset = newOffset
   318  		// fs.Debugf(fh.remote, "ReadFileHandle.Read OK")
   319  
   320  		if fh.hash != nil {
   321  			_, err = fh.hash.Write(p[:n])
   322  			if err != nil {
   323  				fs.Errorf(fh.remote, "ReadFileHandle.Read HashError: %v", err)
   324  				return 0, err
   325  			}
   326  		}
   327  
   328  		// If we have no error and we didn't fill the buffer, must be EOF
   329  		if n != len(p) {
   330  			err = io.EOF
   331  		}
   332  	}
   333  	fh.cond.Broadcast() // wake everyone up waiting for an in-sequence read
   334  	return n, err
   335  }
   336  
   337  func (fh *ReadFileHandle) checkHash() error {
   338  	if fh.hash == nil || !fh.readCalled || fh.offset < fh.size {
   339  		return nil
   340  	}
   341  
   342  	o := fh.file.getObject()
   343  	for hashType, dstSum := range fh.hash.Sums() {
   344  		srcSum, err := o.Hash(context.TODO(), hashType)
   345  		if err != nil {
   346  			return err
   347  		}
   348  		if !hash.Equals(dstSum, srcSum) {
   349  			return errors.Errorf("corrupted on transfer: %v hash differ %q vs %q", hashType, dstSum, srcSum)
   350  		}
   351  	}
   352  
   353  	return nil
   354  }
   355  
   356  // Read reads up to len(p) bytes into p. It returns the number of bytes read (0
   357  // <= n <= len(p)) and any error encountered. Even if Read returns n < len(p),
   358  // it may use all of p as scratch space during the call. If some data is
   359  // available but not len(p) bytes, Read conventionally returns what is
   360  // available instead of waiting for more.
   361  //
   362  // When Read encounters an error or end-of-file condition after successfully
   363  // reading n > 0 bytes, it returns the number of bytes read. It may return the
   364  // (non-nil) error from the same call or return the error (and n == 0) from a
   365  // subsequent call. An instance of this general case is that a Reader returning
   366  // a non-zero number of bytes at the end of the input stream may return either
   367  // err == EOF or err == nil. The next Read should return 0, EOF.
   368  //
   369  // Callers should always process the n > 0 bytes returned before considering
   370  // the error err. Doing so correctly handles I/O errors that happen after
   371  // reading some bytes and also both of the allowed EOF behaviors.
   372  //
   373  // Implementations of Read are discouraged from returning a zero byte count
   374  // with a nil error, except when len(p) == 0. Callers should treat a return of
   375  // 0 and nil as indicating that nothing happened; in particular it does not
   376  // indicate EOF.
   377  //
   378  // Implementations must not retain p.
   379  func (fh *ReadFileHandle) Read(p []byte) (n int, err error) {
   380  	fh.mu.Lock()
   381  	defer fh.mu.Unlock()
   382  	if fh.roffset >= fh.size && !fh.sizeUnknown {
   383  		return 0, io.EOF
   384  	}
   385  	n, err = fh.readAt(p, fh.roffset)
   386  	fh.roffset += int64(n)
   387  	return n, err
   388  }
   389  
   390  // close the file handle returning EBADF if it has been
   391  // closed already.
   392  //
   393  // Must be called with fh.mu held
   394  func (fh *ReadFileHandle) close() error {
   395  	if fh.closed {
   396  		return ECLOSED
   397  	}
   398  	fh.closed = true
   399  
   400  	if fh.opened {
   401  		var err error
   402  		defer func() {
   403  			fh.done(err)
   404  		}()
   405  		// Close first so that we have hashes
   406  		err = fh.r.Close()
   407  		if err != nil {
   408  			return err
   409  		}
   410  		// Now check the hash
   411  		err = fh.checkHash()
   412  		if err != nil {
   413  			return err
   414  		}
   415  	}
   416  	return nil
   417  }
   418  
   419  // Close closes the file
   420  func (fh *ReadFileHandle) Close() error {
   421  	fh.mu.Lock()
   422  	defer fh.mu.Unlock()
   423  	return fh.close()
   424  }
   425  
   426  // Flush is called each time the file or directory is closed.
   427  // Because there can be multiple file descriptors referring to a
   428  // single opened file, Flush can be called multiple times.
   429  func (fh *ReadFileHandle) Flush() error {
   430  	fh.mu.Lock()
   431  	defer fh.mu.Unlock()
   432  	if !fh.opened {
   433  		return nil
   434  	}
   435  	// fs.Debugf(fh.remote, "ReadFileHandle.Flush")
   436  
   437  	if err := fh.checkHash(); err != nil {
   438  		fs.Errorf(fh.remote, "ReadFileHandle.Flush error: %v", err)
   439  		return err
   440  	}
   441  
   442  	// fs.Debugf(fh.remote, "ReadFileHandle.Flush OK")
   443  	return nil
   444  }
   445  
   446  // Release is called when we are finished with the file handle
   447  //
   448  // It isn't called directly from userspace so the error is ignored by
   449  // the kernel
   450  func (fh *ReadFileHandle) Release() error {
   451  	fh.mu.Lock()
   452  	defer fh.mu.Unlock()
   453  	if !fh.opened {
   454  		return nil
   455  	}
   456  	if fh.closed {
   457  		fs.Debugf(fh.remote, "ReadFileHandle.Release nothing to do")
   458  		return nil
   459  	}
   460  	fs.Debugf(fh.remote, "ReadFileHandle.Release closing")
   461  	err := fh.close()
   462  	if err != nil {
   463  		fs.Errorf(fh.remote, "ReadFileHandle.Release error: %v", err)
   464  	} else {
   465  		// fs.Debugf(fh.remote, "ReadFileHandle.Release OK")
   466  	}
   467  	return err
   468  }
   469  
   470  // Size returns the size of the underlying file
   471  func (fh *ReadFileHandle) Size() int64 {
   472  	fh.mu.Lock()
   473  	defer fh.mu.Unlock()
   474  	return fh.size
   475  }
   476  
   477  // Stat returns info about the file
   478  func (fh *ReadFileHandle) Stat() (os.FileInfo, error) {
   479  	fh.mu.Lock()
   480  	defer fh.mu.Unlock()
   481  	return fh.file, nil
   482  }