github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/vfs/read.go (about)

     1  package vfs
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/rclone/rclone/fs"
    14  	"github.com/rclone/rclone/fs/accounting"
    15  	"github.com/rclone/rclone/fs/chunkedreader"
    16  	"github.com/rclone/rclone/fs/hash"
    17  )
    18  
    19  // ReadFileHandle is an open for read file handle on a File
    20  type ReadFileHandle struct {
    21  	baseHandle
    22  	done        func(ctx context.Context, err error)
    23  	mu          sync.Mutex
    24  	cond        sync.Cond // cond lock for out of sequence reads
    25  	r           *accounting.Account
    26  	size        int64 // size of the object (0 for unknown length)
    27  	offset      int64 // offset of read of o
    28  	roffset     int64 // offset of Read() calls
    29  	file        *File
    30  	hash        *hash.MultiHasher
    31  	remote      string
    32  	closed      bool // set if handle has been closed
    33  	readCalled  bool // set if read has been called
    34  	noSeek      bool
    35  	sizeUnknown bool // set if size of source is not known
    36  	opened      bool
    37  }
    38  
    39  // Check interfaces
    40  var (
    41  	_ io.Reader   = (*ReadFileHandle)(nil)
    42  	_ io.ReaderAt = (*ReadFileHandle)(nil)
    43  	_ io.Seeker   = (*ReadFileHandle)(nil)
    44  	_ io.Closer   = (*ReadFileHandle)(nil)
    45  )
    46  
    47  func newReadFileHandle(f *File) (*ReadFileHandle, error) {
    48  	var mhash *hash.MultiHasher
    49  	var err error
    50  	o := f.getObject()
    51  	if !f.VFS().Opt.NoChecksum {
    52  		hashes := hash.NewHashSet(o.Fs().Hashes().GetOne()) // just pick one hash
    53  		mhash, err = hash.NewMultiHasherTypes(hashes)
    54  		if err != nil {
    55  			fs.Errorf(o.Fs(), "newReadFileHandle hash error: %v", err)
    56  		}
    57  	}
    58  
    59  	fh := &ReadFileHandle{
    60  		remote:      o.Remote(),
    61  		noSeek:      f.VFS().Opt.NoSeek,
    62  		file:        f,
    63  		hash:        mhash,
    64  		size:        nonNegative(o.Size()),
    65  		sizeUnknown: o.Size() < 0,
    66  	}
    67  	fh.cond = sync.Cond{L: &fh.mu}
    68  	return fh, nil
    69  }
    70  
    71  // openPending opens the file if there is a pending open
    72  // call with the lock held
    73  func (fh *ReadFileHandle) openPending() (err error) {
    74  	if fh.opened {
    75  		return nil
    76  	}
    77  	o := fh.file.getObject()
    78  	r, err := chunkedreader.New(context.TODO(), o, int64(fh.file.VFS().Opt.ChunkSize), int64(fh.file.VFS().Opt.ChunkSizeLimit)).Open()
    79  	if err != nil {
    80  		return err
    81  	}
    82  	tr := accounting.GlobalStats().NewTransfer(o, nil)
    83  	fh.done = tr.Done
    84  	fh.r = tr.Account(context.TODO(), r).WithBuffer() // account the transfer
    85  	fh.opened = true
    86  
    87  	return nil
    88  }
    89  
    90  // String converts it to printable
    91  func (fh *ReadFileHandle) String() string {
    92  	if fh == nil {
    93  		return "<nil *ReadFileHandle>"
    94  	}
    95  	fh.mu.Lock()
    96  	defer fh.mu.Unlock()
    97  	if fh.file == nil {
    98  		return "<nil *ReadFileHandle.file>"
    99  	}
   100  	return fh.file.String() + " (r)"
   101  }
   102  
   103  // Node returns the Node associated with this - satisfies Noder interface
   104  func (fh *ReadFileHandle) Node() Node {
   105  	fh.mu.Lock()
   106  	defer fh.mu.Unlock()
   107  	return fh.file
   108  }
   109  
   110  // seek to a new offset
   111  //
   112  // if reopen is true, then we won't attempt to use an io.Seeker interface
   113  //
   114  // Must be called with fh.mu held
   115  func (fh *ReadFileHandle) seek(offset int64, reopen bool) (err error) {
   116  	if fh.noSeek {
   117  		return ESPIPE
   118  	}
   119  	fh.hash = nil
   120  	if !reopen {
   121  		ar := fh.r.GetAsyncReader()
   122  		// try to fulfill the seek with buffer discard
   123  		if ar != nil && ar.SkipBytes(int(offset-fh.offset)) {
   124  			fh.offset = offset
   125  			return nil
   126  		}
   127  	}
   128  	fh.r.StopBuffering() // stop the background reading first
   129  	oldReader := fh.r.GetReader()
   130  	r, ok := oldReader.(*chunkedreader.ChunkedReader)
   131  	if !ok {
   132  		fs.Logf(fh.remote, "ReadFileHandle.Read expected reader to be a ChunkedReader, got %T", oldReader)
   133  		reopen = true
   134  	}
   135  	if !reopen {
   136  		fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d (fs.RangeSeeker)", fh.offset, offset)
   137  		_, err = r.RangeSeek(context.TODO(), offset, io.SeekStart, -1)
   138  		if err != nil {
   139  			fs.Debugf(fh.remote, "ReadFileHandle.Read fs.RangeSeeker failed: %v", err)
   140  			return err
   141  		}
   142  	} else {
   143  		fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d", fh.offset, offset)
   144  		// close old one
   145  		err = oldReader.Close()
   146  		if err != nil {
   147  			fs.Debugf(fh.remote, "ReadFileHandle.Read seek close old failed: %v", err)
   148  		}
   149  		// re-open with a seek
   150  		o := fh.file.getObject()
   151  		r = chunkedreader.New(context.TODO(), o, int64(fh.file.VFS().Opt.ChunkSize), int64(fh.file.VFS().Opt.ChunkSizeLimit))
   152  		_, err := r.Seek(offset, 0)
   153  		if err != nil {
   154  			fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err)
   155  			return err
   156  		}
   157  		r, err = r.Open()
   158  		if err != nil {
   159  			fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err)
   160  			return err
   161  		}
   162  	}
   163  	fh.r.UpdateReader(context.TODO(), r)
   164  	fh.offset = offset
   165  	return nil
   166  }
   167  
   168  // Seek the file - returns ESPIPE if seeking isn't possible
   169  func (fh *ReadFileHandle) Seek(offset int64, whence int) (n int64, err error) {
   170  	fh.mu.Lock()
   171  	defer fh.mu.Unlock()
   172  	if fh.noSeek {
   173  		return 0, ESPIPE
   174  	}
   175  	size := fh.size
   176  	switch whence {
   177  	case io.SeekStart:
   178  		fh.roffset = 0
   179  	case io.SeekEnd:
   180  		fh.roffset = size
   181  	}
   182  	fh.roffset += offset
   183  	// we don't check the offset - the next Read will
   184  	return fh.roffset, nil
   185  }
   186  
   187  // ReadAt reads len(p) bytes into p starting at offset off in the
   188  // underlying input source. It returns the number of bytes read (0 <=
   189  // n <= len(p)) and any error encountered.
   190  //
   191  // When ReadAt returns n < len(p), it returns a non-nil error
   192  // explaining why more bytes were not returned. In this respect,
   193  // ReadAt is stricter than Read.
   194  //
   195  // Even if ReadAt returns n < len(p), it may use all of p as scratch
   196  // space during the call. If some data is available but not len(p)
   197  // bytes, ReadAt blocks until either all the data is available or an
   198  // error occurs. In this respect ReadAt is different from Read.
   199  //
   200  // If the n = len(p) bytes returned by ReadAt are at the end of the
   201  // input source, ReadAt may return either err == EOF or err == nil.
   202  //
   203  // If ReadAt is reading from an input source with a seek offset,
   204  // ReadAt should not affect nor be affected by the underlying seek
   205  // offset.
   206  //
   207  // Clients of ReadAt can execute parallel ReadAt calls on the same
   208  // input source.
   209  //
   210  // Implementations must not retain p.
   211  func (fh *ReadFileHandle) ReadAt(p []byte, off int64) (n int, err error) {
   212  	fh.mu.Lock()
   213  	defer fh.mu.Unlock()
   214  	return fh.readAt(p, off)
   215  }
   216  
   217  // This waits for *poff to equal off or aborts after the timeout.
   218  //
   219  // Waits here potentially affect all seeks so need to keep them short.
   220  //
   221  // Call with fh.mu Locked
   222  func waitSequential(what string, remote string, cond *sync.Cond, maxWait time.Duration, poff *int64, off int64) {
   223  	var (
   224  		timeout = time.NewTimer(maxWait)
   225  		done    = make(chan struct{})
   226  		abort   atomic.Int32
   227  	)
   228  	go func() {
   229  		select {
   230  		case <-timeout.C:
   231  			// take the lock to make sure that cond.Wait() is called before
   232  			// cond.Broadcast. NB cond.L == mu
   233  			cond.L.Lock()
   234  			// set abort flag and give all the waiting goroutines a kick on timeout
   235  			abort.Store(1)
   236  			fs.Debugf(remote, "aborting in-sequence %s wait, off=%d", what, off)
   237  			cond.Broadcast()
   238  			cond.L.Unlock()
   239  		case <-done:
   240  		}
   241  	}()
   242  	for *poff != off && abort.Load() == 0 {
   243  		fs.Debugf(remote, "waiting for in-sequence %s to %d for %v", what, off, maxWait)
   244  		cond.Wait()
   245  	}
   246  	// tidy up end timer
   247  	close(done)
   248  	timeout.Stop()
   249  	if *poff != off {
   250  		fs.Debugf(remote, "failed to wait for in-sequence %s to %d", what, off)
   251  	}
   252  }
   253  
   254  // Implementation of ReadAt - call with lock held
   255  func (fh *ReadFileHandle) readAt(p []byte, off int64) (n int, err error) {
   256  	// defer log.Trace(fh.remote, "p[%d], off=%d", len(p), off)("n=%d, err=%v", &n, &err)
   257  	err = fh.openPending() // FIXME pending open could be more efficient in the presence of seek (and retries)
   258  	if err != nil {
   259  		return 0, err
   260  	}
   261  	// fs.Debugf(fh.remote, "ReadFileHandle.Read size %d offset %d", reqSize, off)
   262  	if fh.closed {
   263  		fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", EBADF)
   264  		return 0, ECLOSED
   265  	}
   266  	maxBuf := 1024 * 1024
   267  	if len(p) < maxBuf {
   268  		maxBuf = len(p)
   269  	}
   270  	if gap := off - fh.offset; gap > 0 && gap < int64(8*maxBuf) {
   271  		waitSequential("read", fh.remote, &fh.cond, fh.file.VFS().Opt.ReadWait, &fh.offset, off)
   272  	}
   273  	doSeek := off != fh.offset
   274  	if doSeek && fh.noSeek {
   275  		return 0, ESPIPE
   276  	}
   277  	var newOffset int64
   278  	retries := 0
   279  	reqSize := len(p)
   280  	doReopen := false
   281  	lowLevelRetries := fs.GetConfig(context.TODO()).LowLevelRetries
   282  	for {
   283  		if doSeek {
   284  			// Are we attempting to seek beyond the end of the
   285  			// file - if so just return EOF leaving the underlying
   286  			// file in an unchanged state.
   287  			if off >= fh.size {
   288  				fs.Debugf(fh.remote, "ReadFileHandle.Read attempt to read beyond end of file: %d > %d", off, fh.size)
   289  				return 0, io.EOF
   290  			}
   291  			// Otherwise do the seek
   292  			err = fh.seek(off, doReopen)
   293  		} else {
   294  			err = nil
   295  		}
   296  		if err == nil {
   297  			if reqSize > 0 {
   298  				fh.readCalled = true
   299  			}
   300  			n, err = io.ReadFull(fh.r, p)
   301  			newOffset = fh.offset + int64(n)
   302  			// if err == nil && rand.Intn(10) == 0 {
   303  			// 	err = errors.New("random error")
   304  			// }
   305  			if err == nil {
   306  				break
   307  			} else if (err == io.ErrUnexpectedEOF || err == io.EOF) && (newOffset == fh.size || fh.sizeUnknown) {
   308  				if fh.sizeUnknown {
   309  					// size is now known since we have read to the end
   310  					fh.sizeUnknown = false
   311  					fh.size = newOffset
   312  				}
   313  				// Have read to end of file - reset error
   314  				err = nil
   315  				break
   316  			}
   317  		}
   318  		if retries >= lowLevelRetries {
   319  			break
   320  		}
   321  		retries++
   322  		fs.Errorf(fh.remote, "ReadFileHandle.Read error: low level retry %d/%d: %v", retries, lowLevelRetries, err)
   323  		doSeek = true
   324  		doReopen = true
   325  	}
   326  	if err != nil {
   327  		fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", err)
   328  	} else {
   329  		fh.offset = newOffset
   330  		// fs.Debugf(fh.remote, "ReadFileHandle.Read OK")
   331  
   332  		if fh.hash != nil {
   333  			_, err = fh.hash.Write(p[:n])
   334  			if err != nil {
   335  				fs.Errorf(fh.remote, "ReadFileHandle.Read HashError: %v", err)
   336  				return 0, err
   337  			}
   338  		}
   339  
   340  		// If we have no error and we didn't fill the buffer, must be EOF
   341  		if n != len(p) {
   342  			err = io.EOF
   343  		}
   344  	}
   345  	fh.cond.Broadcast() // wake everyone up waiting for an in-sequence read
   346  	return n, err
   347  }
   348  
   349  func (fh *ReadFileHandle) checkHash() error {
   350  	if fh.hash == nil || !fh.readCalled || fh.offset < fh.size {
   351  		return nil
   352  	}
   353  
   354  	o := fh.file.getObject()
   355  	for hashType, dstSum := range fh.hash.Sums() {
   356  		srcSum, err := o.Hash(context.TODO(), hashType)
   357  		if err != nil {
   358  			if errors.Is(err, os.ErrNotExist) {
   359  				// if it was file not found then at
   360  				// this point we don't care any more
   361  				continue
   362  			}
   363  			return err
   364  		}
   365  		if !hash.Equals(dstSum, srcSum) {
   366  			return fmt.Errorf("corrupted on transfer: %v hashes differ src %q vs dst %q", hashType, srcSum, dstSum)
   367  		}
   368  	}
   369  
   370  	return nil
   371  }
   372  
   373  // Read reads up to len(p) bytes into p. It returns the number of bytes read (0
   374  // <= n <= len(p)) and any error encountered. Even if Read returns n < len(p),
   375  // it may use all of p as scratch space during the call. If some data is
   376  // available but not len(p) bytes, Read conventionally returns what is
   377  // available instead of waiting for more.
   378  //
   379  // When Read encounters an error or end-of-file condition after successfully
   380  // reading n > 0 bytes, it returns the number of bytes read. It may return the
   381  // (non-nil) error from the same call or return the error (and n == 0) from a
   382  // subsequent call. An instance of this general case is that a Reader returning
   383  // a non-zero number of bytes at the end of the input stream may return either
   384  // err == EOF or err == nil. The next Read should return 0, EOF.
   385  //
   386  // Callers should always process the n > 0 bytes returned before considering
   387  // the error err. Doing so correctly handles I/O errors that happen after
   388  // reading some bytes and also both of the allowed EOF behaviors.
   389  //
   390  // Implementations of Read are discouraged from returning a zero byte count
   391  // with a nil error, except when len(p) == 0. Callers should treat a return of
   392  // 0 and nil as indicating that nothing happened; in particular it does not
   393  // indicate EOF.
   394  //
   395  // Implementations must not retain p.
   396  func (fh *ReadFileHandle) Read(p []byte) (n int, err error) {
   397  	fh.mu.Lock()
   398  	defer fh.mu.Unlock()
   399  	if fh.roffset >= fh.size && !fh.sizeUnknown {
   400  		return 0, io.EOF
   401  	}
   402  	n, err = fh.readAt(p, fh.roffset)
   403  	fh.roffset += int64(n)
   404  	return n, err
   405  }
   406  
   407  // close the file handle returning EBADF if it has been
   408  // closed already.
   409  //
   410  // Must be called with fh.mu held
   411  func (fh *ReadFileHandle) close() error {
   412  	if fh.closed {
   413  		return ECLOSED
   414  	}
   415  	fh.closed = true
   416  
   417  	if fh.opened {
   418  		var err error
   419  		defer func() {
   420  			fh.done(context.TODO(), err)
   421  		}()
   422  		// Close first so that we have hashes
   423  		err = fh.r.Close()
   424  		if err != nil {
   425  			return err
   426  		}
   427  		// Now check the hash
   428  		err = fh.checkHash()
   429  		if err != nil {
   430  			return err
   431  		}
   432  	}
   433  	return nil
   434  }
   435  
   436  // Close closes the file
   437  func (fh *ReadFileHandle) Close() error {
   438  	fh.mu.Lock()
   439  	defer fh.mu.Unlock()
   440  	return fh.close()
   441  }
   442  
   443  // Flush is called each time the file or directory is closed.
   444  // Because there can be multiple file descriptors referring to a
   445  // single opened file, Flush can be called multiple times.
   446  func (fh *ReadFileHandle) Flush() error {
   447  	fh.mu.Lock()
   448  	defer fh.mu.Unlock()
   449  	if !fh.opened {
   450  		return nil
   451  	}
   452  	// fs.Debugf(fh.remote, "ReadFileHandle.Flush")
   453  
   454  	if err := fh.checkHash(); err != nil {
   455  		fs.Errorf(fh.remote, "ReadFileHandle.Flush error: %v", err)
   456  		return err
   457  	}
   458  
   459  	// fs.Debugf(fh.remote, "ReadFileHandle.Flush OK")
   460  	return nil
   461  }
   462  
   463  // Release is called when we are finished with the file handle
   464  //
   465  // It isn't called directly from userspace so the error is ignored by
   466  // the kernel
   467  func (fh *ReadFileHandle) Release() error {
   468  	fh.mu.Lock()
   469  	defer fh.mu.Unlock()
   470  	if !fh.opened {
   471  		return nil
   472  	}
   473  	if fh.closed {
   474  		fs.Debugf(fh.remote, "ReadFileHandle.Release nothing to do")
   475  		return nil
   476  	}
   477  	fs.Debugf(fh.remote, "ReadFileHandle.Release closing")
   478  	err := fh.close()
   479  	if err != nil {
   480  		fs.Errorf(fh.remote, "ReadFileHandle.Release error: %v", err)
   481  		//} else {
   482  		// fs.Debugf(fh.remote, "ReadFileHandle.Release OK")
   483  	}
   484  	return err
   485  }
   486  
   487  // Name returns the name of the file from the underlying Object.
   488  func (fh *ReadFileHandle) Name() string {
   489  	return fh.file.String()
   490  }
   491  
   492  // Size returns the size of the underlying file
   493  func (fh *ReadFileHandle) Size() int64 {
   494  	fh.mu.Lock()
   495  	defer fh.mu.Unlock()
   496  	return fh.size
   497  }
   498  
   499  // Stat returns info about the file
   500  func (fh *ReadFileHandle) Stat() (os.FileInfo, error) {
   501  	fh.mu.Lock()
   502  	defer fh.mu.Unlock()
   503  	return fh.file, nil
   504  }