github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/operations/reopen.go

     1  package operations
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"io"
     7  	"sync"
     8  
     9  	"github.com/rclone/rclone/fs"
    10  	"github.com/rclone/rclone/fs/fserrors"
    11  )
    12  
    13  // AccountFn is a function which will be called after every read
    14  // from the ReOpen.
    15  //
    16  // It may return an error which will be passed back to the user.
    17  type AccountFn func(n int) error
    18  
    19  // ReOpen is a wrapper for an object reader which reopens the stream on error
    20  type ReOpen struct {
    21  	ctx         context.Context
    22  	mu          sync.Mutex      // mutex to protect the below
    23  	src         fs.Object       // object to open
    24  	baseOptions []fs.OpenOption // options to pass to the initial open and whenever offset == 0
    25  	options     []fs.OpenOption // options to pass on subsequent opens where offset != 0
    26  	rangeOption fs.RangeOption  // adjust this range option on re-opens
    27  	rc          io.ReadCloser   // underlying stream
    28  	size        int64           // total size of object - can be -ve
    29  	start       int64           // absolute position to start reading from
    30  	end         int64           // absolute position to end reading (exclusive)
    31  	offset      int64           // offset in the file we are at, offset from start
    32  	newOffset   int64           // if different to offset, reopen needed
    33  	maxTries    int             // maximum number of retries
    34  	tries       int             // number of retries we've had so far in this stream
    35  	err         error           // if this is set then Read/Close calls will return it
    36  	opened      bool            // if set then rc is valid and needs closing
    37  	account     AccountFn       // account for a read
    38  	reads       int             // count how many times the data has been read
    39  	accountOn   int             // only account on or after this read
    40  }
    41  
    42  var (
    43  	errFileClosed    = errors.New("file already closed")
    44  	errTooManyTries  = errors.New("failed to reopen: too many retries")
    45  	errInvalidWhence = errors.New("reopen Seek: invalid whence")
    46  	errNegativeSeek  = errors.New("reopen Seek: negative position")
    47  	errSeekPastEnd   = errors.New("reopen Seek: attempt to seek past end of data")
    48  	errBadEndSeek    = errors.New("reopen Seek: can't seek from end with unknown sized object")
    49  )
    50  
    51  // NewReOpen makes a handle which will reopen itself and seek to where
    52  // it was on errors up to maxTries times.
    53  //
    54  // If an fs.HashesOption is set this will be applied when reading from
    55  // the start.
    56  //
    57  // If an fs.RangeOption is set then this will be applied when reading from
    58  // the start, and updated on retries.
    59  func NewReOpen(ctx context.Context, src fs.Object, maxTries int, options ...fs.OpenOption) (rc *ReOpen, err error) {
    60  	h := &ReOpen{
    61  		ctx:         ctx,
    62  		src:         src,
    63  		maxTries:    maxTries,
    64  		baseOptions: options,
    65  		size:        src.Size(),
    66  		start:       0,
    67  		offset:      0,
    68  		newOffset:   -1, // -1 means no seek required
    69  	}
    70  	h.mu.Lock()
    71  	defer h.mu.Unlock()
    72  
    73  	// Filter the options for subsequent opens
    74  	h.options = make([]fs.OpenOption, 0, len(options)+1)
    75  	var limit int64 = -1
    76  	for _, option := range options {
    77  		switch x := option.(type) {
    78  		case *fs.HashesOption:
    79  			// leave hash option out when ranging
    80  		case *fs.RangeOption:
    81  			h.start, limit = x.Decode(h.size) // decode the range against the object size (h.end is not set yet)
    82  		case *fs.SeekOption:
    83  			h.start, limit = x.Offset, -1
    84  		default:
    85  			h.options = append(h.options, option)
    86  		}
    87  	}
    88  
    89  	// Put our RangeOption on the end
    90  	h.rangeOption.Start = h.start
    91  	h.options = append(h.options, &h.rangeOption)
    92  
    93  	// If a size range is set then set the end point of the file to that
    94  	if limit >= 0 && h.size >= 0 {
    95  		h.end = h.start + limit
    96  		h.rangeOption.End = h.end - 1 // remember range options are inclusive
    97  	} else {
    98  		h.end = h.size
    99  		h.rangeOption.End = -1
   100  	}
   101  
   102  	err = h.open()
   103  	if err != nil {
   104  		return nil, err
   105  	}
   106  	return h, nil
   107  }
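
        // exampleRangedReOpen is an illustrative sketch added for this listing and is
        // not part of the upstream rclone file. It shows how NewReOpen might be used
        // to read the first KiB of an object with up to 3 reopen attempts; dst is
        // assumed to be supplied by the caller.
        func exampleRangedReOpen(ctx context.Context, src fs.Object, dst io.Writer) error {
        	// Range options are inclusive, so Start: 0, End: 1023 covers 1024 bytes
        	rc, err := NewReOpen(ctx, src, 3, &fs.RangeOption{Start: 0, End: 1023})
        	if err != nil {
        		return err
        	}
        	defer func() { _ = rc.Close() }()
        	// Low level read errors are retried transparently inside Read
        	_, err = io.Copy(dst, rc)
        	return err
        }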
   108  
   109  // Open makes a handle which will reopen itself and seek to where it
   110  // was on errors.
   111  //
   112  // If an fs.HashesOption is set this will be applied when reading from
   113  // the start.
   114  //
   115  // If an fs.RangeOption is set then this will be applied when reading from
   116  // the start, and updated on retries.
   117  //
   118  // It will obey LowLevelRetries in the ctx as the maximum number of
   119  // tries.
   120  //
   121  // Use this instead of calling the Open method on fs.Objects
   122  func Open(ctx context.Context, src fs.Object, options ...fs.OpenOption) (rc *ReOpen, err error) {
   123  	maxTries := fs.GetConfig(ctx).LowLevelRetries
   124  	return NewReOpen(ctx, src, maxTries, options...)
   125  }
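
        // exampleOpen is an illustrative sketch added for this listing and is not
        // part of the upstream rclone file. It shows the preferred entry point: Open
        // takes the maximum number of tries from LowLevelRetries in the config.
        func exampleOpen(ctx context.Context, src fs.Object, dst io.Writer) error {
        	rc, err := Open(ctx, src)
        	if err != nil {
        		return err
        	}
        	defer func() { _ = rc.Close() }()
        	_, err = io.Copy(dst, rc)
        	return err
        }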
   126  
   127  // open the underlying handle - call with lock held
   128  //
   129  // we don't retry here as the Open() call will itself have low level retries
   130  func (h *ReOpen) open() error {
   131  	var opts []fs.OpenOption
   132  	if h.offset == 0 {
   133  		// if reading from the start, use the initial options
   134  		opts = h.baseOptions
   135  	} else {
   136  		// otherwise use the filtered options
   137  		opts = h.options
   138  		// Adjust range start to where we have got to
   139  		h.rangeOption.Start = h.start + h.offset
   140  	}
   141  	// Make a copy of the options as fs.FixRangeOption modifies them :-(
   142  	opts = append(make([]fs.OpenOption, 0, len(opts)), opts...)
   143  	h.tries++
   144  	if h.tries > h.maxTries {
   145  		h.err = errTooManyTries
   146  	} else {
   147  		h.rc, h.err = h.src.Open(h.ctx, opts...)
   148  	}
   149  	if h.err != nil {
   150  		if h.tries > 1 {
   151  			fs.Debugf(h.src, "Reopen failed after offset %d bytes read: %v", h.offset, h.err)
   152  		}
   153  		return h.err
   154  	}
   155  	h.opened = true
   156  	return nil
   157  }
   158  
   159  // reopen the underlying handle by closing it and reopening it.
   160  func (h *ReOpen) reopen() (err error) {
   161  	// close underlying stream if needed
   162  	if h.opened {
   163  		h.opened = false
   164  		_ = h.rc.Close()
   165  	}
   166  	return h.open()
   167  }
   168  
   169  // account for n bytes being read
   170  func (h *ReOpen) accountRead(n int) error {
   171  	if h.account == nil {
   172  		return nil
   173  	}
   174  	// Don't start accounting until we've reached this many reads
   175  	//
   176  	// h.reads will be 1 the first time this is called
   177  	// h.accountOn == 2 means start accounting on the 2nd read through
   178  	if h.reads >= h.accountOn {
   179  		return h.account(n)
   180  	}
   181  	return nil
   182  }
   183  
   184  // Read bytes retrying as necessary
   185  func (h *ReOpen) Read(p []byte) (n int, err error) {
   186  	h.mu.Lock()
   187  	defer h.mu.Unlock()
   188  	if h.err != nil {
   189  		// return a previous error if there is one
   190  		return n, h.err
   191  	}
   192  
   193  	// re-open if seek needed
   194  	if h.newOffset >= 0 {
   195  		if h.offset != h.newOffset {
   196  			fs.Debugf(h.src, "Seek from %d to %d", h.offset, h.newOffset)
   197  			h.offset = h.newOffset
   198  			err = h.reopen()
   199  			if err != nil {
   200  				return 0, err
   201  			}
   202  		}
   203  		h.newOffset = -1
   204  	}
   205  
   206  	// Read a full buffer
   207  	startOffset := h.offset
   208  	var nn int
   209  	for n < len(p) && err == nil {
   210  		nn, err = h.rc.Read(p[n:])
   211  		n += nn
   212  		h.offset += int64(nn)
   213  		if err != nil && err != io.EOF {
   214  			h.err = err
   215  			if !fserrors.IsNoLowLevelRetryError(err) {
   216  				fs.Debugf(h.src, "Reopening on read failure after offset %d bytes: retry %d/%d: %v", h.offset, h.tries, h.maxTries, err)
   217  				if h.reopen() == nil {
   218  					err = nil
   219  				}
   220  			}
   221  		}
   222  	}
   223  	// Count a read of the data if we read from the start successfully
   224  	if startOffset == 0 && n != 0 {
   225  		h.reads++
   226  	}
   227  	// Account the read
   228  	accErr := h.accountRead(n)
   229  	if err == nil {
   230  		err = accErr
   231  	}
   232  	return n, err
   233  }
   234  
   235  // Seek sets the offset for the next Read or Write to offset, interpreted
   236  // according to whence: SeekStart means relative to the start of the file,
   237  // SeekCurrent means relative to the current offset, and SeekEnd means relative
   238  // to the end (for example, offset = -2 specifies the penultimate byte of the
   239  // file). Seek returns the new offset relative to the start of the file or an
   240  // error, if any.
   241  //
   242  // Seeking to an offset before the start of the file is an error. Seeking
   243  // to any positive offset may be allowed, but if the new offset exceeds the
   244  // size of the underlying object the behavior of subsequent I/O operations is
   245  // implementation-dependent.
   246  func (h *ReOpen) Seek(offset int64, whence int) (int64, error) {
   247  	h.mu.Lock()
   248  	defer h.mu.Unlock()
   249  	if h.err != nil {
   250  		// return a previous error if there is one
   251  		return 0, h.err
   252  	}
   253  	var abs int64
   254  	var size = h.end - h.start
   255  	switch whence {
   256  	case io.SeekStart:
   257  		abs = offset
   258  	case io.SeekCurrent:
   259  		if h.newOffset >= 0 {
   260  			abs = h.newOffset + offset
   261  		} else {
   262  			abs = h.offset + offset
   263  		}
   264  	case io.SeekEnd:
   265  		if h.size < 0 {
   266  			return 0, errBadEndSeek
   267  		}
   268  		abs = size + offset
   269  	default:
   270  		return 0, errInvalidWhence
   271  	}
   272  	if abs < 0 {
   273  		return 0, errNegativeSeek
   274  	}
   275  	if h.size >= 0 && abs > size {
   276  		return size, errSeekPastEnd
   277  	}
   278  
   279  	h.tries = 0       // Reset open count on seek
   280  	h.newOffset = abs // New offset - applied in Read
   281  	return abs, nil
   282  }
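
        // exampleSeek is an illustrative sketch added for this listing and is not
        // part of the upstream rclone file. Seek only records the new position; the
        // reopen (if one is needed) happens lazily on the next Read. The object is
        // assumed to be at least 1 MiB long.
        func exampleSeek(ctx context.Context, src fs.Object) ([]byte, error) {
        	rc, err := Open(ctx, src)
        	if err != nil {
        		return nil, err
        	}
        	defer func() { _ = rc.Close() }()
        	// Skip the first 1 MiB - no new open happens here
        	if _, err := rc.Seek(1<<20, io.SeekStart); err != nil {
        		return nil, err
        	}
        	// This Read reopens the underlying stream at the new offset
        	buf := make([]byte, 4096)
        	n, err := rc.Read(buf)
        	if err == io.EOF {
        		err = nil
        	}
        	return buf[:n], err
        }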
   283  
   284  // Close the stream
   285  func (h *ReOpen) Close() error {
   286  	h.mu.Lock()
   287  	defer h.mu.Unlock()
   288  	if !h.opened {
   289  		return errFileClosed
   290  	}
   291  	h.opened = false
   292  	h.err = errFileClosed
   293  	return h.rc.Close()
   294  }
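
        // Illustrative assertion added for this listing, not part of the upstream
        // file: with Read, Seek and Close defined, a *ReOpen satisfies
        // io.ReadSeekCloser and can be passed to code expecting a seekable stream.
        var _ io.ReadSeekCloser = (*ReOpen)(nil)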
   295  
   296  // SetAccounting should be provided with a function which will be
   297  // called after every read from the ReOpen.
   298  //
   299  // It may return an error which will be passed back to the user.
   300  func (h *ReOpen) SetAccounting(account AccountFn) *ReOpen {
   301  	h.account = account
   302  	return h
   303  }
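
        // exampleAccounting is an illustrative sketch added for this listing and is
        // not part of the upstream rclone file. It wires a simple byte counter into
        // the handle via SetAccounting; in rclone proper this is typically the
        // transfer accounting callback.
        func exampleAccounting(ctx context.Context, src fs.Object) (int64, error) {
        	rc, err := Open(ctx, src)
        	if err != nil {
        		return 0, err
        	}
        	defer func() { _ = rc.Close() }()
        	var total int64
        	rc.SetAccounting(func(n int) error {
        		total += int64(n) // called with the byte count of every Read
        		return nil
        	})
        	_, err = io.Copy(io.Discard, rc)
        	return total, err
        }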
   304  
   305  // DelayAccounting makes sure the accounting function only gets called
   306  // on the i-th or later read of the data from this point (counting
   307  // from 1).
   308  //
   309  // This is useful so that we don't account initial reads of the data
   310  // e.g. when calculating hashes.
   311  //
   312  // Set this to 0 to account everything.
   313  func (h *ReOpen) DelayAccounting(i int) {
   314  	h.accountOn = i
   315  	h.reads = 0
   316  }
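
        // exampleDelayedAccounting is an illustrative sketch added for this listing
        // and is not part of the upstream rclone file. With DelayAccounting(2) the
        // first pass over the data (standing in for a hashing pass) is not accounted;
        // after seeking back to the start, the second read through is.
        func exampleDelayedAccounting(ctx context.Context, src fs.Object, account AccountFn) error {
        	rc, err := Open(ctx, src)
        	if err != nil {
        		return err
        	}
        	defer func() { _ = rc.Close() }()
        	rc.SetAccounting(account)
        	rc.DelayAccounting(2) // start accounting on the 2nd read through
        	// First pass - e.g. calculating a hash - not accounted
        	if _, err := io.Copy(io.Discard, rc); err != nil {
        		return err
        	}
        	// Rewind and read again - this pass is accounted
        	if _, err := rc.Seek(0, io.SeekStart); err != nil {
        		return err
        	}
        	_, err = io.Copy(io.Discard, rc)
        	return err
        }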