github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/chunkedreader/chunkedreader.go

// Package chunkedreader provides functionality for reading in chunks.
package chunkedreader

import (
	"context"
	"errors"
	"io"
	"sync"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/hash"
)

// IO-related errors returned by ChunkedReader
var (
	ErrorFileClosed  = errors.New("file already closed")
	ErrorInvalidSeek = errors.New("invalid seek position")
)

// ChunkedReader is a reader for an Object with the possibility
// of reading the source in chunks of a given size.
//
// An initialChunkSize of <= 0 will disable chunked reading.
type ChunkedReader struct {
	ctx              context.Context
	mu               sync.Mutex    // protects the following fields
	o                fs.Object     // source to read from
	rc               io.ReadCloser // reader for the current open chunk
	offset           int64         // offset at which the next Read will start; -1 forces a reopen of o
	chunkOffset      int64         // beginning of the current or next chunk
	chunkSize        int64         // length of the current or next chunk; -1 will open o from chunkOffset to the end
	initialChunkSize int64         // default chunkSize after the chunk specified by RangeSeek is complete
	maxChunkSize     int64         // consecutive chunks double in size until this is reached; -1 means no limit
	customChunkSize  bool          // is the current chunkSize set by RangeSeek?
	closed           bool          // has Close been called?
}

// New returns a ChunkedReader for the Object.
//
// An initialChunkSize of <= 0 will disable chunked reading.
// If maxChunkSize is greater than initialChunkSize, the chunk size will be
// doubled after each chunk read, up to a maximum of maxChunkSize.
// A Seek or RangeSeek will reset the chunk size to its initial value.
func New(ctx context.Context, o fs.Object, initialChunkSize int64, maxChunkSize int64) *ChunkedReader {
	if initialChunkSize <= 0 {
		initialChunkSize = -1
	}
	if maxChunkSize != -1 && maxChunkSize < initialChunkSize {
		maxChunkSize = initialChunkSize
	}
	return &ChunkedReader{
		ctx:              ctx,
		o:                o,
		offset:           -1,
		chunkSize:        initialChunkSize,
		initialChunkSize: initialChunkSize,
		maxChunkSize:     maxChunkSize,
	}
}
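
// exampleSequentialRead is an illustrative sketch, not part of the original
// file: it shows a typical way to construct a ChunkedReader and read an
// Object sequentially. The chunk starts at initialChunkSize (128 KiB here,
// an assumed value) and doubles after each fully read chunk until it reaches
// maxChunkSize (1 MiB here, also assumed).
func exampleSequentialRead(ctx context.Context, o fs.Object) error {
	cr := New(ctx, o, 128*1024, 1024*1024)
	defer func() { _ = cr.Close() }()
	buf := make([]byte, 32*1024)
	for {
		n, err := cr.Read(buf)
		_ = buf[:n] // process the data read in this call
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
}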

// Read from the file - for details see io.Reader
func (cr *ChunkedReader) Read(p []byte) (n int, err error) {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	if cr.closed {
		return 0, ErrorFileClosed
	}

	for reqSize := int64(len(p)); reqSize > 0; reqSize = int64(len(p)) {
		// the current chunk boundary. valid only when chunkSize > 0
		chunkEnd := cr.chunkOffset + cr.chunkSize

		fs.Debugf(cr.o, "ChunkedReader.Read at %d length %d chunkOffset %d chunkSize %d", cr.offset, reqSize, cr.chunkOffset, cr.chunkSize)

		switch {
		case cr.chunkSize > 0 && cr.offset == chunkEnd: // last chunk read completely
			cr.chunkOffset = cr.offset
			if cr.customChunkSize { // last chunkSize was set by RangeSeek
				cr.customChunkSize = false
				cr.chunkSize = cr.initialChunkSize
			} else {
				cr.chunkSize *= 2
				if cr.chunkSize > cr.maxChunkSize && cr.maxChunkSize != -1 {
					cr.chunkSize = cr.maxChunkSize
				}
			}
			// recalculate the chunk boundary. valid only when chunkSize > 0
			chunkEnd = cr.chunkOffset + cr.chunkSize
			fallthrough
		case cr.offset == -1: // first Read or Read after RangeSeek
			err = cr.openRange()
			if err != nil {
				return
			}
		}

		var buf []byte
		chunkRest := chunkEnd - cr.offset
		// limit read to chunk boundaries if chunkSize > 0
		if reqSize > chunkRest && cr.chunkSize > 0 {
			buf, p = p[0:chunkRest], p[chunkRest:]
		} else {
			buf, p = p, nil
		}
		var rn int
		rn, err = io.ReadFull(cr.rc, buf)
		n += rn
		cr.offset += int64(rn)
		if err != nil {
			if err == io.ErrUnexpectedEOF {
				err = io.EOF
			}
			return
		}
	}
	return n, nil
}
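
// exampleReadAcrossChunks is an illustrative sketch, not part of the original
// file: a single Read larger than the current chunk is still filled
// completely, because the loop in Read limits each io.ReadFull to the chunk
// boundary and then re-opens the source for the next chunk. The 4 KiB chunk
// size and 10 KiB request are assumed values chosen to force a mid-call
// chunk change.
func exampleReadAcrossChunks(ctx context.Context, o fs.Object) ([]byte, error) {
	cr := New(ctx, o, 4*1024, -1) // 4 KiB initial chunk, no limit on doubling
	defer func() { _ = cr.Close() }()
	buf := make([]byte, 10*1024) // spans the first chunk boundary
	n, err := cr.Read(buf)
	if err != nil && err != io.EOF {
		return nil, err
	}
	return buf[:n], nil
}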

// Close the file - for details see io.Closer
//
// All methods on ChunkedReader will return ErrorFileClosed afterwards
func (cr *ChunkedReader) Close() error {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	if cr.closed {
		return ErrorFileClosed
	}
	cr.closed = true

	return cr.resetReader(nil, 0)
}

// Seek the file - for details see io.Seeker
func (cr *ChunkedReader) Seek(offset int64, whence int) (int64, error) {
	return cr.RangeSeek(context.TODO(), offset, whence, -1)
}

// RangeSeek the file - for details see RangeSeeker
//
// The specified length will only apply to the next chunk opened.
// RangeSeek will not reopen the source until Read is called.
func (cr *ChunkedReader) RangeSeek(ctx context.Context, offset int64, whence int, length int64) (int64, error) {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	fs.Debugf(cr.o, "ChunkedReader.RangeSeek from %d to %d length %d", cr.offset, offset, length)

	if cr.closed {
		return 0, ErrorFileClosed
	}

	size := cr.o.Size()
	switch whence {
	case io.SeekStart:
		cr.offset = 0
	case io.SeekEnd:
		cr.offset = size
	}
	// set the new chunk start
	cr.chunkOffset = cr.offset + offset
	// force reopen on next Read
	cr.offset = -1
	if length > 0 {
		cr.customChunkSize = true
		cr.chunkSize = length
	} else {
		cr.chunkSize = cr.initialChunkSize
	}
	if cr.chunkOffset < 0 || cr.chunkOffset >= size {
		cr.chunkOffset = 0
		return 0, ErrorInvalidSeek
	}
	return cr.chunkOffset, nil
}
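
// exampleRangeRead is an illustrative sketch, not part of the original file:
// it uses RangeSeek to position the reader and to size the next chunk to
// exactly the range that is about to be read, so only one chunk is opened
// for it. The offset and length values are assumptions supplied by the
// caller of this example.
func exampleRangeRead(ctx context.Context, o fs.Object, offset, length int64) ([]byte, error) {
	cr := New(ctx, o, 128*1024, -1)
	defer func() { _ = cr.Close() }()
	// The length given here only applies to the next chunk; after that chunk
	// is consumed the reader falls back to its initial chunk size.
	if _, err := cr.RangeSeek(ctx, offset, io.SeekStart, length); err != nil {
		return nil, err
	}
	buf := make([]byte, length)
	n, err := io.ReadFull(cr, buf)
	if err != nil && err != io.ErrUnexpectedEOF {
		return nil, err
	}
	return buf[:n], nil
}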

// Open forces the connection to be opened
func (cr *ChunkedReader) Open() (*ChunkedReader, error) {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	if cr.rc != nil && cr.offset != -1 {
		return cr, nil
	}
	return cr, cr.openRange()
}

// openRange will open the source Object with the current chunk range
//
// If the current open reader implements RangeSeeker, it is tried first.
// When RangeSeek fails, o.Open with a RangeOption is used.
//
// A length <= 0 will request to the end of the file
func (cr *ChunkedReader) openRange() error {
	offset, length := cr.chunkOffset, cr.chunkSize
	fs.Debugf(cr.o, "ChunkedReader.openRange at %d length %d", offset, length)

	if cr.closed {
		return ErrorFileClosed
	}

	if rs, ok := cr.rc.(fs.RangeSeeker); ok {
		n, err := rs.RangeSeek(cr.ctx, offset, io.SeekStart, length)
		if err == nil && n == offset {
			cr.offset = offset
			return nil
		}
		if err != nil {
			fs.Debugf(cr.o, "ChunkedReader.openRange seek failed (%s). Trying Open", err)
		} else {
			fs.Debugf(cr.o, "ChunkedReader.openRange seeked to wrong offset. Wanted %d, got %d. Trying Open", offset, n)
		}
	}

	var rc io.ReadCloser
	var err error
	if length <= 0 {
		if offset == 0 {
			rc, err = cr.o.Open(cr.ctx, &fs.HashesOption{Hashes: hash.Set(hash.None)})
		} else {
			rc, err = cr.o.Open(cr.ctx, &fs.HashesOption{Hashes: hash.Set(hash.None)}, &fs.RangeOption{Start: offset, End: -1})
		}
	} else {
		rc, err = cr.o.Open(cr.ctx, &fs.HashesOption{Hashes: hash.Set(hash.None)}, &fs.RangeOption{Start: offset, End: offset + length - 1})
	}
	if err != nil {
		return err
	}
	return cr.resetReader(rc, offset)
}
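
// exampleDirectRangeOpen is an illustrative sketch, not part of the original
// file: it shows the range semantics openRange relies on when it falls back
// to Object.Open. fs.RangeOption.End is inclusive, which is why openRange
// passes offset+length-1, and End: -1 means "read to the end of the file".
func exampleDirectRangeOpen(ctx context.Context, o fs.Object, offset, length int64) (io.ReadCloser, error) {
	if length <= 0 {
		// open from offset to the end of the file
		return o.Open(ctx, &fs.RangeOption{Start: offset, End: -1})
	}
	return o.Open(ctx, &fs.RangeOption{Start: offset, End: offset + length - 1})
}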

// resetReader switches the current reader to the given reader.
// The old reader will be Close'd before setting the new reader.
func (cr *ChunkedReader) resetReader(rc io.ReadCloser, offset int64) error {
	if cr.rc != nil {
		if err := cr.rc.Close(); err != nil {
			return err
		}
	}
	cr.rc = rc
	cr.offset = offset
	return nil
}

var (
	_ io.ReadCloser  = (*ChunkedReader)(nil)
	_ io.Seeker      = (*ChunkedReader)(nil)
	_ fs.RangeSeeker = (*ChunkedReader)(nil)
)