github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/asyncreader/asyncreader.go (about)

     1  // Package asyncreader provides an asynchronous reader which reads
     2  // independently of write
     3  package asyncreader
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"io"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/rclone/rclone/fs"
    13  	"github.com/rclone/rclone/lib/pool"
    14  	"github.com/rclone/rclone/lib/readers"
    15  )
    16  
// Sizes and limits for the read-ahead buffers and the shared buffer pool.
//
// Reads start at softStartInitial bytes and the read size is doubled for
// each subsequent buffer until it reaches BufferSize (see init).
const (
	// BufferSize is the default size of the async buffer
	BufferSize           = 1024 * 1024
	softStartInitial     = 4 * 1024
	bufferCacheSize      = 64              // max number of buffers to keep in cache
	bufferCacheFlushTime = 5 * time.Second // flush the cached buffers after this long
)
    24  
// ErrorStreamAbandoned is returned when the input is closed before the end of the stream.
//
// It is what fill reports when the ready channel has been closed and no
// other error has been recorded on the AsyncReader.
var ErrorStreamAbandoned = errors.New("stream abandoned")
    27  
// AsyncReader will do async read-ahead from the input reader
// and make the data available as an io.Reader.
// This should be fully transparent, except that once an error
// has been returned from the Reader, it will not recover.
//
// A background goroutine started by init fills buffers from in and
// passes them over the ready channel; the token channel limits how
// many buffers can be outstanding at once.
type AsyncReader struct {
	in      io.ReadCloser  // Input reader
	ready   chan *buffer   // Buffers ready to be handed to the reader
	token   chan struct{}  // Tokens which allow a buffer to be taken
	exit    chan struct{}  // Closed by StopBuffering to ask the async reader to stop
	buffers int            // Number of buffers
	err     error          // If an error has occurred it is here
	cur     *buffer        // Current buffer being served
	exited  chan struct{}  // Channel is closed when the async reader shuts down
	size    int            // size of the next read, doubled until it reaches BufferSize
	closed  bool           // whether we have closed the underlying stream
	mu      sync.Mutex     // lock taken by Read/WriteTo/SkipBytes/Abandon (and so by Close)
	ci      *fs.ConfigInfo // for reading config
}
    46  
    47  // New returns a reader that will asynchronously read from
    48  // the supplied Reader into a number of buffers each of size BufferSize
    49  // It will start reading from the input at once, maybe even before this
    50  // function has returned.
    51  // The input can be read from the returned reader.
    52  // When done use Close to release the buffers and close the supplied input.
    53  func New(ctx context.Context, rd io.ReadCloser, buffers int) (*AsyncReader, error) {
    54  	if buffers <= 0 {
    55  		return nil, errors.New("number of buffers too small")
    56  	}
    57  	if rd == nil {
    58  		return nil, errors.New("nil reader supplied")
    59  	}
    60  	a := &AsyncReader{
    61  		ci: fs.GetConfig(ctx),
    62  	}
    63  	a.init(rd, buffers)
    64  	return a, nil
    65  }
    66  
    67  func (a *AsyncReader) init(rd io.ReadCloser, buffers int) {
    68  	a.in = rd
    69  	a.ready = make(chan *buffer, buffers)
    70  	a.token = make(chan struct{}, buffers)
    71  	a.exit = make(chan struct{})
    72  	a.exited = make(chan struct{})
    73  	a.buffers = buffers
    74  	a.cur = nil
    75  	a.size = softStartInitial
    76  
    77  	// Create tokens
    78  	for i := 0; i < buffers; i++ {
    79  		a.token <- struct{}{}
    80  	}
    81  
    82  	// Start async reader
    83  	go func() {
    84  		// Ensure that when we exit this is signalled.
    85  		defer close(a.exited)
    86  		defer close(a.ready)
    87  		for {
    88  			select {
    89  			case <-a.token:
    90  				b := a.getBuffer()
    91  				if a.size < BufferSize {
    92  					b.buf = b.buf[:a.size]
    93  					a.size <<= 1
    94  				}
    95  				err := b.read(a.in)
    96  				a.ready <- b
    97  				if err != nil {
    98  					return
    99  				}
   100  			case <-a.exit:
   101  				return
   102  			}
   103  		}
   104  	}()
   105  }
   106  
// bufferPool is a global pool of buffers.
// It is created on first use by getBuffer.
var bufferPool *pool.Pool

// bufferPoolOnce makes sure bufferPool is only initialised once
var bufferPoolOnce sync.Once
   110  
   111  // return the buffer to the pool (clearing it)
   112  func (a *AsyncReader) putBuffer(b *buffer) {
   113  	bufferPool.Put(b.buf)
   114  	b.buf = nil
   115  }
   116  
   117  // get a buffer from the pool
   118  func (a *AsyncReader) getBuffer() *buffer {
   119  	bufferPoolOnce.Do(func() {
   120  		// Initialise the buffer pool when used
   121  		bufferPool = pool.New(bufferCacheFlushTime, BufferSize, bufferCacheSize, a.ci.UseMmap)
   122  	})
   123  	return &buffer{
   124  		buf: bufferPool.Get(),
   125  	}
   126  }
   127  
   128  // Read will return the next available data.
   129  func (a *AsyncReader) fill() (err error) {
   130  	if a.cur.isEmpty() {
   131  		if a.cur != nil {
   132  			a.putBuffer(a.cur)
   133  			a.token <- struct{}{}
   134  			a.cur = nil
   135  		}
   136  		b, ok := <-a.ready
   137  		if !ok {
   138  			// Return an error to show fill failed
   139  			if a.err == nil {
   140  				return ErrorStreamAbandoned
   141  			}
   142  			return a.err
   143  		}
   144  		a.cur = b
   145  	}
   146  	return nil
   147  }
   148  
   149  // Read will return the next available data.
   150  func (a *AsyncReader) Read(p []byte) (n int, err error) {
   151  	a.mu.Lock()
   152  	defer a.mu.Unlock()
   153  
   154  	// Swap buffer and maybe return error
   155  	err = a.fill()
   156  	if err != nil {
   157  		return 0, err
   158  	}
   159  
   160  	// Copy what we can
   161  	n = copy(p, a.cur.buffer())
   162  	a.cur.increment(n)
   163  
   164  	// If at end of buffer, return any error, if present
   165  	if a.cur.isEmpty() {
   166  		a.err = a.cur.err
   167  		return n, a.err
   168  	}
   169  	return n, nil
   170  }
   171  
   172  // WriteTo writes data to w until there's no more data to write or when an error occurs.
   173  // The return value n is the number of bytes written.
   174  // Any error encountered during the write is also returned.
   175  func (a *AsyncReader) WriteTo(w io.Writer) (n int64, err error) {
   176  	a.mu.Lock()
   177  	defer a.mu.Unlock()
   178  
   179  	n = 0
   180  	for {
   181  		err = a.fill()
   182  		if err == io.EOF {
   183  			return n, nil
   184  		}
   185  		if err != nil {
   186  			return n, err
   187  		}
   188  		n2, err := w.Write(a.cur.buffer())
   189  		a.cur.increment(n2)
   190  		n += int64(n2)
   191  		if err != nil {
   192  			return n, err
   193  		}
   194  		if a.cur.err == io.EOF {
   195  			a.err = a.cur.err
   196  			return n, err
   197  		}
   198  		if a.cur.err != nil {
   199  			a.err = a.cur.err
   200  			return n, a.cur.err
   201  		}
   202  	}
   203  }
   204  
   205  // SkipBytes will try to seek 'skip' bytes relative to the current position.
   206  // On success it returns true. If 'skip' is outside the current buffer data or
   207  // an error occurs, Abandon is called and false is returned.
   208  func (a *AsyncReader) SkipBytes(skip int) (ok bool) {
   209  	a.mu.Lock()
   210  	defer func() {
   211  		a.mu.Unlock()
   212  		if !ok {
   213  			a.Abandon()
   214  		}
   215  	}()
   216  
   217  	if a.err != nil {
   218  		return false
   219  	}
   220  	if skip < 0 {
   221  		// seek backwards if skip is inside current buffer
   222  		if a.cur != nil && a.cur.offset+skip >= 0 {
   223  			a.cur.offset += skip
   224  			return true
   225  		}
   226  		return false
   227  	}
   228  	// early return if skip is past the maximum buffer capacity
   229  	if skip >= (len(a.ready)+1)*BufferSize {
   230  		return false
   231  	}
   232  
   233  	refillTokens := 0
   234  	for {
   235  		if a.cur.isEmpty() {
   236  			if a.cur != nil {
   237  				a.putBuffer(a.cur)
   238  				refillTokens++
   239  				a.cur = nil
   240  			}
   241  			select {
   242  			case b, ok := <-a.ready:
   243  				if !ok {
   244  					return false
   245  				}
   246  				a.cur = b
   247  			default:
   248  				return false
   249  			}
   250  		}
   251  
   252  		n := len(a.cur.buffer())
   253  		if n > skip {
   254  			n = skip
   255  		}
   256  		a.cur.increment(n)
   257  		skip -= n
   258  		if skip == 0 {
   259  			for ; refillTokens > 0; refillTokens-- {
   260  				a.token <- struct{}{}
   261  			}
   262  			// If at end of buffer, store any error, if present
   263  			if a.cur.isEmpty() && a.cur.err != nil {
   264  				a.err = a.cur.err
   265  			}
   266  			return true
   267  		}
   268  		if a.cur.err != nil {
   269  			a.err = a.cur.err
   270  			return false
   271  		}
   272  	}
   273  }
   274  
   275  // StopBuffering will ensure that the underlying async reader is shut
   276  // down so no more is read from the input.
   277  //
   278  // This does not free the memory so Abandon() or Close() need to be
   279  // called on the input.
   280  //
   281  // This does not wait for Read/WriteTo to complete so can be called
   282  // concurrently to those.
   283  func (a *AsyncReader) StopBuffering() {
   284  	select {
   285  	case <-a.exit:
   286  		// Do nothing if reader routine already exited
   287  		return
   288  	default:
   289  	}
   290  	// Close and wait for go routine
   291  	close(a.exit)
   292  	<-a.exited
   293  }
   294  
   295  // Abandon will ensure that the underlying async reader is shut down
   296  // and memory is returned. It does everything but close the input.
   297  //
   298  // It will NOT close the input supplied on New.
   299  func (a *AsyncReader) Abandon() {
   300  	a.StopBuffering()
   301  	// take the lock to wait for Read/WriteTo to complete
   302  	a.mu.Lock()
   303  	defer a.mu.Unlock()
   304  	// Return any outstanding buffers to the Pool
   305  	if a.cur != nil {
   306  		a.putBuffer(a.cur)
   307  		a.cur = nil
   308  	}
   309  	for b := range a.ready {
   310  		a.putBuffer(b)
   311  	}
   312  }
   313  
   314  // Close will ensure that the underlying async reader is shut down.
   315  // It will also close the input supplied on New.
   316  func (a *AsyncReader) Close() (err error) {
   317  	a.Abandon()
   318  	if a.closed {
   319  		return nil
   320  	}
   321  	a.closed = true
   322  	return a.in.Close()
   323  }
   324  
// Internal buffer
// If an error is present, it must be returned
// once all buffer content has been served.
type buffer struct {
	buf    []byte // data read from the input
	err    error  // error returned by the read which filled buf, if any
	offset int    // index into buf of the next byte to be served
}
   333  
   334  // isEmpty returns true is offset is at end of
   335  // buffer, or
   336  func (b *buffer) isEmpty() bool {
   337  	if b == nil {
   338  		return true
   339  	}
   340  	if len(b.buf)-b.offset <= 0 {
   341  		return true
   342  	}
   343  	return false
   344  }
   345  
   346  // read into start of the buffer from the supplied reader,
   347  // resets the offset and updates the size of the buffer.
   348  // Any error encountered during the read is returned.
   349  func (b *buffer) read(rd io.Reader) error {
   350  	var n int
   351  	n, b.err = readers.ReadFill(rd, b.buf)
   352  	b.buf = b.buf[0:n]
   353  	b.offset = 0
   354  	return b.err
   355  }
   356  
   357  // Return the buffer at current offset
   358  func (b *buffer) buffer() []byte {
   359  	return b.buf[b.offset:]
   360  }
   361  
   362  // increment the offset
   363  func (b *buffer) increment(n int) {
   364  	b.offset += n
   365  }