github.com/artpar/rclone@v1.67.3/backend/cache/handle.go

     1  //go:build !plan9 && !js
     2  
     3  package cache
     4  
     5  import (
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"path"
    11  	"runtime"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/artpar/rclone/fs"
    17  	"github.com/artpar/rclone/fs/operations"
    18  )
    19  
    20  var uploaderMap = make(map[string]*backgroundWriter)
    21  var uploaderMapMx sync.Mutex
    22  
    23  // initBackgroundUploader returns a single background uploader per remote, restarting it if it was already running
    24  func initBackgroundUploader(fs *Fs) (*backgroundWriter, error) {
    25  	// write lock to create one
    26  	uploaderMapMx.Lock()
    27  	defer uploaderMapMx.Unlock()
    28  	if b, ok := uploaderMap[fs.String()]; ok {
    29  		// if it was already started we close it so that it can be started again
    30  		if b.running {
    31  			b.close()
    32  		} else {
    33  			return b, nil
    34  		}
    35  	}
    36  
    37  	bb := newBackgroundWriter(fs)
    38  	uploaderMap[fs.String()] = bb
    39  	return bb, nil
    40  }
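
        // An illustrative, untested sketch of the intended call pattern; the cache
        // Fs is assumed to start the returned writer itself and the variable names
        // here are hypothetical:
        //
        //	bu, err := initBackgroundUploader(f)
        //	if err == nil {
        //		go bu.run()
        //	}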
    41  
    42  // Handle manages the read/seek operations on an open cache handle
    43  type Handle struct {
    44  	ctx            context.Context
    45  	cachedObject   *Object
    46  	cfs            *Fs
    47  	memory         *Memory
    48  	preloadQueue   chan int64
    49  	preloadOffset  int64
    50  	offset         int64
    51  	seenOffsets    map[int64]bool
    52  	mu             sync.Mutex
    53  	workersWg      sync.WaitGroup
    54  	confirmReading chan bool
    55  	workers        int
    56  	maxWorkerID    int
    57  	UseMemory      bool
    58  	closed         bool
    59  	reading        bool
    60  }
    61  
    62  // NewObjectHandle returns a new Handle for an existing Object
    63  func NewObjectHandle(ctx context.Context, o *Object, cfs *Fs) *Handle {
    64  	r := &Handle{
    65  		ctx:           ctx,
    66  		cachedObject:  o,
    67  		cfs:           cfs,
    68  		offset:        0,
    69  		preloadOffset: -1, // -1 to trigger the first preload
    70  
    71  		UseMemory: !cfs.opt.ChunkNoMemory,
    72  		reading:   false,
    73  	}
    74  	r.seenOffsets = make(map[int64]bool)
    75  	r.memory = NewMemory(-1)
    76  
    77  	// create a buffer large enough to queue up requests for all the workers
    78  	r.preloadQueue = make(chan int64, r.cfs.opt.TotalWorkers*10)
    79  	r.confirmReading = make(chan bool)
    80  	r.startReadWorkers()
    81  	return r
    82  }
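
        // An illustrative, untested sketch of how a Handle is typically driven;
        // ctx, o and cfs are assumed to be an existing context, *Object and cache
        // *Fs, and the variable names are hypothetical:
        //
        //	h := NewObjectHandle(ctx, o, cfs)
        //	defer func() { _ = h.Close() }()
        //	if _, err := h.Seek(0, io.SeekStart); err != nil {
        //		return err
        //	}
        //	buf := make([]byte, 64*1024)
        //	n, err := h.Read(buf)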
    83  
    84  // cacheFs is a convenience method to get the parent cache FS of the object's manager
    85  func (r *Handle) cacheFs() *Fs {
    86  	return r.cfs
    87  }
    88  
    89  // storage is a convenience method to get the persistent storage of the object's manager
    90  func (r *Handle) storage() *Persistent {
    91  	return r.cacheFs().cache
    92  }
    93  
    94  // String returns a string representation of this reader
    95  func (r *Handle) String() string {
    96  	return r.cachedObject.abs()
    97  }
    98  
    99  // startReadWorkers will start the worker pool
   100  func (r *Handle) startReadWorkers() {
   101  	if r.workers > 0 {
   102  		return
   103  	}
   104  	totalWorkers := r.cacheFs().opt.TotalWorkers
   105  
   106  	if r.cacheFs().plexConnector.isConfigured() {
   107  		if !r.cacheFs().plexConnector.isConnected() {
   108  			err := r.cacheFs().plexConnector.authenticate()
   109  			if err != nil {
   110  				fs.Errorf(r, "failed to authenticate to Plex: %v", err)
   111  			}
   112  		}
   113  		if r.cacheFs().plexConnector.isConnected() {
   114  			totalWorkers = 1
   115  		}
   116  	}
   117  
   118  	r.scaleWorkers(totalWorkers)
   119  }
   120  
   121  // scaleWorkers will scale the worker pool up or down to the desired number of workers
   122  func (r *Handle) scaleWorkers(desired int) {
   123  	current := r.workers
   124  	if current == desired {
   125  		return
   126  	}
   127  	if current > desired {
   128  		// scale in gracefully
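        		// each -1 sent on the preload queue tells one worker to exit (see worker.run)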
   129  		for r.workers > desired {
   130  			r.preloadQueue <- -1
   131  			r.workers--
   132  		}
   133  	} else {
   134  		// scale out
   135  		for r.workers < desired {
   136  			w := &worker{
   137  				r:  r,
   138  				id: r.maxWorkerID,
   139  			}
   140  			r.workersWg.Add(1)
   141  			r.workers++
   142  			r.maxWorkerID++
   143  			go w.run()
   144  		}
   145  	}
   146  	// ignore first scale out from 0
   147  	if current != 0 {
   148  		fs.Debugf(r, "scale workers to %v", desired)
   149  	}
   150  }
   151  
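        // confirmExternalReading scales the worker pool back up to the configured
        // total once the Plex server confirms this object is actually being played.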
   152  func (r *Handle) confirmExternalReading() {
   153  	// if the worker pool is already scaled up, or Plex
   154  	// isn't configured at all, then we skip this step
   155  	if r.workers > 1 ||
   156  		!r.cacheFs().plexConnector.isConfigured() {
   157  		return
   158  	}
   159  	if !r.cacheFs().plexConnector.isPlaying(r.cachedObject) {
   160  		return
   161  	}
   162  	fs.Infof(r, "confirmed reading by external reader")
   163  	r.scaleWorkers(r.cacheFs().opt.TotalWorkers)
   164  }
   165  
   166  // queueOffset will send an offset to the workers if it's different from the last one
   167  func (r *Handle) queueOffset(offset int64) {
   168  	if offset != r.preloadOffset {
   169  		// clean past in-memory chunks
   170  		if r.UseMemory {
   171  			go r.memory.CleanChunksByNeed(offset)
   172  		}
   173  		r.confirmExternalReading()
   174  		r.preloadOffset = offset
   175  
   176  		// clear the past seen chunks:
   177  		// they remain in our persistent storage but are dropped from the transient cache,
   178  		// so they need to be picked up by a worker again
   179  		for k := range r.seenOffsets {
   180  			if k < offset {
   181  				r.seenOffsets[k] = false
   182  			}
   183  		}
   184  
   185  		for i := 0; i < r.workers; i++ {
   186  			o := r.preloadOffset + int64(r.cacheFs().opt.ChunkSize)*int64(i)
   187  			if o < 0 || o >= r.cachedObject.Size() {
   188  				continue
   189  			}
   190  			if v, ok := r.seenOffsets[o]; ok && v {
   191  				continue
   192  			}
   193  
   194  			r.seenOffsets[o] = true
   195  			r.preloadQueue <- o
   196  		}
   197  	}
   198  }
   199  
   200  // getChunk is called by the FS to retrieve a specific chunk of known start and size from wherever it can find it,
   201  // either the transient (memory) or the persistent cache.
   202  // It aligns the request to the cache's chunk boundaries and returns the data from the requested offset within that chunk.
   203  func (r *Handle) getChunk(chunkStart int64) ([]byte, error) {
   204  	var data []byte
   205  	var err error
   206  
   207  	// work out the offset of the request within its chunk
   208  	offset := chunkStart % int64(r.cacheFs().opt.ChunkSize)
   209  
   210  	// align the start offset to the chunk boundary used by the storage
   211  	chunkStart = chunkStart - offset
   212  	r.queueOffset(chunkStart)
   213  	found := false
   214  
   215  	if r.UseMemory {
   216  		data, err = r.memory.GetChunk(r.cachedObject, chunkStart)
   217  		if err == nil {
   218  			found = true
   219  		}
   220  	}
   221  
   222  	if !found {
   223  		// give the workers a chance to pick up the chunk
   224  		// and poll the storage at 500ms intervals while we wait
   225  		for i := 0; i < r.cacheFs().opt.ReadRetries*8; i++ {
   226  			data, err = r.storage().GetChunk(r.cachedObject, chunkStart)
   227  			if err == nil {
   228  				found = true
   229  				break
   230  			}
   231  
   232  			fs.Debugf(r, "%v: chunk retry storage: %v", chunkStart, i)
   233  			time.Sleep(time.Millisecond * 500)
   234  		}
   235  	}
   236  
   237  	// not found in RAM, or the worker didn't manage to download
   238  	// the chunk in time, so we abort and close the stream
   239  	if err != nil || len(data) == 0 || !found {
   240  		if r.workers == 0 {
   241  			fs.Errorf(r, "out of workers")
   242  			return nil, io.ErrUnexpectedEOF
   243  		}
   244  
   245  		return nil, fmt.Errorf("chunk not found %v", chunkStart)
   246  	}
   247  
   248  	// the first chunk is aligned to a chunk boundary, so trim the bytes before the requested offset
   249  	if offset > 0 {
   250  		if offset > int64(len(data)) {
   251  			fs.Errorf(r, "unexpected conditions during reading. current position: %v, current chunk position: %v, current chunk size: %v, offset: %v, chunk size: %v, file size: %v",
   252  				r.offset, chunkStart, len(data), offset, r.cacheFs().opt.ChunkSize, r.cachedObject.Size())
   253  			return nil, io.ErrUnexpectedEOF
   254  		}
   255  		data = data[int(offset):]
   256  	}
   257  
   258  	return data, nil
   259  }
   260  
   261  // Read reads up to len(p) bytes from the current offset, serving them from a cached chunk
   262  func (r *Handle) Read(p []byte) (n int, err error) {
   263  	r.mu.Lock()
   264  	defer r.mu.Unlock()
   265  	var buf []byte
   266  
   267  	// first reading
   268  	if !r.reading {
   269  		r.reading = true
   270  	}
   271  	// reached EOF
   272  	if r.offset >= r.cachedObject.Size() {
   273  		return 0, io.EOF
   274  	}
   275  	currentOffset := r.offset
   276  	buf, err = r.getChunk(currentOffset)
   277  	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
   278  		fs.Errorf(r, "(%v/%v) error (%v) response", currentOffset, r.cachedObject.Size(), err)
   279  	}
   280  	if len(buf) == 0 && err != io.ErrUnexpectedEOF {
   281  		return 0, io.EOF
   282  	}
   283  	readSize := copy(p, buf)
   284  	newOffset := currentOffset + int64(readSize)
   285  	r.offset = newOffset
   286  
   287  	return readSize, err
   288  }
   289  
   290  // Close will tell the workers to stop
   291  func (r *Handle) Close() error {
   292  	r.mu.Lock()
   293  	defer r.mu.Unlock()
   294  	if r.closed {
   295  		return errors.New("file already closed")
   296  	}
   297  
   298  	close(r.preloadQueue)
   299  	r.closed = true
   300  	// wait for workers to complete their jobs before returning
   301  	r.workersWg.Wait()
   302  	r.memory.db.Flush()
   303  
   304  	fs.Debugf(r, "cache reader closed %v", r.offset)
   305  	return nil
   306  }
   307  
   308  // Seek will move the current offset based on whence and instruct the workers to move there too
   309  func (r *Handle) Seek(offset int64, whence int) (int64, error) {
   310  	r.mu.Lock()
   311  	defer r.mu.Unlock()
   312  
   313  	var err error
   314  	switch whence {
   315  	case io.SeekStart:
   316  		fs.Debugf(r, "moving offset set from %v to %v", r.offset, offset)
   317  		r.offset = offset
   318  	case io.SeekCurrent:
   319  		fs.Debugf(r, "moving offset cur from %v to %v", r.offset, r.offset+offset)
   320  		r.offset += offset
   321  	case io.SeekEnd:
   322  		fs.Debugf(r, "moving offset end (%v) from %v to %v", r.cachedObject.Size(), r.offset, r.cachedObject.Size()+offset)
   323  		r.offset = r.cachedObject.Size() + offset
   324  	default:
   325  		err = fmt.Errorf("cache: unimplemented seek whence %v", whence)
   326  	}
   327  
   328  	chunkStart := r.offset - (r.offset % int64(r.cacheFs().opt.ChunkSize))
   329  	if chunkStart >= int64(r.cacheFs().opt.ChunkSize) {
   330  		chunkStart = chunkStart - int64(r.cacheFs().opt.ChunkSize)
   331  	}
   332  	r.queueOffset(chunkStart)
   333  
   334  	return r.offset, err
   335  }
   336  
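        // worker downloads the chunks queued on the handle's preload queue from the
        // source and stores them in the transient and persistent caches.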
   337  type worker struct {
   338  	r  *Handle
   339  	rc io.ReadCloser
   340  	id int
   341  }
   342  
   343  // String returns a representation of this worker
   344  func (w *worker) String() string {
   345  	return fmt.Sprintf("worker-%v <%v>", w.id, w.r.cachedObject.Name)
   346  }
   347  
   348  // reader will return a reader depending on the capabilities of the source reader:
   349  //   - if it supports seeking it will seek to the desired offset and return the same reader
   350  //   - if it doesn't support seeking it will close a possible existing one and open at the desired offset
   351  //   - if there's no reader associated with this worker, it will create one
   352  func (w *worker) reader(offset, end int64, closeOpen bool) (io.ReadCloser, error) {
   353  	var err error
   354  	r := w.rc
   355  	if w.rc == nil {
   356  		r, err = w.r.cacheFs().openRateLimited(func() (io.ReadCloser, error) {
   357  			return w.r.cachedObject.Object.Open(w.r.ctx, &fs.RangeOption{Start: offset, End: end - 1})
   358  		})
   359  		if err != nil {
   360  			return nil, err
   361  		}
   362  		return r, nil
   363  	}
   364  
   365  	if !closeOpen {
   366  		if do, ok := r.(fs.RangeSeeker); ok {
   367  			_, err = do.RangeSeek(w.r.ctx, offset, io.SeekStart, end-offset)
   368  			return r, err
   369  		} else if do, ok := r.(io.Seeker); ok {
   370  			_, err = do.Seek(offset, io.SeekStart)
   371  			return r, err
   372  		}
   373  	}
   374  
   375  	_ = w.rc.Close()
   376  	return w.r.cacheFs().openRateLimited(func() (io.ReadCloser, error) {
   377  		r, err = w.r.cachedObject.Object.Open(w.r.ctx, &fs.RangeOption{Start: offset, End: end - 1})
   378  		if err != nil {
   379  			return nil, err
   380  		}
   381  		return r, nil
   382  	})
   383  }
   384  
   385  // run is the main loop for the worker which receives offsets to preload
   386  func (w *worker) run() {
   387  	var err error
   388  	var data []byte
   389  	defer func() {
   390  		if w.rc != nil {
   391  			_ = w.rc.Close()
   392  		}
   393  		w.r.workersWg.Done()
   394  	}()
   395  
   396  	for {
   397  		chunkStart, open := <-w.r.preloadQueue
   398  		if chunkStart < 0 || !open {
   399  			break
   400  		}
   401  
   402  		// skip if the chunk already exists in the transient (memory) cache
   403  		if w.r.UseMemory {
   404  			if w.r.memory.HasChunk(w.r.cachedObject, chunkStart) {
   405  				continue
   406  			}
   407  
   408  			// add it to RAM if it's already in the persistent storage
   409  			data, err = w.r.storage().GetChunk(w.r.cachedObject, chunkStart)
   410  			if err == nil {
   411  				err = w.r.memory.AddChunk(w.r.cachedObject.abs(), data, chunkStart)
   412  				if err != nil {
   413  					fs.Errorf(w, "failed caching chunk in ram %v: %v", chunkStart, err)
   414  				} else {
   415  					continue
   416  				}
   417  			}
   418  		} else {
   419  			if w.r.storage().HasChunk(w.r.cachedObject, chunkStart) {
   420  				continue
   421  			}
   422  		}
   423  
   424  		chunkEnd := chunkStart + int64(w.r.cacheFs().opt.ChunkSize)
   425  		// TODO: Remove this comment if it proves to be reliable for #1896
   426  		//if chunkEnd > w.r.cachedObject.Size() {
   427  		//	chunkEnd = w.r.cachedObject.Size()
   428  		//}
   429  
   430  		w.download(chunkStart, chunkEnd, 0)
   431  	}
   432  }
   433  
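        // download fetches the byte range [chunkStart, chunkEnd) from the source,
        // retrying up to ReadRetries times with an increasing back-off, and stores
        // the resulting chunk in the caches.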
   434  func (w *worker) download(chunkStart, chunkEnd int64, retry int) {
   435  	var err error
   436  	var data []byte
   437  
   438  	// stop retries
   439  	if retry >= w.r.cacheFs().opt.ReadRetries {
   440  		return
   441  	}
   442  	// back off between retries, waiting longer on each attempt
   443  	if retry > 0 {
   444  		time.Sleep(time.Second * time.Duration(retry))
   445  	}
   446  
   447  	closeOpen := false
   448  	if retry > 0 {
   449  		closeOpen = true
   450  	}
   451  	w.rc, err = w.reader(chunkStart, chunkEnd, closeOpen)
   452  	// the open failed so refresh the source object and retry
   453  	if err != nil {
   454  		fs.Errorf(w, "object open failed %v: %v", chunkStart, err)
   455  		err = w.r.cachedObject.refreshFromSource(w.r.ctx, true)
   456  		if err != nil {
   457  			fs.Errorf(w, "%v", err)
   458  		}
   459  		w.download(chunkStart, chunkEnd, retry+1)
   460  		return
   461  	}
   462  
   463  	data = make([]byte, chunkEnd-chunkStart)
   464  	var sourceRead int
   465  	sourceRead, err = io.ReadFull(w.rc, data)
   466  	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
   467  		fs.Errorf(w, "failed to read chunk %v: %v", chunkStart, err)
   468  		err = w.r.cachedObject.refreshFromSource(w.r.ctx, true)
   469  		if err != nil {
   470  			fs.Errorf(w, "%v", err)
   471  		}
   472  		w.download(chunkStart, chunkEnd, retry+1)
   473  		return
   474  	}
   475  	data = data[:sourceRead] // reslice to remove extra garbage
   476  	if err == io.ErrUnexpectedEOF {
   477  		fs.Debugf(w, "partial downloaded chunk %v", fs.SizeSuffix(chunkStart))
   478  	} else {
   479  		fs.Debugf(w, "downloaded chunk %v", chunkStart)
   480  	}
   481  
   482  	if w.r.UseMemory {
   483  		err = w.r.memory.AddChunk(w.r.cachedObject.abs(), data, chunkStart)
   484  		if err != nil {
   485  			fs.Errorf(w, "failed caching chunk in ram %v: %v", chunkStart, err)
   486  		}
   487  	}
   488  
   489  	err = w.r.storage().AddChunk(w.r.cachedObject.abs(), data, chunkStart)
   490  	if err != nil {
   491  		fs.Errorf(w, "failed caching chunk in storage %v: %v", chunkStart, err)
   492  	}
   493  }
   494  
   495  const (
   496  	// BackgroundUploadStarted is the state of a temp file whose upload has started
   497  	BackgroundUploadStarted = iota
   498  	// BackgroundUploadCompleted is the state of a temp file whose upload has completed
   499  	BackgroundUploadCompleted
   500  	// BackgroundUploadError is the state of a temp file whose upload failed with an error
   501  	BackgroundUploadError
   502  )
   503  
   504  // BackgroundUploadState maps to an existing file stored on the temp fs and carries its upload status
   505  type BackgroundUploadState struct {
   506  	Remote string
   507  	Status int
   508  	Error  error
   509  }
   510  
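        // backgroundWriter moves pending temp files to the wrapped remote in the
        // background. stateCh carries run-state changes: 0 = running, 1 = paused,
        // 2 = closed.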
   511  type backgroundWriter struct {
   512  	fs       *Fs
   513  	stateCh  chan int
   514  	running  bool
   515  	notifyCh chan BackgroundUploadState
   516  	mu       sync.Mutex
   517  }
   518  
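        // newBackgroundWriter creates a background writer for the given cache Fs;
        // it does not start the run loop.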
   519  func newBackgroundWriter(f *Fs) *backgroundWriter {
   520  	b := &backgroundWriter{
   521  		fs:       f,
   522  		stateCh:  make(chan int),
   523  		notifyCh: make(chan BackgroundUploadState),
   524  	}
   525  
   526  	return b
   527  }
   528  
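        // close signals the run loop to exit and marks the writer as not running.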
   529  func (b *backgroundWriter) close() {
   530  	b.stateCh <- 2
   531  	b.mu.Lock()
   532  	defer b.mu.Unlock()
   533  	b.running = false
   534  
   535  }
   536  
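        // pause signals the run loop to stop processing uploads until play is called.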
   537  func (b *backgroundWriter) pause() {
   538  	b.stateCh <- 1
   539  }
   540  
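        // play signals the run loop to resume processing uploads.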
   541  func (b *backgroundWriter) play() {
   542  	b.stateCh <- 0
   543  }
   544  
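        // isRunning reports whether the run loop is currently active.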
   545  func (b *backgroundWriter) isRunning() bool {
   546  	b.mu.Lock()
   547  	defer b.mu.Unlock()
   548  	return b.running
   549  }
   550  
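        // notify sends a BackgroundUploadState on notifyCh without blocking;
        // the update is dropped if nobody is listening.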
   551  func (b *backgroundWriter) notify(remote string, status int, err error) {
   552  	state := BackgroundUploadState{
   553  		Remote: remote,
   554  		Status: status,
   555  		Error:  err,
   556  	}
   557  	select {
   558  	case b.notifyCh <- state:
   559  		fs.Debugf(remote, "notified background upload state: %v", state.Status)
   560  	default:
   561  	}
   562  }
   563  
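        // run is the main loop of the background writer: it picks up pending uploads
        // from the temp fs, moves them to the wrapped remote, cleans up empty temp
        // directories and notifies listeners of the outcome.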
   564  func (b *backgroundWriter) run() {
   565  	state := 0
   566  	for {
   567  		b.mu.Lock()
   568  		b.running = true
   569  		b.mu.Unlock()
   570  		select {
   571  		case s := <-b.stateCh:
   572  			state = s
   573  		default:
   574  			// no state change requested; keep the current state
   575  		}
   576  		switch state {
   577  		case 1:
   578  			runtime.Gosched()
   579  			time.Sleep(time.Millisecond * 500)
   580  			continue
   581  		case 2:
   582  			return
   583  		}
   584  
   585  		absPath, err := b.fs.cache.getPendingUpload(b.fs.Root(), time.Duration(b.fs.opt.TempWaitTime))
   586  		if err != nil || absPath == "" || !b.fs.isRootInPath(absPath) {
   587  			time.Sleep(time.Second)
   588  			continue
   589  		}
   590  
   591  		remote := b.fs.cleanRootFromPath(absPath)
   592  		b.notify(remote, BackgroundUploadStarted, nil)
   593  		fs.Infof(remote, "background upload: started upload")
   594  		err = operations.MoveFile(context.TODO(), b.fs.UnWrap(), b.fs.tempFs, remote, remote)
   595  		if err != nil {
   596  			b.notify(remote, BackgroundUploadError, err)
   597  			_ = b.fs.cache.rollbackPendingUpload(absPath)
   598  			fs.Errorf(remote, "background upload: %v", err)
   599  			continue
   600  		}
   601  		// clean empty dirs up to root
   602  		thisDir := cleanPath(path.Dir(remote))
   603  		for thisDir != "" {
   604  			thisList, err := b.fs.tempFs.List(context.TODO(), thisDir)
   605  			if err != nil {
   606  				break
   607  			}
   608  			if len(thisList) > 0 {
   609  				break
   610  			}
   611  			err = b.fs.tempFs.Rmdir(context.TODO(), thisDir)
   612  			fs.Debugf(thisDir, "cleaned from temp path")
   613  			if err != nil {
   614  				break
   615  			}
   616  			thisDir = cleanPath(path.Dir(thisDir))
   617  		}
   618  		fs.Infof(remote, "background upload: uploaded entry")
   619  		err = b.fs.cache.removePendingUpload(absPath)
   620  		if err != nil && !strings.Contains(err.Error(), "pending upload not found") {
   621  			fs.Errorf(remote, "background upload: %v", err)
   622  		}
   623  		parentCd := NewDirectory(b.fs, cleanPath(path.Dir(remote)))
   624  		err = b.fs.cache.ExpireDir(parentCd)
   625  		if err != nil {
   626  			fs.Errorf(parentCd, "background upload: cache expire error: %v", err)
   627  		}
   628  		b.fs.notifyChangeUpstream(remote, fs.EntryObject)
   629  		fs.Infof(remote, "finished background upload")
   630  		b.notify(remote, BackgroundUploadCompleted, nil)
   631  	}
   632  }
   633  
   634  // Check the interfaces are satisfied
   635  var (
   636  	_ io.ReadCloser = (*Handle)(nil)
   637  	_ io.Seeker     = (*Handle)(nil)
   638  )