gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/downloadstreamer.go (about)

     1  package renter
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"sync"
     7  	"time"
     8  
     9  	"gitlab.com/NebulousLabs/errors"
    10  
    11  	"gitlab.com/SkynetLabs/skyd/skymodules"
    12  	"gitlab.com/SkynetLabs/skyd/skymodules/renter/filesystem"
    13  	"gitlab.com/SkynetLabs/skyd/skymodules/renter/filesystem/siafile"
    14  )
    15  
    16  type (
    17  	// streamer is a skymodules.Streamer that can be used to stream downloads from
    18  	// the sia network.
    19  	streamer struct {
    20  		// Reader variables. The snapshot is a snapshot of the file as it
    21  		// existed when it was opened, something that we do to give the streamer
    22  		// a consistent view of the file even if the file is being actively
    23  		// updated. Having this snapshot also isolates the reader from events
    24  		// such as name changes and deletions.
    25  		//
    26  		// We also keep the full file entry as it allows us to update metadata
    27  		// items in the file such as the access time.
    28  		staticFile   *siafile.Snapshot
    29  		offset       int64
    30  		staticRenter *Renter
    31  
    32  		// The cache itself is a []byte that is managed by threadedFillCache. The
    33  		// 'cacheOffset' indicates the starting location of the cache within the
    34  		// file, and all of the data in the []byte will be the actual file data
    35  		// that follows that offset. If the cache is empty, the length will be
    36  		// 0.
    37  		//
    38  		// Because the cache gets filled asynchronously, errors need to be
    39  		// recorded and then delivered to the user later. The errors get stored
    40  		// in readErr.
    41  		//
    42  		// cacheReady is a rotating channel which is used to signal to threads
    43  		// that the cache has been updated. When a Read call is made, the first
    44  		// action required is to grab a lock and then check if the cache has the
    45  		// requested data. If not, while still holding the lock the Read thread
    46  		// will grab a copy of cacheReady, and then release the lock. When the
    47  		// threadedFillCache thread has finished updating the cache, the thread
    48  		// will grab the lock and then the cacheReady channel will be closed and
    49  		// replaced with a new channel. This allows any number of Read threads
    50  		// to simultaneously block while waiting for cacheReady to be closed,
    51  		// and once cacheReady is closed they know to check the cache again.
    52  		//
    53  		// Multiple asynchronous calls to fill the cache may be sent out at
    54  		// once. To prevent race conditions, the 'cacheActive' channel is used
    55  		// to ensure that only one instance of 'threadedFillCache' is running at
    56  		// a time. If another instance of 'threadedFillCache' is active, the new
    57  		// call will immediately return.
    58  		cache                   []byte
    59  		activateCache           chan struct{}
    60  		cacheOffset             int64
    61  		cacheReady              chan struct{}
    62  		staticDisableLocalFetch bool
    63  		readErr                 error
    64  		targetCacheSize         int64
    65  
    66  		// Mutex to protect the offset variable, and all of the cacheing
    67  		// variables.
    68  		mu sync.Mutex
    69  	}
    70  )
    71  
    72  // managedFillCache will determine whether or not the cache of the streamer
    73  // needs to be filled, and if it does it will add data to the streamer.
    74  func (s *streamer) managedFillCache() bool {
    75  	// Before creating a download request to fill out the cache, check whether
    76  	// the cache is actually in need of being filled. The cache will only fill
    77  	// if the current reader approaching the point of running out of data.
    78  	s.mu.Lock()
    79  	_, partialDownloadsSupported := s.staticFile.ErasureCode().SupportsPartialEncoding()
    80  	chunkSize := s.staticFile.ChunkSize()
    81  	cacheOffset := int64(s.cacheOffset)
    82  	streamOffset := s.offset
    83  	cacheLen := int64(len(s.cache))
    84  	streamReadErr := s.readErr
    85  	fileSize := int64(s.staticFile.Size())
    86  	targetCacheSize := s.targetCacheSize
    87  	s.mu.Unlock()
    88  	// If there has been a read error in the stream, abort.
    89  	if streamReadErr != nil {
    90  		return false
    91  	}
    92  	// Check whether the cache has reached the end of the file and also the
    93  	// streamOffset is contained within the cache. If so, no updates are needed.
    94  	if cacheOffset <= streamOffset && cacheOffset+cacheLen == fileSize {
    95  		return false
    96  	}
    97  	// If partial downloads are supported and more than half of the target cache
    98  	// size is remaining, then no fetching is required.
    99  	if partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset < (cacheOffset+cacheLen-(targetCacheSize/2)) {
   100  		return false
   101  	}
   102  	// If partial downloads are not supported, the full chunk containing the
   103  	// current offset should be the cache. If the cache is the full chunk that
   104  	// contains current offset, then nothing needs to be done as the cache is
   105  	// already prepared.
   106  	//
   107  	// This should be functionally nearly identical to the previous cache that
   108  	// we were using which has since been disabled.
   109  	if !partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset < cacheOffset+cacheLen && cacheLen > 0 {
   110  		return false
   111  	}
   112  
   113  	// Defer a function to rotate out the cacheReady channel, to notify all
   114  	// calls blocking for more cache that more data is now available.
   115  	defer func() {
   116  		s.mu.Lock()
   117  		close(s.cacheReady)
   118  		s.cacheReady = make(chan struct{})
   119  		s.mu.Unlock()
   120  	}()
   121  
   122  	// Determine what data needs to be fetched.
   123  	//
   124  	// If there is no support for partial downloads, a whole chunk needs to be
   125  	// fetched, and the cache will be set equal to the chunk that currently
   126  	// contains the stream offset. This is because that amount of data will need
   127  	// to be fetched anyway, so we may as well use the full amount of data in
   128  	// the cache.
   129  	//
   130  	// If there is support for partial downloads but the stream offset is not
   131  	// contained within the existing cache, we need to fully replace the cache.
   132  	// At initialization, this will be the case (cacheLen of 0 cannot contain
   133  	// the stream offset byte within it, because it contains no bytes at all),
   134  	// so a check for 0-size cache is made. The full cache replacement will
   135  	// consist of a partial download the size of the cache starting from the
   136  	// stream offset.
   137  	//
   138  	// The final case is that the stream offset is contained within the current
   139  	// cache, but the stream offset is not the first byte of the cache. This
   140  	// means that we need to drop all of the bytes prior to the stream offset
   141  	// and then more bytes so that the cache remains the same size.
   142  	var fetchOffset, fetchLen int64
   143  	if !partialDownloadsSupported {
   144  		// Request a full chunk of data.
   145  		chunkIndex, _ := s.staticFile.ChunkIndexByOffset(uint64(streamOffset))
   146  		fetchOffset = int64(chunkIndex * chunkSize)
   147  		fetchLen = int64(chunkSize)
   148  	} else if streamOffset < cacheOffset || streamOffset >= cacheOffset+cacheLen {
   149  		// Grab enough data to fill the cache entirely starting from the current
   150  		// stream offset.
   151  		fetchOffset = streamOffset
   152  		fetchLen = targetCacheSize
   153  	} else {
   154  		// Set the fetch offset to the end of the current cache, and set the
   155  		// length equal to the number of bytes that the streamOffset has already
   156  		// consumed, so that the cache remains the same size after we drop all
   157  		// of the consumed bytes and extend the cache with new data.
   158  		fetchOffset = cacheOffset + cacheLen
   159  		fetchLen = targetCacheSize - (cacheOffset + cacheLen - streamOffset)
   160  	}
   161  
   162  	// Finally, check if the fetchOffset and fetchLen goes beyond the boundaries
   163  	// of the file. If so, the fetchLen will be truncated so that the cache only
   164  	// goes up to the end of the file.
   165  	if fetchOffset+fetchLen > fileSize {
   166  		fetchLen = fileSize - fetchOffset
   167  	}
   168  
   169  	// Perform the actual download.
   170  	buffer := bytes.NewBuffer([]byte{})
   171  	ddw := newDownloadDestinationWriter(buffer)
   172  	d, err := s.staticRenter.managedNewDownload(downloadParams{
   173  		destination:       ddw,
   174  		destinationType:   destinationTypeSeekStream,
   175  		destinationString: "httpresponse",
   176  		disableLocalFetch: s.staticDisableLocalFetch,
   177  		file:              s.staticFile,
   178  
   179  		latencyTarget: 50 * time.Millisecond, // TODO: low default until full latency support is added.
   180  		length:        uint64(fetchLen),
   181  		needsMemory:   true,
   182  		offset:        uint64(fetchOffset),
   183  		overdrive:     5,    // TODO: high default until full overdrive support is added.
   184  		priority:      1000, // TODO: high default until full priority support is added.
   185  
   186  		staticMemoryManager:    s.staticRenter.staticUserDownloadMemoryManager, // user initiated download
   187  		staticSpendingCategory: categoryDownload,
   188  	})
   189  	if err != nil {
   190  		closeErr := ddw.Close()
   191  		s.mu.Lock()
   192  		readErr := errors.Compose(s.readErr, err, closeErr)
   193  		s.readErr = readErr
   194  		s.mu.Unlock()
   195  		s.staticRenter.staticLog.Println("Error downloading for stream file:", readErr)
   196  		return false
   197  	}
   198  	// Register some cleanup for when the download is done.
   199  	d.OnComplete(func(_ error) error {
   200  		// close the destination buffer to avoid deadlocks.
   201  		return ddw.Close()
   202  	})
   203  	// Start the download.
   204  	if err := d.Start(); err != nil {
   205  		return false
   206  	}
   207  	// Block until the download has completed.
   208  	select {
   209  	case <-d.completeChan:
   210  		err := d.Err()
   211  		if err != nil {
   212  			completeErr := errors.AddContext(err, "download failed")
   213  			s.mu.Lock()
   214  			readErr := errors.Compose(s.readErr, completeErr)
   215  			s.readErr = readErr
   216  			s.mu.Unlock()
   217  			s.staticRenter.staticLog.Println("Error during stream download:", readErr)
   218  			return false
   219  		}
   220  	case <-s.staticRenter.tg.StopChan():
   221  		stopErr := errors.New("download interrupted by shutdown")
   222  		s.mu.Lock()
   223  		readErr := errors.Compose(s.readErr, stopErr)
   224  		s.readErr = readErr
   225  		s.mu.Unlock()
   226  		s.staticRenter.staticLog.Debugln(stopErr)
   227  		return false
   228  	}
   229  
   230  	// Update the cache.
   231  	s.mu.Lock()
   232  	defer s.mu.Unlock()
   233  
   234  	// Before updating the cache, check if the stream has caught up in the
   235  	// current cache. If the stream has caught up, the cache is not filling fast
   236  	// enough and the target cache size should be increased.
   237  	//
   238  	// streamOffsetInTail checks if the stream offset is in the final quarter of
   239  	// the cache. If it is, we consider the cache to be not filling fast enough,
   240  	// and we extend the size of the cache.
   241  	//
   242  	// A final check for cacheExists is performed, because if there currently is
   243  	// no cache at all, this must be the first fetch, and there is no reason to
   244  	// extend the cache size.
   245  	cacheLen = int64(len(s.cache))
   246  	streamOffsetInCache := s.cacheOffset <= s.offset && s.offset <= s.cacheOffset+cacheLen // NOTE: it's '<=' so that we also count being 1 byte beyond the cache
   247  	streamOffsetInTail := streamOffsetInCache && s.offset >= s.cacheOffset+(cacheLen/4)+(cacheLen/2)
   248  	targetCacheUnderLimit := s.targetCacheSize < maxStreamerCacheSize
   249  	cacheExists := cacheLen > 0
   250  	if cacheExists && partialDownloadsSupported && targetCacheUnderLimit && streamOffsetInTail {
   251  		if s.targetCacheSize*2 > maxStreamerCacheSize {
   252  			s.targetCacheSize = maxStreamerCacheSize
   253  		} else {
   254  			s.targetCacheSize *= 2
   255  		}
   256  	}
   257  
   258  	// Update the cache based on whether the entire cache needs to be replaced
   259  	// or whether only some of the cache is being replaced. The whole cache
   260  	// needs to be replaced in the even that partial downloads are not
   261  	// supported, and also in the event that the stream offset is complete
   262  	// outside the previous cache.
   263  	if !partialDownloadsSupported || streamOffset >= cacheOffset+cacheLen || streamOffset < cacheOffset {
   264  		s.cache = buffer.Bytes()
   265  		s.cacheOffset = fetchOffset
   266  	} else {
   267  		s.cache = s.cache[streamOffset-cacheOffset:]
   268  		s.cache = append(s.cache, buffer.Bytes()...)
   269  		s.cacheOffset = streamOffset
   270  	}
   271  
   272  	// Return true, indicating that this function should be called again,
   273  	// because there may be more cache that has been requested or used since the
   274  	// previous request.
   275  	return true
   276  }
   277  
   278  // threadedFillCache is a background thread that keeps the cache full as data is
   279  // read out of the cache. The Read and Seek functions have access to a channel
   280  // that they can use to signal that the cache should be refilled. To ensure that
   281  // the cache is always being filled, 'managedFillCache' will return a value
   282  // indicating whether it should be called again after completion based on
   283  // whether the cache was emptied further since the previous call.
   284  func (s *streamer) threadedFillCache() {
   285  	// Add this thread to the renter's threadgroup.
   286  	err := s.staticRenter.tg.Add()
   287  	if err != nil {
   288  		s.staticRenter.staticLog.Debugln("threadedFillCache terminating early because renter has stopped")
   289  	}
   290  	defer s.staticRenter.tg.Done()
   291  
   292  	// Kick things off by filling the cache for the first time.
   293  	fetchMore := s.managedFillCache()
   294  	for fetchMore {
   295  		fetchMore = s.managedFillCache()
   296  	}
   297  
   298  	for {
   299  		// Block until receiving notice that the cache needs to be updated,
   300  		// shutting down if a shutdown signal is received.
   301  		select {
   302  		case <-s.activateCache:
   303  		case <-s.staticRenter.tg.StopChan():
   304  			return
   305  		}
   306  
   307  		// Update the cache. Sometimes the cache will know that it is already
   308  		// out of date by the time it is returning, in those cases call the
   309  		// function again.
   310  		fetchMore = s.managedFillCache()
   311  		for fetchMore {
   312  			fetchMore = s.managedFillCache()
   313  		}
   314  	}
   315  }
   316  
   317  // Close closes the streamer.
   318  func (s *streamer) Close() error {
   319  	return nil
   320  }
   321  
   322  // Read will check the stream cache for the data that is being requested. If the
   323  // data is fully or partially there, Read will return what data is available
   324  // without error. If the data is not there, Read will issue a call to fill the
   325  // cache and then block until the data is at least partially available.
   326  func (s *streamer) Read(p []byte) (int, error) {
   327  	// Wait in a loop until the requested data is available, or until an error
   328  	// is recovered. The loop needs to release the lock between iterations, but
   329  	// the lock that it grabs needs to be held after the loops termination if
   330  	// the right conditions are met, resulting in an ugly/complex locking
   331  	// strategy.
   332  	for {
   333  		// Grab the lock and check that the cache has data which we want. If the
   334  		// cache does have data that we want, we will keep the lock and exit the
   335  		// loop. If there's an error, we will drop the lock and return the
   336  		// error. If the cache does not have the data we want but there is no
   337  		// error, we will drop the lock and spin up a thread to fill the cache,
   338  		// and then block until the cache has been updated.
   339  		s.mu.Lock()
   340  		// Get the file's size and check for EOF.
   341  		fileSize := int64(s.staticFile.Size())
   342  		if s.offset >= fileSize {
   343  			s.mu.Unlock()
   344  			return 0, io.EOF
   345  		}
   346  
   347  		// If there is a cache error, drop the lock and return. This check
   348  		// should happen before anything else.
   349  		if s.readErr != nil {
   350  			err := s.readErr
   351  			s.mu.Unlock()
   352  			return 0, err
   353  		}
   354  
   355  		// Do a check that the cache size is at least twice as large as the read
   356  		// size, to ensure that data is being fetched sufficiently far in
   357  		// advance.
   358  		twiceReadLen := int64(len(p) * 2)
   359  		if s.targetCacheSize < twiceReadLen {
   360  			if twiceReadLen > maxStreamerCacheSize {
   361  				s.targetCacheSize = maxStreamerCacheSize
   362  			} else {
   363  				s.targetCacheSize = twiceReadLen
   364  			}
   365  		}
   366  
   367  		// Check if the cache contains data that we are interested in. If so,
   368  		// break out of the cache-fetch loop while still holding the lock.
   369  		if s.cacheOffset <= s.offset && s.offset < s.cacheOffset+int64(len(s.cache)) {
   370  			break
   371  		}
   372  
   373  		// There is no error, but the data that we want is also unavailable.
   374  		// Grab the cacheReady channel to detect when the cache has been
   375  		// updated, and then drop the lock and block until there has been a
   376  		// cache update.
   377  		//
   378  		// Notably, it should not be necessary to spin up a new cache thread.
   379  		// There are four conditions which may cause the stream offset to be
   380  		// located outside of the existing cache, and all conditions will result
   381  		// with a thread being spun up regardless. The first condition is
   382  		// initialization, where no cache exists. A fill cache thread is spun up
   383  		// upon initialization. The second condition is after a Seek, which may
   384  		// move the offset outside of the current cache. The call to Seek also
   385  		// spins up a cache filling thread. The third condition is after a read,
   386  		// which adjusts the stream offset. A new cache fill thread gets spun up
   387  		// in this case as well, immediately after the stream offset is
   388  		// adjusted. Finally, there is the case where a cache fill thread was
   389  		// spun up, but then immediately spun down due to another cache fill
   390  		// thread already running. But this case is handled as well, because a
   391  		// cache fill thread will spin up another cache fill thread when it
   392  		// finishes specifically to cover this case.
   393  		cacheReady := s.cacheReady
   394  		s.mu.Unlock()
   395  		<-cacheReady
   396  
   397  		// Upon iterating, the lock is not held, so the call to grab the lock at
   398  		// the top of the function should not cause a deadlock.
   399  	}
   400  	// This code should only be reachable if the lock is still being held and
   401  	// there is also data in the cache for us. Defer releasing the lock.
   402  	defer s.mu.Unlock()
   403  
   404  	dataStart := int(s.offset - s.cacheOffset)
   405  	dataEnd := dataStart + len(p)
   406  	// If the read request extends beyond the cache, truncate it to include
   407  	// only up to where the cache ends.
   408  	if dataEnd > len(s.cache) {
   409  		dataEnd = len(s.cache)
   410  	}
   411  	copy(p, s.cache[dataStart:dataEnd])
   412  	s.offset += int64(dataEnd - dataStart)
   413  
   414  	// Now that data has been consumed, request more data.
   415  	select {
   416  	case s.activateCache <- struct{}{}:
   417  	default:
   418  	}
   419  
   420  	return dataEnd - dataStart, nil
   421  }
   422  
   423  // Seek sets the offset for the next Read to offset, interpreted
   424  // according to whence: SeekStart means relative to the start of the file,
   425  // SeekCurrent means relative to the current offset, and SeekEnd means relative
   426  // to the end. Seek returns the new offset relative to the start of the file
   427  // and an error, if any.
   428  func (s *streamer) Seek(offset int64, whence int) (int64, error) {
   429  	s.mu.Lock()
   430  	defer s.mu.Unlock()
   431  
   432  	var newOffset int64
   433  	switch whence {
   434  	case io.SeekStart:
   435  		newOffset = 0
   436  	case io.SeekCurrent:
   437  		newOffset = s.offset
   438  	case io.SeekEnd:
   439  		newOffset = int64(s.staticFile.Size())
   440  	}
   441  	newOffset += offset
   442  	if newOffset < 0 {
   443  		return s.offset, errors.New("cannot seek to negative offset")
   444  	}
   445  	// If the Seek is a no-op, do not invalidate the cache.
   446  	if newOffset == s.offset {
   447  		return 0, nil
   448  	}
   449  
   450  	// Reset the target cache size upon seek to be the default again. This is in
   451  	// place because some programs will rapidly consume the cache to build up
   452  	// their own buffer. This can result in the cache growing very large, which
   453  	// hurts seek times. By resetting the cache size upon seek, we ensure that
   454  	// the user gets a consistent experience when seeking. In a perfect world,
   455  	// we'd have an easy way to measure the bitrate of the file being streamed,
   456  	// so that we could set a target cache size according to that, but at the
   457  	// moment we don't have an easy way to get that information.
   458  	s.targetCacheSize = initialStreamerCacheSize
   459  
   460  	// Update the offset of the stream and immediately send a thread to update
   461  	// the cache.
   462  	s.offset = newOffset
   463  
   464  	// Now that data has been consumed, request more data.
   465  	select {
   466  	case s.activateCache <- struct{}{}:
   467  	default:
   468  	}
   469  
   470  	return newOffset, nil
   471  }
   472  
   473  // Streamer creates a skymodules.Streamer that can be used to stream downloads from
   474  // the sia network.
   475  func (r *Renter) Streamer(siaPath skymodules.SiaPath, disableLocalFetch bool) (_ string, _ skymodules.Streamer, err error) {
   476  	if err := r.tg.Add(); err != nil {
   477  		return "", nil, err
   478  	}
   479  	defer r.tg.Done()
   480  
   481  	// Lookup the file associated with the nickname.
   482  	node, err := r.staticFileSystem.OpenSiaFile(siaPath)
   483  	if err != nil {
   484  		return "", nil, err
   485  	}
   486  	defer func() {
   487  		err = errors.Compose(err, node.Close())
   488  	}()
   489  
   490  	// Create the streamer
   491  	snap, err := node.Snapshot(siaPath)
   492  	if err != nil {
   493  		return "", nil, err
   494  	}
   495  	s := r.managedStreamer(snap, disableLocalFetch)
   496  	return siaPath.String(), s, nil
   497  }
   498  
   499  // StreamerByNode will open a streamer for the renter, taking a FileNode as
   500  // input instead of a siapath. This is important for fuse, which has filenodes
   501  // that could be getting renamed before the streams are opened.
   502  func (r *Renter) StreamerByNode(node *filesystem.FileNode, disableLocalFetch bool) (skymodules.Streamer, error) {
   503  	if err := r.tg.Add(); err != nil {
   504  		return nil, err
   505  	}
   506  	defer r.tg.Done()
   507  
   508  	// Grab the current SiaPath of the FileNode and then create a snapshot.
   509  	sp := r.staticFileSystem.FileSiaPath(node)
   510  	snap, err := node.Snapshot(sp)
   511  	if err != nil {
   512  		return nil, err
   513  	}
   514  	s := r.managedStreamer(snap, disableLocalFetch)
   515  	return s, nil
   516  }
   517  
   518  // managedStreamer creates a streamer from a siafile snapshot and starts filling
   519  // its cache.
   520  func (r *Renter) managedStreamer(snapshot *siafile.Snapshot, disableLocalFetch bool) skymodules.Streamer {
   521  	s := &streamer{
   522  		staticFile:   snapshot,
   523  		staticRenter: r,
   524  
   525  		activateCache:           make(chan struct{}),
   526  		cacheReady:              make(chan struct{}),
   527  		staticDisableLocalFetch: disableLocalFetch,
   528  		targetCacheSize:         initialStreamerCacheSize,
   529  	}
   530  	go s.threadedFillCache()
   531  	return s
   532  }