gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/downloadstreamer.go (about)

     1  package renter
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"sync"
     7  	"time"
     8  
     9  	"gitlab.com/NebulousLabs/errors"
    10  
    11  	"gitlab.com/SiaPrime/SiaPrime/modules"
    12  	"gitlab.com/SiaPrime/SiaPrime/modules/renter/siafile"
    13  )
    14  
type (
	// streamer is a modules.Streamer that can be used to stream downloads from
	// the sia network.
	streamer struct {
		// Reader variables. The snapshot is a snapshot of the file as it
		// existed when it was opened, something that we do to give the streamer
		// a consistent view of the file even if the file is being actively
		// updated. Having this snapshot also isolates the reader from events
		// such as name changes and deletions.
		//
		// NOTE(review): only the snapshot is retained here; the streamer does
		// not hold the full file entry, so metadata such as access times is
		// not updated through this struct.
		staticFile *siafile.Snapshot
		offset     int64
		r          *Renter

		// The cache itself is a []byte that is managed by threadedFillCache. The
		// 'cacheOffset' indicates the starting location of the cache within the
		// file, and all of the data in the []byte will be the actual file data
		// that follows that offset. If the cache is empty, the length will be
		// 0.
		//
		// Because the cache gets filled asynchronously, errors need to be
		// recorded and then delivered to the user later. The errors get stored
		// in readErr.
		//
		// cacheReady is a rotating channel which is used to signal to threads
		// that the cache has been updated. When a Read call is made, the first
		// action required is to grab a lock and then check if the cache has the
		// requested data. If not, while still holding the lock the Read thread
		// will grab a copy of cacheReady, and then release the lock. When the
		// threadedFillCache thread has finished updating the cache, the thread
		// will grab the lock and then the cacheReady channel will be closed and
		// replaced with a new channel. This allows any number of Read threads
		// to simultaneously block while waiting for cacheReady to be closed,
		// and once cacheReady is closed they know to check the cache again.
		//
		// Multiple asynchronous calls to fill the cache may be sent out at
		// once. To prevent race conditions, the 'activateCache' channel is used
		// to ensure that only one instance of 'threadedFillCache' is processing
		// a fill request at a time; a non-blocking send on activateCache wakes
		// the single background fill thread.
		cache           []byte
		activateCache   chan struct{}
		cacheOffset     int64
		cacheReady      chan struct{}
		readErr         error
		targetCacheSize int64

		// Mutex to protect the offset variable, and all of the caching
		// variables.
		mu sync.Mutex
	}
)
    69  
// managedFillCache will determine whether or not the cache of the streamer
// needs to be filled, and if it does it will add data to the streamer. The
// return value indicates whether the caller should invoke this function again,
// because the cache may have been consumed further while the fill was running.
func (s *streamer) managedFillCache() bool {
	// Before creating a download request to fill out the cache, check whether
	// the cache is actually in need of being filled. The cache will only fill
	// if the current reader is approaching the point of running out of data.
	// Take a consistent snapshot of all relevant state under the lock, then
	// do the decision making unlocked.
	s.mu.Lock()
	partialDownloadsSupported := s.staticFile.ErasureCode().SupportsPartialEncoding()
	chunkSize := s.staticFile.ChunkSize()
	cacheOffset := int64(s.cacheOffset)
	streamOffset := s.offset
	cacheLen := int64(len(s.cache))
	streamReadErr := s.readErr
	fileSize := int64(s.staticFile.Size())
	targetCacheSize := s.targetCacheSize
	s.mu.Unlock()
	// If there has been a read error in the stream, abort.
	if streamReadErr != nil {
		return false
	}
	// Check whether the cache has reached the end of the file and the stream
	// offset is at or beyond the start of the cache. If so, no updates are
	// needed.
	if cacheOffset <= streamOffset && cacheOffset+cacheLen == fileSize {
		return false
	}
	// If partial downloads are supported and the stream offset is in the first
	// half of the cache, then no fetching is required.
	//
	// An extra check that there is any data in the cache needs to be made so
	// that the cache fill function runs immediately after initialization (an
	// empty cache makes the half-way condition trivially false).
	if partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset-cacheOffset < cacheLen/2 {
		return false
	}
	// If partial downloads are not supported, the full chunk containing the
	// current offset should be the cache. If the cache is the full chunk that
	// contains current offset, then nothing needs to be done as the cache is
	// already prepared.
	//
	// This should be functionally nearly identical to the previous cache that
	// we were using which has since been disabled.
	if !partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset < cacheOffset+cacheLen && cacheLen > 0 {
		return false
	}

	// Defer a function to rotate out the cacheReady channel, to notify all
	// calls blocking for more cache that more data is now available (or that
	// a read error has been recorded).
	defer func() {
		s.mu.Lock()
		close(s.cacheReady)
		s.cacheReady = make(chan struct{})
		s.mu.Unlock()
	}()

	// Determine what data needs to be fetched.
	//
	// If there is no support for partial downloads, a whole chunk needs to be
	// fetched, and the cache will be set equal to the chunk that currently
	// contains the stream offset. This is because that amount of data will need
	// to be fetched anyway, so we may as well use the full amount of data in
	// the cache.
	//
	// If there is support for partial downloads but the stream offset is not
	// contained within the existing cache, we need to fully replace the cache.
	// At initialization, this will be the case (cacheLen of 0 cannot contain
	// the stream offset byte within it, because it contains no bytes at all),
	// so a check for 0-size cache is made. The full cache replacement will
	// consist of a partial download the size of the cache starting from the
	// stream offset.
	//
	// The final case is that the stream offset is contained within the current
	// cache, but the stream offset is not the first byte of the cache. This
	// means that we need to drop all of the bytes prior to the stream offset
	// and then fetch more bytes so that the cache remains the same size.
	var fetchOffset, fetchLen int64
	if !partialDownloadsSupported {
		// Request a full chunk of data.
		chunkIndex, _ := s.staticFile.ChunkIndexByOffset(uint64(streamOffset))
		fetchOffset = int64(chunkIndex * chunkSize)
		fetchLen = int64(chunkSize)
	} else if streamOffset < cacheOffset || streamOffset >= cacheOffset+cacheLen {
		// Grab enough data to fill the cache entirely starting from the current
		// stream offset.
		fetchOffset = streamOffset
		fetchLen = targetCacheSize
	} else {
		// Set the fetch offset to the end of the current cache, and set the
		// length equal to the number of bytes that the streamOffset has already
		// consumed, so that the cache remains the same size after we drop all
		// of the consumed bytes and extend the cache with new data.
		fetchOffset = cacheOffset + cacheLen
		fetchLen = targetCacheSize - (streamOffset - cacheOffset)
	}

	// Finally, check if the fetchOffset and fetchLen goes beyond the boundaries
	// of the file. If so, the fetchLen will be truncated so that the cache only
	// goes up to the end of the file.
	if fetchOffset+fetchLen > fileSize {
		fetchLen = fileSize - fetchOffset
	}

	// Perform the actual download, writing the fetched data into an in-memory
	// buffer.
	buffer := bytes.NewBuffer([]byte{})
	ddw := newDownloadDestinationWriter(buffer)
	d, err := s.r.managedNewDownload(downloadParams{
		destination:       ddw,
		destinationType:   destinationTypeSeekStream,
		destinationString: "httpresponse",
		file:              s.staticFile,

		latencyTarget: 50 * time.Millisecond, // TODO: low default until full latency support is added.
		length:        uint64(fetchLen),
		needsMemory:   true,
		offset:        uint64(fetchOffset),
		overdrive:     5,    // TODO: high default until full overdrive support is added.
		priority:      1000, // TODO: high default until full priority support is added.
	})
	if err != nil {
		// Record the error so pending and future Read calls can surface it.
		closeErr := ddw.Close()
		s.mu.Lock()
		readErr := errors.Compose(s.readErr, err, closeErr)
		s.readErr = readErr
		s.mu.Unlock()
		s.r.log.Println("Error downloading for stream file:", readErr)
		return false
	}
	// Register some cleanup for when the download is done.
	d.OnComplete(func(_ error) error {
		// close the destination buffer to avoid deadlocks.
		return ddw.Close()
	})
	// Set the in-memory buffer to nil just to be safe in case of a memory
	// leak.
	defer func() {
		d.destination = nil
	}()
	// Block until the download has completed, or until the renter is shutting
	// down.
	select {
	case <-d.completeChan:
		err := d.Err()
		if err != nil {
			completeErr := errors.AddContext(err, "download failed")
			s.mu.Lock()
			readErr := errors.Compose(s.readErr, completeErr)
			s.readErr = readErr
			s.mu.Unlock()
			s.r.log.Println("Error during stream download:", readErr)
			return false
		}
	case <-s.r.tg.StopChan():
		stopErr := errors.New("download interrupted by shutdown")
		s.mu.Lock()
		readErr := errors.Compose(s.readErr, stopErr)
		s.readErr = readErr
		s.mu.Unlock()
		s.r.log.Debugln(stopErr)
		return false
	}

	// Update the cache.
	s.mu.Lock()
	defer s.mu.Unlock()

	// Before updating the cache, check if the stream has caught up in the
	// current cache. If the stream has caught up, the cache is not filling fast
	// enough and the target cache size should be increased.
	//
	// streamOffsetInTail checks if the stream offset is in the final quarter of
	// the cache. If it is, we consider the cache to be not filling fast enough,
	// and we extend the size of the cache (doubling, capped at the max).
	//
	// A final check for cacheExists is performed, because if there currently is
	// no cache at all, this must be the first fetch, and there is no reason to
	// extend the cache size.
	cacheLen = int64(len(s.cache))
	streamOffsetInCache := s.cacheOffset <= s.offset && s.offset <= s.cacheOffset+cacheLen // NOTE: it's '<=' so that we also count being 1 byte beyond the cache
	streamOffsetInTail := streamOffsetInCache && s.offset >= s.cacheOffset+(cacheLen/4)+(cacheLen/2)
	targetCacheUnderLimit := s.targetCacheSize < maxStreamerCacheSize
	cacheExists := cacheLen > 0
	if cacheExists && partialDownloadsSupported && targetCacheUnderLimit && streamOffsetInTail {
		if s.targetCacheSize*2 > maxStreamerCacheSize {
			s.targetCacheSize = maxStreamerCacheSize
		} else {
			s.targetCacheSize *= 2
		}
	}

	// Update the cache based on whether the entire cache needs to be replaced
	// or whether only some of the cache is being replaced. The whole cache
	// needs to be replaced in the event that partial downloads are not
	// supported, and also in the event that the stream offset is completely
	// outside the previous cache.
	if !partialDownloadsSupported || streamOffset >= cacheOffset+cacheLen || streamOffset < cacheOffset {
		s.cache = buffer.Bytes()
		s.cacheOffset = fetchOffset
	} else {
		// Drop the consumed prefix and append the freshly downloaded data.
		s.cache = s.cache[streamOffset-cacheOffset:]
		s.cache = append(s.cache, buffer.Bytes()...)
		s.cacheOffset = streamOffset
	}

	// Return true, indicating that this function should be called again,
	// because there may be more cache that has been requested or used since the
	// previous request.
	return true
}
   275  
   276  // threadedFillCache is a background thread that keeps the cache full as data is
   277  // read out of the cache. The Read and Seek functions have access to a channel
   278  // that they can use to signal that the cache should be refilled. To ensure that
   279  // the cache is always being filled, 'managedFillCache' will return a value
   280  // indicating whether it should be called again after completion based on
   281  // whether the cache was emptied further since the previous call.
   282  func (s *streamer) threadedFillCache() {
   283  	// Add this thread to the renter's threadgroup.
   284  	err := s.r.tg.Add()
   285  	if err != nil {
   286  		s.r.log.Debugln("threadedFillCache terminating early because renter has stopped")
   287  	}
   288  	defer s.r.tg.Done()
   289  
   290  	// Kick things off by filling the cache for the first time.
   291  	fetchMore := s.managedFillCache()
   292  	for fetchMore {
   293  		fetchMore = s.managedFillCache()
   294  	}
   295  
   296  	for {
   297  		// Block until receiving notice that the cache needs to be updated,
   298  		// shutting down if a shutdown signal is received.
   299  		select {
   300  		case <-s.activateCache:
   301  		case <-s.r.tg.StopChan():
   302  			return
   303  		}
   304  
   305  		// Update the cache. Sometimes the cache will know that it is already
   306  		// out of date by the time it is returning, in those cases call the
   307  		// function again.
   308  		fetchMore = s.managedFillCache()
   309  		for fetchMore {
   310  			fetchMore = s.managedFillCache()
   311  		}
   312  	}
   313  }
   314  
   315  // Close closes the streamer.
   316  func (s *streamer) Close() error {
   317  	return nil
   318  }
   319  
// Read will check the stream cache for the data that is being requested. If the
// data is fully or partially there, Read will return what data is available
// without error. If the data is not there, Read will issue a call to fill the
// cache and then block until the data is at least partially available.
func (s *streamer) Read(p []byte) (int, error) {
	// Wait in a loop until the requested data is available, or until an error
	// is recovered. The loop needs to release the lock between iterations, but
	// the lock that it grabs needs to be held after the loop's termination if
	// the right conditions are met, resulting in an ugly/complex locking
	// strategy.
	for {
		// Grab the lock and check that the cache has data which we want. If the
		// cache does have data that we want, we will keep the lock and exit the
		// loop. If there's an error, we will drop the lock and return the
		// error. If the cache does not have the data we want but there is no
		// error, we will drop the lock and spin up a thread to fill the cache,
		// and then block until the cache has been updated.
		s.mu.Lock()
		// Get the file's size and check for EOF.
		fileSize := int64(s.staticFile.Size())
		if s.offset >= fileSize {
			s.mu.Unlock()
			return 0, io.EOF
		}

		// If there is a cache error, drop the lock and return. This check
		// should happen before anything else.
		if s.readErr != nil {
			err := s.readErr
			s.mu.Unlock()
			return 0, err
		}

		// Do a check that the cache size is at least twice as large as the read
		// size, to ensure that data is being fetched sufficiently far in
		// advance. Growth is capped at maxStreamerCacheSize.
		twiceReadLen := int64(len(p) * 2)
		if s.targetCacheSize < twiceReadLen {
			if twiceReadLen > maxStreamerCacheSize {
				s.targetCacheSize = maxStreamerCacheSize
			} else {
				s.targetCacheSize = twiceReadLen
			}
		}

		// Check if the cache contains data that we are interested in. If so,
		// break out of the cache-fetch loop while still holding the lock.
		if s.cacheOffset <= s.offset && s.offset < s.cacheOffset+int64(len(s.cache)) {
			break
		}

		// There is no error, but the data that we want is also unavailable.
		// Grab the cacheReady channel to detect when the cache has been
		// updated, and then drop the lock and block until there has been a
		// cache update.
		//
		// Notably, it should not be necessary to spin up a new cache thread.
		// There are four conditions which may cause the stream offset to be
		// located outside of the existing cache, and all conditions will result
		// with a thread being spun up regardless. The first condition is
		// initialization, where no cache exists. A fill cache thread is spun up
		// upon initialization. The second condition is after a Seek, which may
		// move the offset outside of the current cache. The call to Seek also
		// spins up a cache filling thread. The third condition is after a read,
		// which adjusts the stream offset. A new cache fill thread gets spun up
		// in this case as well, immediately after the stream offset is
		// adjusted. Finally, there is the case where a cache fill thread was
		// spun up, but then immediately spun down due to another cache fill
		// thread already running. But this case is handled as well, because a
		// cache fill thread will spin up another cache fill thread when it
		// finishes specifically to cover this case.
		cacheReady := s.cacheReady
		s.mu.Unlock()
		<-cacheReady

		// Upon iterating, the lock is not held, so the call to grab the lock at
		// the top of the loop should not cause a deadlock.
	}
	// This code should only be reachable if the lock is still being held and
	// there is also data in the cache for us. Defer releasing the lock.
	defer s.mu.Unlock()

	// Copy out as much of the request as the cache can satisfy, starting at
	// the stream offset's position within the cache.
	dataStart := int(s.offset - s.cacheOffset)
	dataEnd := dataStart + len(p)
	// If the read request extends beyond the cache, truncate it to include
	// only up to where the cache ends.
	if dataEnd > len(s.cache) {
		dataEnd = len(s.cache)
	}
	copy(p, s.cache[dataStart:dataEnd])
	s.offset += int64(dataEnd - dataStart)

	// Now that data has been consumed, request more data with a non-blocking
	// signal to the fill thread.
	select {
	case s.activateCache <- struct{}{}:
	default:
	}

	return dataEnd - dataStart, nil
}
   420  
   421  // Seek sets the offset for the next Read to offset, interpreted
   422  // according to whence: SeekStart means relative to the start of the file,
   423  // SeekCurrent means relative to the current offset, and SeekEnd means relative
   424  // to the end. Seek returns the new offset relative to the start of the file
   425  // and an error, if any.
   426  func (s *streamer) Seek(offset int64, whence int) (int64, error) {
   427  	s.mu.Lock()
   428  	defer s.mu.Unlock()
   429  
   430  	var newOffset int64
   431  	switch whence {
   432  	case io.SeekStart:
   433  		newOffset = 0
   434  	case io.SeekCurrent:
   435  		newOffset = s.offset
   436  	case io.SeekEnd:
   437  		newOffset = int64(s.staticFile.Size())
   438  	}
   439  	newOffset += offset
   440  	if newOffset < 0 {
   441  		return s.offset, errors.New("cannot seek to negative offset")
   442  	}
   443  
   444  	// Reset the target cache size upon seek to be the default again. This is in
   445  	// place because some programs will rapidly consume the cache to build up
   446  	// their own buffer. This can result in the cache growing very large, which
   447  	// hurts seek times. By resetting the cache size upon seek, we ensure that
   448  	// the user gets a consistent experience when seeking. In a perfect world,
   449  	// we'd have an easy way to measure the bitrate of the file being streamed,
   450  	// so that we could set a target cache size according to that, but at the
   451  	// moment we don't have an easy way to get that information.
   452  	s.targetCacheSize = initialStreamerCacheSize
   453  
   454  	// Update the offset of the stream and immediately send a thread to update
   455  	// the cache.
   456  	s.offset = newOffset
   457  
   458  	// Now that data has been consumed, request more data.
   459  	select {
   460  	case s.activateCache <- struct{}{}:
   461  	default:
   462  	}
   463  
   464  	return newOffset, nil
   465  }
   466  
   467  // Streamer creates a modules.Streamer that can be used to stream downloads from
   468  // the sia network.
   469  func (r *Renter) Streamer(siaPath modules.SiaPath) (string, modules.Streamer, error) {
   470  	if err := r.tg.Add(); err != nil {
   471  		return "", nil, err
   472  	}
   473  	defer r.tg.Done()
   474  	// Lookup the file associated with the nickname.
   475  	entry, err := r.staticFileSet.Open(siaPath)
   476  	if err != nil {
   477  		return "", nil, err
   478  	}
   479  	defer entry.Close()
   480  
   481  	// Create the streamer
   482  	snap, err := entry.Snapshot()
   483  	if err != nil {
   484  		return "", nil, err
   485  	}
   486  	s := r.managedStreamer(snap)
   487  	return r.staticFileSet.SiaPath(entry).String(), s, nil
   488  }
   489  
   490  // managedStreamer creates a streamer from a siafile snapshot and starts filling
   491  // its cache.
   492  func (r *Renter) managedStreamer(snapshot *siafile.Snapshot) modules.Streamer {
   493  	s := &streamer{
   494  		staticFile: snapshot,
   495  		r:          r,
   496  
   497  		activateCache:   make(chan struct{}),
   498  		cacheReady:      make(chan struct{}),
   499  		targetCacheSize: initialStreamerCacheSize,
   500  	}
   501  	go s.threadedFillCache()
   502  	return s
   503  }