gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/streambuffer.go

package renter

// NOTE: This stream buffer is unfinished in a few ways. The first is that it
// is not possible to cancel fetches. The second is that fetches are not
// prioritized; there should be a higher priority on data that is closer to
// the current stream offset. The third is that the amount of data which gets
// fetched is not dynamically adjusted. The streamer really should be
// monitoring the total amount of time it takes for a call to the data source
// to return some data, and should buffer accordingly. If auto-adjusting the
// lookahead size, care needs to be taken not to exceed the
// bytesBufferedPerStream size, as exceeding that will cause issues with the
// lru, and cause data fetches to be evicted before they become useful.

import (
	"context"
	"encoding/hex"
	"io"
	"sync"
	"time"

	"github.com/opentracing/opentracing-go"
	"gitlab.com/SkynetLabs/skyd/build"
	"gitlab.com/SkynetLabs/skyd/skymodules"
	"go.sia.tech/siad/crypto"
	"go.sia.tech/siad/types"

	"gitlab.com/NebulousLabs/errors"
	"gitlab.com/NebulousLabs/fastrand"
	"gitlab.com/NebulousLabs/threadgroup"
)

const (
	// minimumDataSections is set to two because the streamer always tries to
	// buffer at least the current data section and the next data section for
	// the current offset of a stream.
	//
	// Three as a number was considered so that in addition to buffering one
	// piece ahead, a previous piece could also be cached. This was considered
	// to be less valuable than keeping memory requirements low -
	// minimumDataSections only comes into play if there is not enough room
	// for multiple cache nodes in the bytesBufferedPerStream.
	minimumDataSections = 2

	// longDownloadThreshold specifies when a download is considered to be
	// taking long. This value might change in the future; it is based on the
	// p99 values for downloads, which currently exceed 3s on some of our
	// production servers.
	longDownloadThreshold = time.Second * 3
)

var (
	// errTimeout is returned when the context cancels before the data is
	// available.
	errTimeout = errors.New("could not get data from data section, context timed out")

	// bytesBufferedPerStream is the total amount of data that gets allocated
	// per stream. Each stream caches bytesBufferedPerStream / RequestSize
	// data sections, but never fewer than minimumDataSections, so if the
	// RequestSize is large relative to bytesBufferedPerStream, more data
	// than bytesBufferedPerStream will be allocated. (The arithmetic is
	// sketched in exampleStreamAllocation below.)
	//
	// For example, if the RequestSize is 10kb and the bytesBufferedPerStream
	// is 100kb, then each stream is going to buffer 10 segments that are
	// each 10kb long in the LRU.
	//
	// But if the RequestSize is 80kb and the bytesBufferedPerStream is
	// 100kb, integer division yields only one segment, which is below
	// minimumDataSections, so each stream is going to buffer 2 segments that
	// are each 80kb long in the LRU, for a total of 160kb.
	bytesBufferedPerStream = build.Select(build.Var{
		Dev:      uint64(1 << 25), // 32 MiB
		Standard: uint64(1 << 25), // 32 MiB
		Testing:  uint64(1 << 8),  // 256 bytes
	}).(uint64)

	// keepOldBuffersDuration specifies how long a stream buffer will stay in
	// the buffer set after the final stream is closed. This gives some buffer
	// time for a new request to the same resource, without having the data
	// source fully cleared out. This optimization is particularly useful for
	// certain video players and web applications.
	keepOldBuffersDuration = build.Select(build.Var{
		Dev:      time.Second * 15,
		Standard: time.Second * 60,
		Testing:  time.Second * 2,
	}).(time.Duration)

	// minimumLookahead defines the minimum amount that the stream will fetch
	// ahead of the current seek position in a stream.
	//
	// Note that there is a throughput vs. latency tradeoff here. The maximum
	// speed of a stream has an upper bound of the lookahead / latency. So if
	// it takes 1 second to fetch data and the lookahead is 2 MB, the maximum
	// speed of a single stream is going to be 2 MB/s. When Sia is healthy,
	// the latency on a fetch should be under 200ms, which means with a 2 MB
	// lookahead a single stream should be able to do more than 10 MB/s.
	//
	// A smaller minimum lookahead means that less data is being buffered
	// simultaneously, so seek times should be lower. A smaller minimum
	// lookahead becomes less important if we get some way to ensure the
	// earlier parts are prioritized, but we don't have control over that at
	// the moment.
	minimumLookahead = build.Select(build.Var{
		Dev:      uint64(1 << 21), // 2 MiB
		Standard: uint64(1 << 23), // 8 MiB
		Testing:  uint64(1 << 6),  // 64 bytes
	}).(uint64)

	// newDataSectionTimeout is the timeout we enforce when downloading the
	// data for a newly created data section.
	newDataSectionTimeout = build.Select(build.Var{
		Dev:      30 * time.Second,
		Standard: 2 * time.Minute,
		Testing:  30 * time.Second,
	}).(time.Duration)
)
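
// exampleStreamAllocation is an illustrative sketch (this helper is not part
// of the original package logic) of the buffering arithmetic described above,
// mirroring managedPrepareNewStream: a stream caches bytesBufferedPerStream /
// requestSize data sections, but never fewer than minimumDataSections, so the
// actual allocation can exceed bytesBufferedPerStream when sections are
// large.
func exampleStreamAllocation(requestSize uint64) (sections, totalBytes uint64) {
	// Integer division determines how many full sections fit in the budget.
	sections = bytesBufferedPerStream / requestSize
	// The current and the next section must always fit, so enforce the floor.
	if sections < minimumDataSections {
		sections = minimumDataSections
	}
	return sections, sections * requestSize
}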

// streamBufferDataSource is an interface that the stream buffer uses to fetch
// data. This type is internal to the renter as there are plans to expand on
// the type.
type streamBufferDataSource interface {
	// DataSize should return the size of the data. When the streamBuffer is
	// reading from the data source, it will ensure that none of the read
	// calls go beyond the boundary of the data source.
	DataSize() uint64

	// ID returns the ID of the data source. This should be unique to the
	// data source - that is, every data source that returns the same ID
	// should have identical data and be fully interchangeable.
	ID() skymodules.DataSourceID

	// HasRecursiveFanout returns 'true' if the datasource belongs to a
	// skyfile with recursive fanout.
	HasRecursiveFanout() bool

	// Metadata returns the Skyfile metadata of a data source.
	Metadata() skymodules.SkyfileMetadata

	// RawMetadata returns the raw metadata of a data source.
	RawMetadata() []byte

	// ReadBaseSectorPayload reads data from the data source's base sector
	// payload. This will return an error when called on anything but a
	// small skyfile.
	ReadBaseSectorPayload(off, length uint64) (*downloadResponse, error)

	// ReadFanout reads a single piece root from the fanout of the
	// datasource and returns the proof for that root as well as the offset
	// within the sector.
	ReadFanout(chunkIndex, pieceIndex uint64) ([]byte, []crypto.Hash, uint32, error)

	// Layout returns the Skyfile layout of a data source.
	Layout() skymodules.SkyfileLayout

	// RawLayout returns the raw Skyfile layout of a data source.
	RawLayout() (skymodules.SkyfileLayout, []byte, []crypto.Hash)

	// RequestSize should return the request size that the dataSource expects
	// the streamBuffer to use. The streamBuffer will always make ReadAt calls
	// that are of the suggested request size and byte aligned.
	//
	// If the request size is small, many ReadAt calls will be made in
	// parallel. If the dataSource can handle high parallelism, a smaller
	// request size should be recommended to the streamBuffer, because that
	// will reduce latency. If the dataSource cannot handle high parallelism,
	// a larger request size should be used to optimize for total throughput.
	//
	// A general rule of thumb is that the streamer should be able to
	// comfortably handle 100 mbps (high end 4K video) if the user's local
	// connection has that much throughput.
	RequestSize() uint64

	// SilentClose is an io.Closer that does not return an error. The data
	// source is expected to handle any logging or reporting that is
	// necessary if the closing fails.
	SilentClose()

	// Skylink returns the skylink of the datasource.
	Skylink() skymodules.Skylink

	// ReadSection allows the stream buffer to request a specific data
	// section from the data source. It returns a channel containing a
	// download response.
	ReadSection(context.Context, uint64, types.Currency) (<-chan *downloadResponse, error)
}
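
// exampleSectionCount is an illustrative sketch (this helper is not part of
// the original package logic) of how the stream buffer slices a data source
// into sections: the data is divided into RequestSize-aligned sections, and a
// final short section covers any remainder. The same arithmetic appears in
// managedPrepareNewStream below.
func exampleSectionCount(ds streamBufferDataSource) uint64 {
	sections := ds.DataSize() / ds.RequestSize()
	if ds.DataSize()%ds.RequestSize() != 0 {
		// The data does not divide evenly; a trailing partial section holds
		// the remainder.
		sections++
	}
	return sections
}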

// dataSection represents a section of data from a data source. The data
// section includes a refcount of how many different streams have the data in
// their LRU. If the refCount is ever set to 0, the data section should be
// deleted. Because the dataSection has no mutex, the refCount falls under the
// consistency domain of the object holding it, which should always be a
// streamBuffer.
type dataSection struct {
	// staticID uniquely identifies this data section.
	staticID string

	// dataAvailable, externData, externDuration, and externErr work together.
	// The data and error are not allowed to be accessed by external threads
	// until the data available channel has been closed. Once the
	// dataAvailable channel has been closed, externData, externDuration and
	// externErr are to be treated like static fields.
	dataAvailable  chan struct{}
	externDuration time.Duration
	externData     *downloadedData
	externErr      error

	refCount uint64
}

// stream is a single stream that uses a stream buffer. The stream implements
// io.ReadSeeker and io.Closer, and must be closed when it is done being used.
// The stream will cache data, both data that has been accessed recently as
// well as data that is in front of the current read head. The stream buffer
// is a common cache that is used between all streams that are using the same
// data source, allowing each stream to depend on the other streams if data
// has already been loaded.
type stream struct {
	lru    *leastRecentlyUsedCache
	offset uint64

	mu                 sync.Mutex
	staticStreamBuffer *streamBuffer

	staticCacheRatio  float64
	staticContext     context.Context
	staticRepair      bool
	staticSpan        opentracing.Span
	staticReadTimeout time.Duration
}

// streamBuffer is a buffer for a single dataSource.
//
// The streamBuffer uses a threadgroup to ensure that it does not call ReadAt
// after calling SilentClose.
type streamBuffer struct {
	dataSections map[uint64]*dataSection

	// externRefCount is in the same consistency domain as the
	// streamBufferSet; it needs to be incremented and decremented
	// simultaneously with the creation and deletion of the streamBuffer.
	externRefCount uint64

	mu                    sync.Mutex
	staticCache           PersistedLRU
	staticTG              threadgroup.ThreadGroup
	staticDataSource      streamBufferDataSource
	staticStreamBufferSet *streamBufferSet
	staticStreamID        skymodules.DataSourceID
	staticPricePerMS      types.Currency
	staticSpan            opentracing.Span
}

// streamBufferSet tracks all of the stream buffers that are currently active.
// When a new stream is created, the stream buffer set is referenced to check
// whether another stream using the same data source already exists.
type streamBufferSet struct {
	streams map[skymodules.DataSourceID]*streamBuffer

	staticCache          PersistedLRU
	staticStatsCollector *skymodules.DistributionTracker
	staticTG             *threadgroup.ThreadGroup
	mu                   sync.Mutex
}

// newStreamBufferSet initializes and returns a stream buffer set.
func newStreamBufferSet(statsCollector *skymodules.DistributionTracker, tg *threadgroup.ThreadGroup, cache PersistedLRU) *streamBufferSet {
	return &streamBufferSet{
		streams: make(map[skymodules.DataSourceID]*streamBuffer),

		staticCache:          cache,
		staticStatsCollector: statsCollector,
		staticTG:             tg,
	}
}

// callNewStream will create a stream that implements io.Closer and
// io.ReadSeeker. A dataSource must be provided for the stream so that the
// stream can fetch data in advance of calls to 'Read' and attempt to provide
// a smooth streaming experience.
//
// The 'sourceID' is a unique identifier for the dataSource which allows
// multiple streams fetching data from the same source to combine their cache.
// This shared cache only comes into play if the streams are simultaneously
// accessing the same data, allowing the buffer to save on memory and access
// latency.
//
// Each stream has a separate LRU for determining what data to buffer. Because
// the LRU is distinct to the stream, the shared cache feature will not result
// in one stream evicting data from another stream's LRU.
func (sbs *streamBufferSet) callNewStream(ctx context.Context, dataSource streamBufferDataSource, initialOffset uint64, timeout time.Duration, pricePerMS types.Currency, cachedSections uint64, repair bool) *stream {
	// Grab the streamBuffer for the provided sourceID. If no streamBuffer
	// for the sourceID exists, create a new one.
	sourceID := dataSource.ID()
	sbs.mu.Lock()
	streamBuf, exists := sbs.streams[sourceID]
	if !exists {
		streamBuf = &streamBuffer{
			dataSections: make(map[uint64]*dataSection),

			staticCache:           sbs.staticCache,
			staticDataSource:      dataSource,
			staticPricePerMS:      pricePerMS,
			staticStreamBufferSet: sbs,
			staticStreamID:        sourceID,
			staticSpan:            opentracing.SpanFromContext(ctx),
		}
		sbs.streams[sourceID] = streamBuf
	} else {
		// Another data source already exists for this content, and it will
		// be used instead of the input data source. Close the input source.
		dataSource.SilentClose()
	}
	streamBuf.externRefCount++
	sbs.mu.Unlock()
	return streamBuf.managedPrepareNewStream(ctx, initialOffset, timeout, cachedSections, repair)
}

// callNewStreamFromID will check the stream buffer set to see if a stream
// buffer exists for the given data source id. If so, a new stream will be
// created using the data source, and the bool will be set to 'true'.
// Otherwise, the stream returned will be nil and the bool will be set to
// 'false'.
func (sbs *streamBufferSet) callNewStreamFromID(ctx context.Context, id skymodules.DataSourceID, initialOffset uint64, timeout time.Duration, cachedSections uint64, repair bool) (*stream, bool) {
	sbs.mu.Lock()
	streamBuf, exists := sbs.streams[id]
	if !exists {
		sbs.mu.Unlock()
		return nil, false
	}
	streamBuf.externRefCount++
	sbs.mu.Unlock()
	return streamBuf.managedPrepareNewStream(ctx, initialOffset, timeout, cachedSections, repair), true
}
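
// exampleOpenStream is an illustrative sketch (this helper and the newSource
// constructor are hypothetical, not part of the original package logic) of
// how the two entry points above compose: try the cheap ID-based lookup
// first, and only build a data source when no buffer for it is active yet.
// callNewStream deduplicates internally if another caller raced us and
// registered the same source first.
func exampleOpenStream(ctx context.Context, sbs *streamBufferSet, id skymodules.DataSourceID, newSource func() streamBufferDataSource) *stream {
	// Fast path: reuse an existing buffer without constructing a source.
	if s, exists := sbs.callNewStreamFromID(ctx, id, 0, 0, 0, false); exists {
		return s
	}
	// Slow path: construct the data source and register a new buffer.
	return sbs.callNewStream(ctx, newSource(), 0, 0, types.ZeroCurrency, 0, false)
}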

// managedData will block until the data for a data section is available, and
// then return the data. The data is not safe to modify.
func (ds *dataSection) managedData(ctx context.Context) (_ *downloadedData, err error) {
	start := time.Now()

	// Trace info.
	var duration time.Duration
	if span := opentracing.SpanFromContext(ctx); span != nil {
		span.SetTag("datasection", ds.staticID)
		defer func() {
			span.SetTag("success", err == nil)
			span.SetTag("duration", duration)
			if err != nil {
				span.LogKV("error", err)
				if errors.Contains(err, errTimeout) {
					span.SetTag("timeout", true)
				}
			}
		}()
	}

	select {
	case <-ds.dataAvailable:
		duration = time.Since(start)
	case <-ctx.Done():
		return nil, errTimeout
	}
	return ds.externData, ds.externErr
}

// CacheRatio returns the percentage of data served from the persisted cache
// by this stream.
func (s *stream) CacheRatio() float64 {
	return s.staticCacheRatio
}

// Close will release all of the resources held by a stream.
//
// Before removing the stream, this function will sleep for some time. This is
// specifically to address the use case where an application may be using the
// same file or resource continuously, but doing so by repeatedly opening new
// connections to siad rather than keeping a single stable connection. Some
// video players do this. On Skynet, most javascript applications do this, as
// the javascript application does not realize that multiple files within the
// app are all part of the same resource. The sleep here, which delays the
// release of a resource, substantially improves performance in practice, in
// many cases causing a 4x reduction in response latency.
func (s *stream) Close() error {
	// Finish the span.
	s.staticSpan.Finish()

	s.staticStreamBuffer.staticStreamBufferSet.staticTG.Launch(func() {
		// Convenience variables.
		sb := s.staticStreamBuffer
		sbs := sb.staticStreamBufferSet
		// Keep the memory for a while after closing unless this stream
		// was used for repairs.
		if !s.staticRepair {
			sbs.staticTG.Sleep(keepOldBuffersDuration)
		}

		// Drop all nodes from the lru.
		s.lru.callEvictAll()

		// Remove the stream from the streamBuffer.
		sbs.managedRemoveStream(sb)
	})
	return nil
}

// Metadata returns the skyfile metadata associated with this stream.
func (s *stream) Metadata() skymodules.SkyfileMetadata {
	return s.staticStreamBuffer.staticDataSource.Metadata()
}

// RawMetadata returns the raw skyfile metadata associated with this stream.
func (s *stream) RawMetadata() []byte {
	return s.staticStreamBuffer.staticDataSource.RawMetadata()
}

// Layout returns the skyfile layout associated with this stream.
func (s *stream) Layout() skymodules.SkyfileLayout {
	return s.staticStreamBuffer.staticDataSource.Layout()
}

// RawLayout returns the raw skyfile layout associated with this stream.
func (s *stream) RawLayout() (skymodules.SkyfileLayout, []byte, []crypto.Hash) {
	return s.staticStreamBuffer.staticDataSource.RawLayout()
}

// Skylink returns the skylink associated with this stream.
func (s *stream) Skylink() skymodules.Skylink {
	return s.staticStreamBuffer.staticDataSource.Skylink()
}

// Read will read data into 'b', returning the number of bytes read and any
// errors. Read will not fill 'b' up all the way if only part of the data is
// available.
func (s *stream) Read(b []byte) (int, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Create a context.
	ctx := s.staticContext
	if s.staticReadTimeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, s.staticReadTimeout)
		defer cancel()
	}

	// Create a child span.
	spanRef := opentracing.ChildOf(s.staticSpan.Context())
	span := opentracing.StartSpan("Read", spanRef)
	defer span.Finish()

	// Attach the span to the ctx.
	ctx = opentracing.ContextWithSpan(ctx, span)

	// Convenience variables.
	dataSize := s.staticStreamBuffer.staticDataSource.DataSize()
	dataSectionSize := s.staticStreamBuffer.staticDataSource.RequestSize()
	sb := s.staticStreamBuffer

	// Check for EOF.
	if s.offset == dataSize {
		return 0, io.EOF
	}

	// Get the index of the current section and the offset within the current
	// section.
	currentSection := s.offset / dataSectionSize
	offsetInSection := s.offset % dataSectionSize

	// Determine how many bytes are remaining within the current section;
	// this forms an upper bound on how many bytes can be read.
	var bytesRemaining uint64
	lastSection := (currentSection+1)*dataSectionSize >= dataSize
	if !lastSection {
		bytesRemaining = dataSectionSize - offsetInSection
	} else {
		bytesRemaining = dataSize - s.offset
	}

	// Determine how many bytes should be read.
	var bytesToRead uint64
	if bytesRemaining > uint64(len(b)) {
		bytesToRead = uint64(len(b))
	} else {
		bytesToRead = bytesRemaining
	}
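
	// Worked example with illustrative numbers: if dataSectionSize is 1 MiB,
	// dataSize is 2.5 MiB, and s.offset is 2 MiB + 100, then the read targets
	// currentSection 2 at offsetInSection 100. That section is the last one,
	// so bytesRemaining is 0.5 MiB - 100, and bytesToRead is capped at the
	// smaller of bytesRemaining and len(b).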

	// Fetch the dataSection that has the data we want to read.
	sb.mu.Lock()
	dataSection, exists := sb.dataSections[currentSection]
	sb.mu.Unlock()
	if !exists {
		err := errors.New("data section should always be in the stream buffer for the current offset of a stream")
		build.Critical(err)
		return 0, err
	}

	// Block until the data is available.
	dd, err := dataSection.managedData(ctx)
	if err != nil {
		return 0, errors.AddContext(err, "read call failed because data section fetch failed")
	}

	// Recover the data into b.
	b = b[:bytesToRead]
	err = dd.RecoverTo(b, int(offsetInSection))
	if err != nil {
		return 0, errors.AddContext(err, "failed to recover data")
	}
	s.offset += bytesToRead

	// Put the section in the cache. This needs to be called every time we
	// access a section for the hit counter to increment. Put only caches
	// once a certain threshold is reached.
	if err := sb.staticCache.Put(sb.staticDataSource.ID(), currentSection, dd); err != nil {
		build.Critical("failed to store response data in cache", err)
	}

	// Send the call to prepare the next data section.
	s.prepareOffset()
	return int(bytesToRead), nil
}
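
// exampleStreamUsage is an illustrative sketch (this helper is not part of
// the original package logic) of how a caller drives a stream: Seek positions
// the read head, then sequential Read calls - here issued by io.Copy - pull
// data while the buffer fetches ahead of the read position.
func exampleStreamUsage(s *stream, w io.Writer, offset int64) error {
	if _, err := s.Seek(offset, io.SeekStart); err != nil {
		return err
	}
	// Close releases the stream's buffer resources after a grace period.
	defer s.Close()
	_, err := io.Copy(w, s)
	return err
}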

// Seek will move the read head of the stream to the provided offset.
func (s *stream) Seek(offset int64, whence int) (int64, error) {
	// Input checking.
	if offset < 0 {
		return int64(s.offset), errors.New("offset cannot be negative in call to seek")
	}
	s.mu.Lock()
	defer s.mu.Unlock()

	// Update the offset of the stream according to the inputs.
	dataSize := s.staticStreamBuffer.staticDataSource.DataSize()
	switch whence {
	case io.SeekStart:
		s.offset = uint64(offset)
	case io.SeekCurrent:
		newOffset := s.offset + uint64(offset)
		if newOffset > dataSize {
			return int64(s.offset), errors.New("offset cannot seek beyond the bounds of the file")
		}
		s.offset = newOffset
	case io.SeekEnd:
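		// Note: since negative offsets are rejected above, the offset here
		// is interpreted as a distance back from the end of the data, so
		// Seek(0, io.SeekEnd) lands exactly on EOF.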
		if uint64(offset) > dataSize {
			return int64(s.offset), errors.New("cannot seek before the front of the file")
		}
		s.offset = dataSize - uint64(offset)
	default:
		return int64(s.offset), errors.New("invalid value for 'whence' in call to seek")
	}

	// Prepare the fetch of the updated offset.
	s.prepareOffset()
	return int64(s.offset), nil
}

// prepareOffset will ensure that the dataSection containing the offset is
// made available in the LRU, and that the following dataSection is also
// available.
func (s *stream) prepareOffset() {
	// Convenience variables.
	dataSize := s.staticStreamBuffer.staticDataSource.DataSize()
	dataSectionSize := s.staticStreamBuffer.staticDataSource.RequestSize()

	// If the offset is already at the end of the data, there is nothing to
	// do.
	if s.offset == dataSize {
		return
	}

	// Update the current data section. The update call will trigger the
	// streamBuffer to fetch the dataSection if the dataSection is not already
	// in the streamBuffer cache.
	index := s.offset / dataSectionSize
	s.lru.callUpdate(index)

	// If there is a following data section, update that as well. This update
	// is done regardless of the minimumLookahead; we always want to buffer at
	// least one more piece than the current piece.
	nextIndex := index + 1
	if nextIndex*dataSectionSize < dataSize {
		s.lru.callUpdate(nextIndex)
	}

	// For repair streams we don't prepare any more sections, since we usually
	// only download one chunk at a time anyway and repairs don't require low
	// latency.
	if s.staticRepair {
		return
	}

	// Keep adding more pieces to the buffer until we have buffered at least
	// minimumLookahead total data or have reached the end of the stream.
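	// The loop counter i tracks how many bytes have already been queued for
	// buffering; it starts at dataSectionSize * 2 because the current section
	// and, when it exists, the next section were already queued above.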
	nextIndex++
	for i := dataSectionSize * 2; i < minimumLookahead && nextIndex*dataSectionSize < dataSize; i += dataSectionSize {
		s.lru.callUpdate(nextIndex)
		nextIndex++
	}
}

// callFetchDataSection will increment the refcount of a dataSection in the
// stream buffer. If the dataSection is not currently available in the stream
// buffer, the data section will be fetched from the dataSource.
func (sb *streamBuffer) callFetchDataSection(index uint64) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	// Fetch the relevant dataSection, creating a new one if necessary.
	dataSection, exists := sb.dataSections[index]

	// If the data section exists, check if the data is valid.
	if exists {
		var replace bool
		select {
		case <-dataSection.dataAvailable:
			// If the cached section is invalid, replace it.
			replace = dataSection.externErr != nil
		default:
		}
		// If the section exists and shouldn't be replaced, just increment
		// the refcount.
		if !replace {
			// Increment the refcount of the dataSection.
			dataSection.refCount++
			return
		}
	}

	// Otherwise create a new dataSection, setting its refcount to 1 or, if
	// we are replacing an invalid section, to the previous count + 1.
	refCount := uint64(1)
	if exists {
		refCount = dataSection.refCount + 1
	}

	dataSection = sb.newDataSection(index)
	dataSection.refCount = refCount
}

// callRemoveDataSection will decrement the refcount of a data section in the
// stream buffer. If the refcount reaches zero, the data section will be
// deleted from the stream buffer.
func (sb *streamBuffer) callRemoveDataSection(index uint64) {
	sb.mu.Lock()
	defer sb.mu.Unlock()

	// Fetch the data section.
	ds, exists := sb.dataSections[index]
	if !exists {
		build.Critical("remove called on data section that does not exist")
		return
	}
	// Decrement the refcount.
	ds.refCount--
	// Delete the data section if the refcount has fallen to zero.
	if ds.refCount == 0 {
		// Also set the section in the map to nil for the garbage
		// collector.
		sb.dataSections[index] = nil
		delete(sb.dataSections, index)
	}
	// If there are no more sections we recreate the map to allow for the
	// old one to be garbage collected.
	if len(sb.dataSections) == 0 {
		sb.dataSections = make(map[uint64]*dataSection)
	}
}

// managedPrepareNewStream creates a new stream from an existing stream
// buffer. The ref count for the buffer needs to be incremented under the
// streamBufferSet lock, before this method is called.
func (sb *streamBuffer) managedPrepareNewStream(ctx context.Context, initialOffset uint64, timeout time.Duration, cachedSections uint64, repair bool) *stream {
	// Determine how many data sections the stream should cache.
	dataSize := sb.staticDataSource.DataSize()
	sectionSize := sb.staticDataSource.RequestSize()
	dataSectionsToCache := bytesBufferedPerStream / sectionSize
	if dataSectionsToCache < minimumDataSections {
		dataSectionsToCache = minimumDataSections
	}

	// Check how many sections are already in the persisted cache relative
	// to all potential sections we could cache.
	totalSections := dataSize / sectionSize
	if dataSize%sectionSize != 0 {
		totalSections++
	}
	// We add +1 to the total sections since there is one section reserved
	// for the base sector as well.
	totalSections++

	var cacheRatio float64
	if totalSections > 0 {
		cacheRatio = float64(100*cachedSections/totalSections) / 100
	}
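
	// For example, with illustrative numbers: cachedSections = 3 and
	// totalSections = 8 make the integer expression 100*3/8 evaluate to 37,
	// so cacheRatio = 0.37. The integer arithmetic deliberately quantizes
	// the ratio to two decimal places.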

	// Create a stream that points to the stream buffer.
	stream := &stream{
		lru:    newLeastRecentlyUsedCache(dataSectionsToCache, sb),
		offset: initialOffset,

		staticCacheRatio:   cacheRatio,
		staticContext:      sb.staticTG.StopCtx(),
		staticReadTimeout:  timeout,
		staticRepair:       repair,
		staticStreamBuffer: sb,
		staticSpan:         opentracing.SpanFromContext(ctx),
	}
	stream.prepareOffset()
	return stream
}

// newDataSection will create a new data section for the streamBuffer and spin
// up a goroutine to pull the data from the data source.
func (sb *streamBuffer) newDataSection(index uint64) *dataSection {
	// Create a random identifier.
	var id [8]byte
	fastrand.Read(id[:])

	// Create the data section, allocating the right number of bytes for the
	// ReadAt call to fill out.
	ds := &dataSection{
		staticID: hex.EncodeToString(id[:]),

		dataAvailable: make(chan struct{}),
	}
	sb.dataSections[index] = ds

	// See if we can fill the data section from the cache.
	lru := sb.staticCache
	data, cached, err := lru.Get(sb.staticDataSource.ID(), index)
	if err != nil {
		build.Critical("failed to read from cache", err)
	}
	if err == nil && cached {
		ds.externData = data
		close(ds.dataAvailable)
		return ds
	}

	// If not, perform the data fetch in a goroutine. The dataAvailable
	// channel will be closed when the data is available.
	go func() {
		defer close(ds.dataAvailable)

		// Create a child span for the data section.
		spanRef := opentracing.ChildOf(sb.staticSpan.Context())
		span := opentracing.StartSpan("newDataSection", spanRef)
		span.LogKV("index", index)
		defer func() {
			if ds.externErr != nil {
				span.LogKV("error", ds.externErr)
			}
			span.SetTag("success", ds.externErr == nil)
			span.SetTag("long", ds.externDuration >= longDownloadThreshold)
			span.Finish()
		}()

		// Ensure that the streambuffer has not closed.
		err := sb.staticTG.Add()
		if err != nil {
			ds.externErr = errors.AddContext(err, "stream buffer has been shut down")
			return
		}
		defer sb.staticTG.Done()

		// Limit the time we wait for the section to be downloaded.
		ctx, cancel := context.WithTimeout(sb.staticTG.StopCtx(), newDataSectionTimeout)
		defer cancel()

		// Create a context from our span.
		ctx = opentracing.ContextWithSpan(ctx, span)

		// Grab the data from the data source.
		start := time.Now()
		responseChan, err := sb.staticDataSource.ReadSection(ctx, index, sb.staticPricePerMS)
		if err != nil {
			ds.externErr = errors.AddContext(err, "failed to read data section")
			return
		}

		select {
		case response := <-responseChan:
			dd, err := response.Data()
			ds.externErr = errors.AddContext(err, "data section ReadSection failed")
			ds.externDuration = time.Since(start)
			ds.externData = dd

			if ds.externErr == nil {
				// Add datapoint to stats.
				sb.staticStreamBufferSet.staticStatsCollector.AddDataPoint(ds.externDuration)
			}
		case <-sb.staticTG.StopChan():
			ds.externErr = errors.AddContext(errTimeout, "failed to read response from ReadSection")
		}
	}()
	return ds
}

// managedRemoveStream will remove a stream from a stream buffer. If the total
// number of streams using that stream buffer reaches zero, the stream buffer
// will be removed from the stream buffer set.
//
// The reference counter for a stream buffer needs to be in the domain of the
// stream buffer set because the stream buffer needs to be deleted from the
// stream buffer set simultaneously with the reference counter reaching zero.
func (sbs *streamBufferSet) managedRemoveStream(sb *streamBuffer) {
	// Decrement the refcount of the streamBuffer.
	sbs.mu.Lock()
	sb.externRefCount--
	if sb.externRefCount > 0 {
		// streamBuffer still in use, nothing to do.
		sbs.mu.Unlock()
		return
	}
	// Before deletion, nil the entry for the GC.
	sbs.streams[sb.staticStreamID] = nil
	delete(sbs.streams, sb.staticStreamID)

	// Reallocate the map if it is empty to free more memory.
	if len(sbs.streams) == 0 {
		sbs.streams = make(map[skymodules.DataSourceID]*streamBuffer)
	}
	sbs.mu.Unlock()

	// Close out the streamBuffer and its data source. Calling Stop() will
	// block any new calls to ReadAt from executing, and will block until all
	// existing calls are completed. This prevents any issues that could be
	// caused by the data source being accessed after it has been closed.
	sb.staticTG.Stop()
	sb.staticDataSource.SilentClose()