gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/streambuffer.go (about) 1 package renter 2 3 // NOTE: This stream buffer is uninfished in a couple of ways. The first way is 4 // that it's not possible to cancel fetches. The second way is that fetches are 5 // not prioritized, there should be a higher priority on data that is closer to 6 // the current stream offset. The third is that the amount of data which gets 7 // fetched is not dynamically adjusted. The streamer really should be monitoring 8 // the total amount of time it takes for a call to the data source to return 9 // some data, and should buffer accordingly. If auto-adjusting the lookahead 10 // size, care needs to be taken to ensure not to exceed the 11 // bytesBufferedPerStream size, as exceeding that will cause issues with the 12 // lru, and cause data fetches to be evicted before they become useful. 13 14 import ( 15 "context" 16 "encoding/hex" 17 "io" 18 "sync" 19 "time" 20 21 "github.com/opentracing/opentracing-go" 22 "gitlab.com/SkynetLabs/skyd/build" 23 "gitlab.com/SkynetLabs/skyd/skymodules" 24 "go.sia.tech/siad/crypto" 25 "go.sia.tech/siad/types" 26 27 "gitlab.com/NebulousLabs/errors" 28 "gitlab.com/NebulousLabs/fastrand" 29 "gitlab.com/NebulousLabs/threadgroup" 30 ) 31 32 const ( 33 // minimumDataSections is set to two because the streamer always tries to 34 // buffer at least the current data section and the next data section for 35 // the current offset of a stream. 36 // 37 // Three as a number was considered so that in addition to buffering one 38 // piece ahead, a previous piece could also be cached. This was considered 39 // to be less valuable than keeping memory requirements low - 40 // minimumDataSections is only at play if there is not enough room for 41 // multiple cache nodes in the bytesBufferedPerStream. 42 minimumDataSections = 2 43 44 // longDownloadThreshold specifies when a download is considered to be 45 // taking long. This value might change in the future, it is based on the 46 // p99 values for downloads, which is above 3s on some of our servers in 47 // production currently. 48 longDownloadThreshold = time.Second * 3 49 ) 50 51 var ( 52 // errTimeout is returned when the context cancels before the data is 53 // available. 54 errTimeout = errors.New("could not get data from data section, context timed out") 55 56 // bytesBufferedPerStream is the total amount of data that gets allocated 57 // per stream. If the RequestSize of a stream buffer is less than three 58 // times the bytesBufferedPerStream, that much data will be allocated 59 // instead. 60 // 61 // For example, if the RequestSize is 10kb and the bytesBufferedPerStream is 62 // 100kb, then each stream is going to buffer 10 segments that are each 10kb 63 // long in the LRU. 64 // 65 // But if the RequestSize is 50kb and the bytesBufferedPerStream is 100kb, 66 // then each stream is going to buffer 3 segments that are each 50kb long in 67 // the LRU, for a total of 150kb. 68 bytesBufferedPerStream = build.Select(build.Var{ 69 Dev: uint64(1 << 25), // 32 MiB 70 Standard: uint64(1 << 25), // 32 MiB 71 Testing: uint64(1 << 8), // 256 bytes 72 }).(uint64) 73 74 // keepOldBuffersDuration specifies how long a stream buffer will stay in 75 // the buffer set after the final stream is closed. This gives some buffer 76 // time for a new request to the same resource, without having the data 77 // source fully cleared out. This optimization is particularly useful for 78 // certain video players and web applications. 79 keepOldBuffersDuration = build.Select(build.Var{ 80 Dev: time.Second * 15, 81 Standard: time.Second * 60, 82 Testing: time.Second * 2, 83 }).(time.Duration) 84 85 // minimumLookahead defines the minimum amount that the stream will fetch 86 // ahead of the current seek position in a stream. 87 // 88 // Note that there is a throughput vs. latency tradeoff here. The maximum 89 // speed of a stream has an upper bound of the lookahead / latency. So if it 90 // takes 1 second to fetch data and the lookahead is 2 MB, the maximum speed 91 // of a single stream is going to be 2 MB/s. When Sia is healthy, the 92 // latency on a fetch should be under 200ms, which means with a 2 MB 93 // lookahead a single stream should be able to do more than 10 MB/s. 94 // 95 // A smaller minimum lookahead means that less data is being buffered 96 // simultaneously, so seek times should be lower. A smaller minimum 97 // lookahead becomes less important if we get some way to ensure the earlier 98 // parts are prioritized, but we don't have control over that at the moment. 99 minimumLookahead = build.Select(build.Var{ 100 Dev: uint64(1 << 21), // 2 MiB 101 Standard: uint64(1 << 23), // 8 MiB 102 Testing: uint64(1 << 6), // 64 bytes 103 }).(uint64) 104 105 // newDataSectionTimeout is the timeout we enforce when downloading the 106 // data for a new data section we just created. 107 newDataSectionTimeout = build.Select(build.Var{ 108 Dev: 30 * time.Second, 109 Standard: 2 * time.Minute, 110 Testing: 30 * time.Second, 111 }).(time.Duration) 112 ) 113 114 // streamBufferDataSource is an interface that the stream buffer uses to fetch 115 // data. This type is internal to the renter as there are plans to expand on the 116 // type. 117 type streamBufferDataSource interface { 118 // DataSize should return the size of the data. When the streamBuffer is 119 // reading from the data source, it will ensure that none of the read calls 120 // go beyond the boundary of the data source. 121 DataSize() uint64 122 123 // ID returns the ID of the data source. This should be unique to the data 124 // source - that is, every data source that returns the same ID should have 125 // identical data and be fully interchangeable. 126 ID() skymodules.DataSourceID 127 128 // HasRecursiveFanout returns 'true' if the datasource belongs to a 129 // skyfile with recursive fanout. 130 HasRecursiveFanout() bool 131 132 // Metadata returns the Skyfile metadata of a data source. 133 Metadata() skymodules.SkyfileMetadata 134 135 // RawMetadata returns the raw metadata of a data source. 136 RawMetadata() []byte 137 138 // ReadBaseSectorPayload reads data from the data source's base sector 139 // payload. This will return an error when called on anything but a 140 // small skyfile. 141 ReadBaseSectorPayload(off, length uint64) (*downloadResponse, error) 142 143 // ReadFanout reads a single piece root from the fanout of the 144 // datasource and returns the proof for that root as well as the offset 145 // within the sector. 146 ReadFanout(chunkIndex, pieceIndex uint64) ([]byte, []crypto.Hash, uint32, error) 147 148 // Layout returns the Skyfile layout of a data source. 149 Layout() skymodules.SkyfileLayout 150 151 // Layout returns the Skyfile layout of a data source. 152 RawLayout() (skymodules.SkyfileLayout, []byte, []crypto.Hash) 153 154 // RequestSize should return the request size that the dataSource expects 155 // the streamBuffer to use. The streamBuffer will always make ReadAt calls 156 // that are of the suggested request size and byte aligned. 157 // 158 // If the request size is small, many ReadAt calls will be made in parallel. 159 // If the dataSource can handle high parallelism, a smaller request size 160 // should be recommended to the streamBuffer, because that will reduce 161 // latency. If the dataSource cannot handle high parallelism, a larger 162 // request size should be used to optimize for total throughput. 163 // 164 // A general rule of thumb is that the streamer should be able to 165 // comfortably handle 100 mbps (high end 4K video) if the user's local 166 // connection has that much throughput. 167 RequestSize() uint64 168 169 // SilentClose is an io.Closer that does not return an error. The data 170 // source is expected to handle any logging or reporting that is necessary 171 // if the closing fails. 172 SilentClose() 173 174 // Skylink returns the skylink of the datasource. 175 Skylink() skymodules.Skylink 176 177 // ReadSection allows the stream buffer to request a specific data 178 // section from the data source. It returns a channel containing a 179 // download response. 180 ReadSection(context.Context, uint64, types.Currency) (<-chan *downloadResponse, error) 181 } 182 183 // dataSection represents a section of data from a data source. The data section 184 // includes a refcount of how many different streams have the data in their LRU. 185 // If the refCount is ever set to 0, the data section should be deleted. Because 186 // the dataSection has no mutex, the refCount falls under the consistency domain 187 // of the object holding it, which should always be a streamBuffer. 188 type dataSection struct { 189 // staticID uniquely identifies this data section. 190 staticID string 191 192 // dataAvailable, externData, externDuration, and externErr work together. 193 // The data and error are not allowed to be accessed by external threads 194 // until the data available channel has been closed. Once the dataAvailable 195 // channel has been closed, externData, externDuration and externErr are to 196 // be treated like static fields. 197 dataAvailable chan struct{} 198 externDuration time.Duration 199 externData *downloadedData 200 externErr error 201 202 refCount uint64 203 } 204 205 // stream is a single stream that uses a stream buffer. The stream implements 206 // io.ReadSeeker and io.Closer, and must be closed when it is done being used. 207 // The stream will cache data, both data that has been accessed recently as well 208 // as data that is in front of the current read head. The stream buffer is a 209 // common cache that is used between all streams that are using the same data 210 // source, allowing each stream to depend on the other streams if data has 211 // already been loaded. 212 type stream struct { 213 lru *leastRecentlyUsedCache 214 offset uint64 215 216 mu sync.Mutex 217 staticStreamBuffer *streamBuffer 218 219 staticCacheRatio float64 220 staticContext context.Context 221 staticRepair bool 222 staticSpan opentracing.Span 223 staticReadTimeout time.Duration 224 } 225 226 // streamBuffer is a buffer for a single dataSource. 227 // 228 // The streamBuffer uses a threadgroup to ensure that it does not call ReadAt 229 // after calling SilentClose. 230 type streamBuffer struct { 231 dataSections map[uint64]*dataSection 232 233 // externRefCount is in the same consistency domain as the streamBufferSet, 234 // it needs to be incremented and decremented simultaneously with the 235 // creation and deletion of the streamBuffer. 236 externRefCount uint64 237 238 mu sync.Mutex 239 staticCache PersistedLRU 240 staticTG threadgroup.ThreadGroup 241 staticDataSource streamBufferDataSource 242 staticStreamBufferSet *streamBufferSet 243 staticStreamID skymodules.DataSourceID 244 staticPricePerMS types.Currency 245 staticSpan opentracing.Span 246 } 247 248 // streamBufferSet tracks all of the stream buffers that are currently active. 249 // When a new stream is created, the stream buffer set is referenced to check 250 // whether another stream using the same data source already exists. 251 type streamBufferSet struct { 252 streams map[skymodules.DataSourceID]*streamBuffer 253 254 staticCache PersistedLRU 255 staticStatsCollector *skymodules.DistributionTracker 256 staticTG *threadgroup.ThreadGroup 257 mu sync.Mutex 258 } 259 260 // newStreamBufferSet initializes and returns a stream buffer set. 261 func newStreamBufferSet(statsCollector *skymodules.DistributionTracker, tg *threadgroup.ThreadGroup, cache PersistedLRU) *streamBufferSet { 262 return &streamBufferSet{ 263 streams: make(map[skymodules.DataSourceID]*streamBuffer), 264 265 staticCache: cache, 266 staticStatsCollector: statsCollector, 267 staticTG: tg, 268 } 269 } 270 271 // callNewStream will create a stream that implements io.Close and 272 // io.ReadSeeker. A dataSource must be provided for the stream so that the 273 // stream can fetch data in advance of calls to 'Read' and attempt to provide a 274 // smooth streaming experience. 275 // 276 // The 'sourceID' is a unique identifier for the dataSource which allows 277 // multiple streams fetching data from the same source to combine their cache. 278 // This shared cache only comes into play if the streams are simultaneously 279 // accessing the same data, allowing the buffer to save on memory and access 280 // latency. 281 // 282 // Each stream has a separate LRU for determining what data to buffer. Because 283 // the LRU is distinct to the stream, the shared cache feature will not result 284 // in one stream evicting data from another stream's LRU. 285 func (sbs *streamBufferSet) callNewStream(ctx context.Context, dataSource streamBufferDataSource, initialOffset uint64, timeout time.Duration, pricePerMS types.Currency, cachedSections uint64, repair bool) *stream { 286 // Grab the streamBuffer for the provided sourceID. If no streamBuffer for 287 // the sourceID exists, create a new one. 288 sourceID := dataSource.ID() 289 sbs.mu.Lock() 290 streamBuf, exists := sbs.streams[sourceID] 291 if !exists { 292 streamBuf = &streamBuffer{ 293 dataSections: make(map[uint64]*dataSection), 294 295 staticCache: sbs.staticCache, 296 staticDataSource: dataSource, 297 staticPricePerMS: pricePerMS, 298 staticStreamBufferSet: sbs, 299 staticStreamID: sourceID, 300 staticSpan: opentracing.SpanFromContext(ctx), 301 } 302 sbs.streams[sourceID] = streamBuf 303 } else { 304 // Another data source already exists for this content which will be 305 // used instead of the input data source. Close the input source. 306 dataSource.SilentClose() 307 } 308 streamBuf.externRefCount++ 309 sbs.mu.Unlock() 310 return streamBuf.managedPrepareNewStream(ctx, initialOffset, timeout, cachedSections, repair) 311 } 312 313 // callNewStreamFromID will check the stream buffer set to see if a stream 314 // buffer exists for the given data source id. If so, a new stream will be 315 // created using the data source, and the bool will be set to 'true'. Otherwise, 316 // the stream returned will be nil and the bool will be set to 'false'. 317 func (sbs *streamBufferSet) callNewStreamFromID(ctx context.Context, id skymodules.DataSourceID, initialOffset uint64, timeout time.Duration, cachedSections uint64, repair bool) (*stream, bool) { 318 sbs.mu.Lock() 319 streamBuf, exists := sbs.streams[id] 320 if !exists { 321 sbs.mu.Unlock() 322 return nil, false 323 } 324 streamBuf.externRefCount++ 325 sbs.mu.Unlock() 326 return streamBuf.managedPrepareNewStream(ctx, initialOffset, timeout, cachedSections, repair), true 327 } 328 329 // managedData will block until the data for a data section is available, and 330 // then return the data. The data is not safe to modify. 331 func (ds *dataSection) managedData(ctx context.Context) (_ *downloadedData, err error) { 332 start := time.Now() 333 334 // Trace info. 335 var duration time.Duration 336 if span := opentracing.SpanFromContext(ctx); span != nil { 337 span.SetTag("datasection", ds.staticID) 338 defer func() { 339 span.SetTag("success", err == nil) 340 span.SetTag("duration", duration) 341 if err != nil { 342 span.LogKV("error", err) 343 if errors.Contains(err, errTimeout) { 344 span.SetTag("timeout", true) 345 } 346 } 347 }() 348 } 349 350 select { 351 case <-ds.dataAvailable: 352 duration = time.Since(start) 353 case <-ctx.Done(): 354 return nil, errTimeout 355 } 356 return ds.externData, ds.externErr 357 } 358 359 // CacheRatio returns the percentage of data served from the persisted cache by 360 // this stream. 361 func (s *stream) CacheRatio() float64 { 362 return s.staticCacheRatio 363 } 364 365 // Close will release all of the resources held by a stream. 366 // 367 // Before removing the stream, this function will sleep for some time. This is 368 // specifically to address the use case where an application may be using the 369 // same file or resource continuously, but doing so by repeatedly opening new 370 // connections to siad rather than keeping a single stable connection. Some 371 // video players do this. On Skynet, most javascript applications do this, as 372 // the javascript application does not realize that multiple files within the 373 // app are all part of the same resource. This sleep here to delay the release 374 // of a resource substantially improves performance in practice, in many cases 375 // causing a 4x reduction in response latency. 376 func (s *stream) Close() error { 377 // Finish the span 378 s.staticSpan.Finish() 379 380 s.staticStreamBuffer.staticStreamBufferSet.staticTG.Launch(func() { 381 // Convenience variables. 382 sb := s.staticStreamBuffer 383 sbs := sb.staticStreamBufferSet 384 // Keep the memory for a while after closing unless this stream 385 // was used for repairs. 386 if !s.staticRepair { 387 sbs.staticTG.Sleep(keepOldBuffersDuration) 388 } 389 390 // Drop all nodes from the lru. 391 s.lru.callEvictAll() 392 393 // Remove the stream from the streamBuffer. 394 sbs.managedRemoveStream(sb) 395 }) 396 return nil 397 } 398 399 // Metadata returns the skyfile metadata associated with this stream. 400 func (s *stream) Metadata() skymodules.SkyfileMetadata { 401 return s.staticStreamBuffer.staticDataSource.Metadata() 402 } 403 404 // RawMetadata returns the skyfile metadata associated with this stream. 405 func (s *stream) RawMetadata() []byte { 406 return s.staticStreamBuffer.staticDataSource.RawMetadata() 407 } 408 409 // Layout returns the skyfile layout associated with this stream. 410 func (s *stream) Layout() skymodules.SkyfileLayout { 411 return s.staticStreamBuffer.staticDataSource.Layout() 412 } 413 414 // Layout returns the skyfile layout associated with this stream. 415 func (s *stream) RawLayout() (skymodules.SkyfileLayout, []byte, []crypto.Hash) { 416 return s.staticStreamBuffer.staticDataSource.RawLayout() 417 } 418 419 // Skylink returns the skylink associated with this stream. 420 func (s *stream) Skylink() skymodules.Skylink { 421 return s.staticStreamBuffer.staticDataSource.Skylink() 422 } 423 424 // Read will read data into 'b', returning the number of bytes read and any 425 // errors. Read will not fill 'b' up all the way if only part of the data is 426 // available. 427 func (s *stream) Read(b []byte) (int, error) { 428 s.mu.Lock() 429 defer s.mu.Unlock() 430 431 // Create a context. 432 ctx := s.staticContext 433 if s.staticReadTimeout > 0 { 434 var cancel context.CancelFunc 435 ctx, cancel = context.WithTimeout(ctx, s.staticReadTimeout) 436 defer cancel() 437 } 438 439 // Create a child span. 440 spanRef := opentracing.ChildOf(s.staticSpan.Context()) 441 span := opentracing.StartSpan("Read", spanRef) 442 defer span.Finish() 443 444 // Attach the span to the ctx. 445 ctx = opentracing.ContextWithSpan(ctx, span) 446 447 // Convenience variables. 448 dataSize := s.staticStreamBuffer.staticDataSource.DataSize() 449 dataSectionSize := s.staticStreamBuffer.staticDataSource.RequestSize() 450 sb := s.staticStreamBuffer 451 452 // Check for EOF. 453 if s.offset == dataSize { 454 return 0, io.EOF 455 } 456 457 // Get the index of the current section and the offset within the current 458 // section. 459 currentSection := s.offset / dataSectionSize 460 offsetInSection := s.offset % dataSectionSize 461 462 // Determine how many bytes are remaining within the current section, this 463 // forms an upper bound on how many bytes can be read. 464 var bytesRemaining uint64 465 lastSection := (currentSection+1)*dataSectionSize >= dataSize 466 if !lastSection { 467 bytesRemaining = dataSectionSize - offsetInSection 468 } else { 469 bytesRemaining = dataSize - s.offset 470 } 471 472 // Determine how many bytes should be read. 473 var bytesToRead uint64 474 if bytesRemaining > uint64(len(b)) { 475 bytesToRead = uint64(len(b)) 476 } else { 477 bytesToRead = bytesRemaining 478 } 479 480 // Fetch the dataSection that has the data we want to read. 481 sb.mu.Lock() 482 dataSection, exists := sb.dataSections[currentSection] 483 sb.mu.Unlock() 484 if !exists { 485 err := errors.New("data section should always in the stream buffer for the current offset of a stream") 486 build.Critical(err) 487 return 0, err 488 } 489 490 // Block until the data is available. 491 dd, err := dataSection.managedData(ctx) 492 if err != nil { 493 return 0, errors.AddContext(err, "read call failed because data section fetch failed") 494 } 495 496 // Recover the data into b. 497 b = b[:bytesToRead] 498 err = dd.RecoverTo(b, int(offsetInSection)) 499 if err != nil { 500 return 0, errors.AddContext(err, "failed to recover data") 501 } 502 s.offset += bytesToRead 503 504 // Put the section in the cache. This needs to be called every time we 505 // access a section for the hit counter to increment. Put only caches 506 // once a certain threshold is reached. 507 if err := sb.staticCache.Put(sb.staticDataSource.ID(), currentSection, dd); err != nil { 508 build.Critical("failed to store response data in cache", err) 509 } 510 511 // Send the call to prepare the next data section. 512 s.prepareOffset() 513 return int(bytesToRead), nil 514 } 515 516 // Seek will move the read head of the stream to the provided offset. 517 func (s *stream) Seek(offset int64, whence int) (int64, error) { 518 // Input checking. 519 if offset < 0 { 520 return int64(s.offset), errors.New("offset cannot be negative in call to seek") 521 } 522 s.mu.Lock() 523 defer s.mu.Unlock() 524 525 // Update the offset of the stream according to the inputs. 526 dataSize := s.staticStreamBuffer.staticDataSource.DataSize() 527 switch whence { 528 case io.SeekStart: 529 s.offset = uint64(offset) 530 case io.SeekCurrent: 531 newOffset := s.offset + uint64(offset) 532 if newOffset > dataSize { 533 return int64(s.offset), errors.New("offset cannot seek beyond the bounds of the file") 534 } 535 s.offset = newOffset 536 case io.SeekEnd: 537 if uint64(offset) > dataSize { 538 return int64(s.offset), errors.New("cannot seek before the front of the file") 539 } 540 s.offset = dataSize - uint64(offset) 541 default: 542 return int64(s.offset), errors.New("invalid value for 'whence' in call to seek") 543 } 544 545 // Prepare the fetch of the updated offset. 546 s.prepareOffset() 547 return int64(s.offset), nil 548 } 549 550 // prepareOffset will ensure that the dataSection containing the offset is made 551 // available in the LRU, and that the following dataSection is also available. 552 func (s *stream) prepareOffset() { 553 // Convenience variables. 554 dataSize := s.staticStreamBuffer.staticDataSource.DataSize() 555 dataSectionSize := s.staticStreamBuffer.staticDataSource.RequestSize() 556 557 // If the offset is already at the end of the data, there is nothing to do. 558 if s.offset == dataSize { 559 return 560 } 561 562 // Update the current data section. The update call will trigger the 563 // streamBuffer to fetch the dataSection if the dataSection is not already 564 // in the streamBuffer cache. 565 index := s.offset / dataSectionSize 566 s.lru.callUpdate(index) 567 568 // If there is a following data section, update that as well. This update is 569 // done regardless of the minimumLookahead, we always want to buffer at 570 // least one more piece than the current piece. 571 nextIndex := index + 1 572 if nextIndex*dataSectionSize < dataSize { 573 s.lru.callUpdate(nextIndex) 574 } 575 576 // For repair streams we don't prepare anymore sections since we usually 577 // only download one chunk at-a-time anyway and repairs don't require 578 // high latency guarantees. 579 if s.staticRepair { 580 return 581 } 582 583 // Keep adding more pieces to the buffer until we have buffered at least 584 // minimumLookahead total data or have reached the end of the stream. 585 nextIndex++ 586 for i := dataSectionSize * 2; i < minimumLookahead && nextIndex*dataSectionSize < dataSize; i += dataSectionSize { 587 s.lru.callUpdate(nextIndex) 588 nextIndex++ 589 } 590 } 591 592 // callFetchDataSection will increment the refcount of a dataSection in the 593 // stream buffer. If the dataSection is not currently available in the stream 594 // buffer, the data section will be fetched from the dataSource. 595 func (sb *streamBuffer) callFetchDataSection(index uint64) { 596 sb.mu.Lock() 597 defer sb.mu.Unlock() 598 599 // Fetch the relevant dataSection, creating a new one if necessary. 600 dataSection, exists := sb.dataSections[index] 601 602 // If the data section exists, check if the data is valid. 603 if exists { 604 var replace bool 605 select { 606 case <-dataSection.dataAvailable: 607 // If the cached section is invalid, replace it. 608 replace = dataSection.externErr != nil 609 default: 610 } 611 // If the section exists and shouldn't be replaced, just increment the 612 // refcount. 613 if !replace { 614 // Increment the refcount of the dataSection. 615 dataSection.refCount++ 616 return 617 } 618 } 619 620 // Otherwise we create a new datasection and either set the refcount to 621 // 1 or the previous count + 1. 622 refCount := uint64(1) 623 if exists { 624 refCount = dataSection.refCount + 1 625 } 626 627 dataSection = sb.newDataSection(index) 628 dataSection.refCount = refCount 629 } 630 631 // callRemoveDataSection will decrement the refcount of a data section in the 632 // stream buffer. If the refcount reaches zero, the data section will be deleted 633 // from the stream buffer. 634 func (sb *streamBuffer) callRemoveDataSection(index uint64) { 635 sb.mu.Lock() 636 defer sb.mu.Unlock() 637 638 // Fetch the data section. 639 ds, exists := sb.dataSections[index] 640 if !exists { 641 build.Critical("remove called on data section that does not exist") 642 return 643 } 644 // Decrement the refcount. 645 ds.refCount-- 646 // Delete the data section if the refcount has fallen to zero. 647 if ds.refCount == 0 { 648 // Also set the section in the map to nil for the garabage 649 // collector. 650 sb.dataSections[index] = nil 651 delete(sb.dataSections, index) 652 } 653 // If there are no more sections we recreate the map to allow for the 654 // old one to be garbage collected. 655 if len(sb.dataSections) == 0 { 656 sb.dataSections = make(map[uint64]*dataSection) 657 } 658 } 659 660 // managedPrepareNewStream creates a new stream from an existing stream buffer. 661 // The ref count for the buffer needs to be incremented under the 662 // streamBufferSet lock, before this method is called. 663 func (sb *streamBuffer) managedPrepareNewStream(ctx context.Context, initialOffset uint64, timeout time.Duration, cachedSections uint64, repair bool) *stream { 664 // Determine how many data sections the stream should cache. 665 dataSize := sb.staticDataSource.DataSize() 666 sectionSize := sb.staticDataSource.RequestSize() 667 dataSectionsToCache := bytesBufferedPerStream / sectionSize 668 if dataSectionsToCache < minimumDataSections { 669 dataSectionsToCache = minimumDataSections 670 } 671 672 // Check how many sections are already in the persisted cache relative 673 // to all potential sections we could cache. 674 totalSections := dataSize / sectionSize 675 if dataSize%sectionSize != 0 { 676 totalSections++ 677 } 678 // We add +1 to the total sections since there is one section reserved 679 // for the base sector as well. 680 totalSections++ 681 682 var cacheRatio float64 683 if totalSections > 0 { 684 cacheRatio = float64(100*cachedSections/totalSections) / 100 685 } 686 687 // Create a stream that points to the stream buffer. 688 stream := &stream{ 689 lru: newLeastRecentlyUsedCache(dataSectionsToCache, sb), 690 offset: initialOffset, 691 692 staticCacheRatio: cacheRatio, 693 staticContext: sb.staticTG.StopCtx(), 694 staticReadTimeout: timeout, 695 staticRepair: repair, 696 staticStreamBuffer: sb, 697 staticSpan: opentracing.SpanFromContext(ctx), 698 } 699 stream.prepareOffset() 700 return stream 701 } 702 703 // newDataSection will create a new data section for the streamBuffer and spin 704 // up a goroutine to pull the data from the data source. 705 func (sb *streamBuffer) newDataSection(index uint64) *dataSection { 706 // Create a random identifier 707 var id [8]byte 708 fastrand.Read(id[:]) 709 710 // Create the data section, allocating the right number of bytes for the 711 // ReadAt call to fill out. 712 ds := &dataSection{ 713 staticID: hex.EncodeToString(id[:]), 714 715 dataAvailable: make(chan struct{}), 716 } 717 sb.dataSections[index] = ds 718 719 // See if we can fill the data section from the cache. 720 lru := sb.staticCache 721 data, cached, err := lru.Get(sb.staticDataSource.ID(), index) 722 if err != nil { 723 build.Critical("failed to read from cache", err) 724 } 725 if err == nil && cached { 726 ds.externData = data 727 close(ds.dataAvailable) 728 return ds 729 } 730 731 // If not, perform the data fetch in a goroutine. The dataAvailable 732 // channel will be closed when the data is available. 733 go func() { 734 defer close(ds.dataAvailable) 735 736 // Create a child span for the data section 737 spanRef := opentracing.ChildOf(sb.staticSpan.Context()) 738 span := opentracing.StartSpan("newDataSection", spanRef) 739 span.LogKV("index", index) 740 defer func() { 741 if ds.externErr != nil { 742 span.LogKV("error", ds.externErr) 743 } 744 span.SetTag("success", ds.externErr == nil) 745 span.SetTag("long", ds.externDuration >= longDownloadThreshold) 746 span.Finish() 747 }() 748 749 // Ensure that the streambuffer has not closed. 750 err := sb.staticTG.Add() 751 if err != nil { 752 ds.externErr = errors.AddContext(err, "stream buffer has been shut down") 753 return 754 } 755 defer sb.staticTG.Done() 756 757 // Limit the time we wait for the section to be downloaded. 758 ctx, cancel := context.WithTimeout(sb.staticTG.StopCtx(), newDataSectionTimeout) 759 defer cancel() 760 761 // Create a context from our span 762 ctx = opentracing.ContextWithSpan(ctx, span) 763 764 // Grab the data from the data source. 765 start := time.Now() 766 responseChan, err := sb.staticDataSource.ReadSection(ctx, index, sb.staticPricePerMS) 767 if err != nil { 768 ds.externErr = errors.AddContext(err, "failed to read data section") 769 return 770 } 771 772 select { 773 case response := <-responseChan: 774 dd, err := response.Data() 775 ds.externErr = errors.AddContext(err, "data section ReadStream failed") 776 ds.externDuration = time.Since(start) 777 ds.externData = dd 778 779 if ds.externErr == nil { 780 // Add datapoint to stats. 781 sb.staticStreamBufferSet.staticStatsCollector.AddDataPoint(ds.externDuration) 782 } 783 case <-sb.staticTG.StopChan(): 784 ds.externErr = errors.AddContext(errTimeout, "failed to read response from ReadStream") 785 } 786 }() 787 return ds 788 } 789 790 // managedRemoveStream will remove a stream from a stream buffer. If the total 791 // number of streams using that stream buffer reaches zero, the stream buffer 792 // will be removed from the stream buffer set. 793 // 794 // The reference counter for a stream buffer needs to be in the domain of the 795 // stream buffer set because the stream buffer needs to be deleted from the 796 // stream buffer set simultaneously with the reference counter reaching zero. 797 func (sbs *streamBufferSet) managedRemoveStream(sb *streamBuffer) { 798 // Decrement the refcount of the streamBuffer. 799 sbs.mu.Lock() 800 sb.externRefCount-- 801 if sb.externRefCount > 0 { 802 // streamBuffer still in use, nothing to do. 803 sbs.mu.Unlock() 804 return 805 } 806 // Before deletion, nil the entry for the GC. 807 sbs.streams[sb.staticStreamID] = nil 808 delete(sbs.streams, sb.staticStreamID) 809 810 // Reallocate the map if it is empty to free more memory. 811 if len(sbs.streams) == 0 { 812 sbs.streams = make(map[skymodules.DataSourceID]*streamBuffer) 813 } 814 sbs.mu.Unlock() 815 816 // Close out the streamBuffer and its data source. Calling Stop() will block 817 // any new calls to ReadAt from executing, and will block until all existing 818 // calls are completed. This prevents any issues that could be caused by the 819 // data source being accessed after it has been closed. 820 sb.staticTG.Stop() 821 sb.staticDataSource.SilentClose() 822 }