gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/downloadstreamer.go (about) 1 package renter 2 3 import ( 4 "bytes" 5 "io" 6 "sync" 7 "time" 8 9 "gitlab.com/NebulousLabs/errors" 10 11 "gitlab.com/SkynetLabs/skyd/skymodules" 12 "gitlab.com/SkynetLabs/skyd/skymodules/renter/filesystem" 13 "gitlab.com/SkynetLabs/skyd/skymodules/renter/filesystem/siafile" 14 ) 15 16 type ( 17 // streamer is a skymodules.Streamer that can be used to stream downloads from 18 // the sia network. 19 streamer struct { 20 // Reader variables. The snapshot is a snapshot of the file as it 21 // existed when it was opened, something that we do to give the streamer 22 // a consistent view of the file even if the file is being actively 23 // updated. Having this snapshot also isolates the reader from events 24 // such as name changes and deletions. 25 // 26 // We also keep the full file entry as it allows us to update metadata 27 // items in the file such as the access time. 28 staticFile *siafile.Snapshot 29 offset int64 30 staticRenter *Renter 31 32 // The cache itself is a []byte that is managed by threadedFillCache. The 33 // 'cacheOffset' indicates the starting location of the cache within the 34 // file, and all of the data in the []byte will be the actual file data 35 // that follows that offset. If the cache is empty, the length will be 36 // 0. 37 // 38 // Because the cache gets filled asynchronously, errors need to be 39 // recorded and then delivered to the user later. The errors get stored 40 // in readErr. 41 // 42 // cacheReady is a rotating channel which is used to signal to threads 43 // that the cache has been updated. When a Read call is made, the first 44 // action required is to grab a lock and then check if the cache has the 45 // requested data. If not, while still holding the lock the Read thread 46 // will grab a copy of cacheReady, and then release the lock. When the 47 // threadedFillCache thread has finished updating the cache, the thread 48 // will grab the lock and then the cacheReady channel will be closed and 49 // replaced with a new channel. This allows any number of Read threads 50 // to simultaneously block while waiting for cacheReady to be closed, 51 // and once cacheReady is closed they know to check the cache again. 52 // 53 // Multiple asynchronous calls to fill the cache may be sent out at 54 // once. To prevent race conditions, the 'cacheActive' channel is used 55 // to ensure that only one instance of 'threadedFillCache' is running at 56 // a time. If another instance of 'threadedFillCache' is active, the new 57 // call will immediately return. 58 cache []byte 59 activateCache chan struct{} 60 cacheOffset int64 61 cacheReady chan struct{} 62 staticDisableLocalFetch bool 63 readErr error 64 targetCacheSize int64 65 66 // Mutex to protect the offset variable, and all of the cacheing 67 // variables. 68 mu sync.Mutex 69 } 70 ) 71 72 // managedFillCache will determine whether or not the cache of the streamer 73 // needs to be filled, and if it does it will add data to the streamer. 74 func (s *streamer) managedFillCache() bool { 75 // Before creating a download request to fill out the cache, check whether 76 // the cache is actually in need of being filled. The cache will only fill 77 // if the current reader approaching the point of running out of data. 78 s.mu.Lock() 79 _, partialDownloadsSupported := s.staticFile.ErasureCode().SupportsPartialEncoding() 80 chunkSize := s.staticFile.ChunkSize() 81 cacheOffset := int64(s.cacheOffset) 82 streamOffset := s.offset 83 cacheLen := int64(len(s.cache)) 84 streamReadErr := s.readErr 85 fileSize := int64(s.staticFile.Size()) 86 targetCacheSize := s.targetCacheSize 87 s.mu.Unlock() 88 // If there has been a read error in the stream, abort. 89 if streamReadErr != nil { 90 return false 91 } 92 // Check whether the cache has reached the end of the file and also the 93 // streamOffset is contained within the cache. If so, no updates are needed. 94 if cacheOffset <= streamOffset && cacheOffset+cacheLen == fileSize { 95 return false 96 } 97 // If partial downloads are supported and more than half of the target cache 98 // size is remaining, then no fetching is required. 99 if partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset < (cacheOffset+cacheLen-(targetCacheSize/2)) { 100 return false 101 } 102 // If partial downloads are not supported, the full chunk containing the 103 // current offset should be the cache. If the cache is the full chunk that 104 // contains current offset, then nothing needs to be done as the cache is 105 // already prepared. 106 // 107 // This should be functionally nearly identical to the previous cache that 108 // we were using which has since been disabled. 109 if !partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset < cacheOffset+cacheLen && cacheLen > 0 { 110 return false 111 } 112 113 // Defer a function to rotate out the cacheReady channel, to notify all 114 // calls blocking for more cache that more data is now available. 115 defer func() { 116 s.mu.Lock() 117 close(s.cacheReady) 118 s.cacheReady = make(chan struct{}) 119 s.mu.Unlock() 120 }() 121 122 // Determine what data needs to be fetched. 123 // 124 // If there is no support for partial downloads, a whole chunk needs to be 125 // fetched, and the cache will be set equal to the chunk that currently 126 // contains the stream offset. This is because that amount of data will need 127 // to be fetched anyway, so we may as well use the full amount of data in 128 // the cache. 129 // 130 // If there is support for partial downloads but the stream offset is not 131 // contained within the existing cache, we need to fully replace the cache. 132 // At initialization, this will be the case (cacheLen of 0 cannot contain 133 // the stream offset byte within it, because it contains no bytes at all), 134 // so a check for 0-size cache is made. The full cache replacement will 135 // consist of a partial download the size of the cache starting from the 136 // stream offset. 137 // 138 // The final case is that the stream offset is contained within the current 139 // cache, but the stream offset is not the first byte of the cache. This 140 // means that we need to drop all of the bytes prior to the stream offset 141 // and then more bytes so that the cache remains the same size. 142 var fetchOffset, fetchLen int64 143 if !partialDownloadsSupported { 144 // Request a full chunk of data. 145 chunkIndex, _ := s.staticFile.ChunkIndexByOffset(uint64(streamOffset)) 146 fetchOffset = int64(chunkIndex * chunkSize) 147 fetchLen = int64(chunkSize) 148 } else if streamOffset < cacheOffset || streamOffset >= cacheOffset+cacheLen { 149 // Grab enough data to fill the cache entirely starting from the current 150 // stream offset. 151 fetchOffset = streamOffset 152 fetchLen = targetCacheSize 153 } else { 154 // Set the fetch offset to the end of the current cache, and set the 155 // length equal to the number of bytes that the streamOffset has already 156 // consumed, so that the cache remains the same size after we drop all 157 // of the consumed bytes and extend the cache with new data. 158 fetchOffset = cacheOffset + cacheLen 159 fetchLen = targetCacheSize - (cacheOffset + cacheLen - streamOffset) 160 } 161 162 // Finally, check if the fetchOffset and fetchLen goes beyond the boundaries 163 // of the file. If so, the fetchLen will be truncated so that the cache only 164 // goes up to the end of the file. 165 if fetchOffset+fetchLen > fileSize { 166 fetchLen = fileSize - fetchOffset 167 } 168 169 // Perform the actual download. 170 buffer := bytes.NewBuffer([]byte{}) 171 ddw := newDownloadDestinationWriter(buffer) 172 d, err := s.staticRenter.managedNewDownload(downloadParams{ 173 destination: ddw, 174 destinationType: destinationTypeSeekStream, 175 destinationString: "httpresponse", 176 disableLocalFetch: s.staticDisableLocalFetch, 177 file: s.staticFile, 178 179 latencyTarget: 50 * time.Millisecond, // TODO: low default until full latency support is added. 180 length: uint64(fetchLen), 181 needsMemory: true, 182 offset: uint64(fetchOffset), 183 overdrive: 5, // TODO: high default until full overdrive support is added. 184 priority: 1000, // TODO: high default until full priority support is added. 185 186 staticMemoryManager: s.staticRenter.staticUserDownloadMemoryManager, // user initiated download 187 staticSpendingCategory: categoryDownload, 188 }) 189 if err != nil { 190 closeErr := ddw.Close() 191 s.mu.Lock() 192 readErr := errors.Compose(s.readErr, err, closeErr) 193 s.readErr = readErr 194 s.mu.Unlock() 195 s.staticRenter.staticLog.Println("Error downloading for stream file:", readErr) 196 return false 197 } 198 // Register some cleanup for when the download is done. 199 d.OnComplete(func(_ error) error { 200 // close the destination buffer to avoid deadlocks. 201 return ddw.Close() 202 }) 203 // Start the download. 204 if err := d.Start(); err != nil { 205 return false 206 } 207 // Block until the download has completed. 208 select { 209 case <-d.completeChan: 210 err := d.Err() 211 if err != nil { 212 completeErr := errors.AddContext(err, "download failed") 213 s.mu.Lock() 214 readErr := errors.Compose(s.readErr, completeErr) 215 s.readErr = readErr 216 s.mu.Unlock() 217 s.staticRenter.staticLog.Println("Error during stream download:", readErr) 218 return false 219 } 220 case <-s.staticRenter.tg.StopChan(): 221 stopErr := errors.New("download interrupted by shutdown") 222 s.mu.Lock() 223 readErr := errors.Compose(s.readErr, stopErr) 224 s.readErr = readErr 225 s.mu.Unlock() 226 s.staticRenter.staticLog.Debugln(stopErr) 227 return false 228 } 229 230 // Update the cache. 231 s.mu.Lock() 232 defer s.mu.Unlock() 233 234 // Before updating the cache, check if the stream has caught up in the 235 // current cache. If the stream has caught up, the cache is not filling fast 236 // enough and the target cache size should be increased. 237 // 238 // streamOffsetInTail checks if the stream offset is in the final quarter of 239 // the cache. If it is, we consider the cache to be not filling fast enough, 240 // and we extend the size of the cache. 241 // 242 // A final check for cacheExists is performed, because if there currently is 243 // no cache at all, this must be the first fetch, and there is no reason to 244 // extend the cache size. 245 cacheLen = int64(len(s.cache)) 246 streamOffsetInCache := s.cacheOffset <= s.offset && s.offset <= s.cacheOffset+cacheLen // NOTE: it's '<=' so that we also count being 1 byte beyond the cache 247 streamOffsetInTail := streamOffsetInCache && s.offset >= s.cacheOffset+(cacheLen/4)+(cacheLen/2) 248 targetCacheUnderLimit := s.targetCacheSize < maxStreamerCacheSize 249 cacheExists := cacheLen > 0 250 if cacheExists && partialDownloadsSupported && targetCacheUnderLimit && streamOffsetInTail { 251 if s.targetCacheSize*2 > maxStreamerCacheSize { 252 s.targetCacheSize = maxStreamerCacheSize 253 } else { 254 s.targetCacheSize *= 2 255 } 256 } 257 258 // Update the cache based on whether the entire cache needs to be replaced 259 // or whether only some of the cache is being replaced. The whole cache 260 // needs to be replaced in the even that partial downloads are not 261 // supported, and also in the event that the stream offset is complete 262 // outside the previous cache. 263 if !partialDownloadsSupported || streamOffset >= cacheOffset+cacheLen || streamOffset < cacheOffset { 264 s.cache = buffer.Bytes() 265 s.cacheOffset = fetchOffset 266 } else { 267 s.cache = s.cache[streamOffset-cacheOffset:] 268 s.cache = append(s.cache, buffer.Bytes()...) 269 s.cacheOffset = streamOffset 270 } 271 272 // Return true, indicating that this function should be called again, 273 // because there may be more cache that has been requested or used since the 274 // previous request. 275 return true 276 } 277 278 // threadedFillCache is a background thread that keeps the cache full as data is 279 // read out of the cache. The Read and Seek functions have access to a channel 280 // that they can use to signal that the cache should be refilled. To ensure that 281 // the cache is always being filled, 'managedFillCache' will return a value 282 // indicating whether it should be called again after completion based on 283 // whether the cache was emptied further since the previous call. 284 func (s *streamer) threadedFillCache() { 285 // Add this thread to the renter's threadgroup. 286 err := s.staticRenter.tg.Add() 287 if err != nil { 288 s.staticRenter.staticLog.Debugln("threadedFillCache terminating early because renter has stopped") 289 } 290 defer s.staticRenter.tg.Done() 291 292 // Kick things off by filling the cache for the first time. 293 fetchMore := s.managedFillCache() 294 for fetchMore { 295 fetchMore = s.managedFillCache() 296 } 297 298 for { 299 // Block until receiving notice that the cache needs to be updated, 300 // shutting down if a shutdown signal is received. 301 select { 302 case <-s.activateCache: 303 case <-s.staticRenter.tg.StopChan(): 304 return 305 } 306 307 // Update the cache. Sometimes the cache will know that it is already 308 // out of date by the time it is returning, in those cases call the 309 // function again. 310 fetchMore = s.managedFillCache() 311 for fetchMore { 312 fetchMore = s.managedFillCache() 313 } 314 } 315 } 316 317 // Close closes the streamer. 318 func (s *streamer) Close() error { 319 return nil 320 } 321 322 // Read will check the stream cache for the data that is being requested. If the 323 // data is fully or partially there, Read will return what data is available 324 // without error. If the data is not there, Read will issue a call to fill the 325 // cache and then block until the data is at least partially available. 326 func (s *streamer) Read(p []byte) (int, error) { 327 // Wait in a loop until the requested data is available, or until an error 328 // is recovered. The loop needs to release the lock between iterations, but 329 // the lock that it grabs needs to be held after the loops termination if 330 // the right conditions are met, resulting in an ugly/complex locking 331 // strategy. 332 for { 333 // Grab the lock and check that the cache has data which we want. If the 334 // cache does have data that we want, we will keep the lock and exit the 335 // loop. If there's an error, we will drop the lock and return the 336 // error. If the cache does not have the data we want but there is no 337 // error, we will drop the lock and spin up a thread to fill the cache, 338 // and then block until the cache has been updated. 339 s.mu.Lock() 340 // Get the file's size and check for EOF. 341 fileSize := int64(s.staticFile.Size()) 342 if s.offset >= fileSize { 343 s.mu.Unlock() 344 return 0, io.EOF 345 } 346 347 // If there is a cache error, drop the lock and return. This check 348 // should happen before anything else. 349 if s.readErr != nil { 350 err := s.readErr 351 s.mu.Unlock() 352 return 0, err 353 } 354 355 // Do a check that the cache size is at least twice as large as the read 356 // size, to ensure that data is being fetched sufficiently far in 357 // advance. 358 twiceReadLen := int64(len(p) * 2) 359 if s.targetCacheSize < twiceReadLen { 360 if twiceReadLen > maxStreamerCacheSize { 361 s.targetCacheSize = maxStreamerCacheSize 362 } else { 363 s.targetCacheSize = twiceReadLen 364 } 365 } 366 367 // Check if the cache contains data that we are interested in. If so, 368 // break out of the cache-fetch loop while still holding the lock. 369 if s.cacheOffset <= s.offset && s.offset < s.cacheOffset+int64(len(s.cache)) { 370 break 371 } 372 373 // There is no error, but the data that we want is also unavailable. 374 // Grab the cacheReady channel to detect when the cache has been 375 // updated, and then drop the lock and block until there has been a 376 // cache update. 377 // 378 // Notably, it should not be necessary to spin up a new cache thread. 379 // There are four conditions which may cause the stream offset to be 380 // located outside of the existing cache, and all conditions will result 381 // with a thread being spun up regardless. The first condition is 382 // initialization, where no cache exists. A fill cache thread is spun up 383 // upon initialization. The second condition is after a Seek, which may 384 // move the offset outside of the current cache. The call to Seek also 385 // spins up a cache filling thread. The third condition is after a read, 386 // which adjusts the stream offset. A new cache fill thread gets spun up 387 // in this case as well, immediately after the stream offset is 388 // adjusted. Finally, there is the case where a cache fill thread was 389 // spun up, but then immediately spun down due to another cache fill 390 // thread already running. But this case is handled as well, because a 391 // cache fill thread will spin up another cache fill thread when it 392 // finishes specifically to cover this case. 393 cacheReady := s.cacheReady 394 s.mu.Unlock() 395 <-cacheReady 396 397 // Upon iterating, the lock is not held, so the call to grab the lock at 398 // the top of the function should not cause a deadlock. 399 } 400 // This code should only be reachable if the lock is still being held and 401 // there is also data in the cache for us. Defer releasing the lock. 402 defer s.mu.Unlock() 403 404 dataStart := int(s.offset - s.cacheOffset) 405 dataEnd := dataStart + len(p) 406 // If the read request extends beyond the cache, truncate it to include 407 // only up to where the cache ends. 408 if dataEnd > len(s.cache) { 409 dataEnd = len(s.cache) 410 } 411 copy(p, s.cache[dataStart:dataEnd]) 412 s.offset += int64(dataEnd - dataStart) 413 414 // Now that data has been consumed, request more data. 415 select { 416 case s.activateCache <- struct{}{}: 417 default: 418 } 419 420 return dataEnd - dataStart, nil 421 } 422 423 // Seek sets the offset for the next Read to offset, interpreted 424 // according to whence: SeekStart means relative to the start of the file, 425 // SeekCurrent means relative to the current offset, and SeekEnd means relative 426 // to the end. Seek returns the new offset relative to the start of the file 427 // and an error, if any. 428 func (s *streamer) Seek(offset int64, whence int) (int64, error) { 429 s.mu.Lock() 430 defer s.mu.Unlock() 431 432 var newOffset int64 433 switch whence { 434 case io.SeekStart: 435 newOffset = 0 436 case io.SeekCurrent: 437 newOffset = s.offset 438 case io.SeekEnd: 439 newOffset = int64(s.staticFile.Size()) 440 } 441 newOffset += offset 442 if newOffset < 0 { 443 return s.offset, errors.New("cannot seek to negative offset") 444 } 445 // If the Seek is a no-op, do not invalidate the cache. 446 if newOffset == s.offset { 447 return 0, nil 448 } 449 450 // Reset the target cache size upon seek to be the default again. This is in 451 // place because some programs will rapidly consume the cache to build up 452 // their own buffer. This can result in the cache growing very large, which 453 // hurts seek times. By resetting the cache size upon seek, we ensure that 454 // the user gets a consistent experience when seeking. In a perfect world, 455 // we'd have an easy way to measure the bitrate of the file being streamed, 456 // so that we could set a target cache size according to that, but at the 457 // moment we don't have an easy way to get that information. 458 s.targetCacheSize = initialStreamerCacheSize 459 460 // Update the offset of the stream and immediately send a thread to update 461 // the cache. 462 s.offset = newOffset 463 464 // Now that data has been consumed, request more data. 465 select { 466 case s.activateCache <- struct{}{}: 467 default: 468 } 469 470 return newOffset, nil 471 } 472 473 // Streamer creates a skymodules.Streamer that can be used to stream downloads from 474 // the sia network. 475 func (r *Renter) Streamer(siaPath skymodules.SiaPath, disableLocalFetch bool) (_ string, _ skymodules.Streamer, err error) { 476 if err := r.tg.Add(); err != nil { 477 return "", nil, err 478 } 479 defer r.tg.Done() 480 481 // Lookup the file associated with the nickname. 482 node, err := r.staticFileSystem.OpenSiaFile(siaPath) 483 if err != nil { 484 return "", nil, err 485 } 486 defer func() { 487 err = errors.Compose(err, node.Close()) 488 }() 489 490 // Create the streamer 491 snap, err := node.Snapshot(siaPath) 492 if err != nil { 493 return "", nil, err 494 } 495 s := r.managedStreamer(snap, disableLocalFetch) 496 return siaPath.String(), s, nil 497 } 498 499 // StreamerByNode will open a streamer for the renter, taking a FileNode as 500 // input instead of a siapath. This is important for fuse, which has filenodes 501 // that could be getting renamed before the streams are opened. 502 func (r *Renter) StreamerByNode(node *filesystem.FileNode, disableLocalFetch bool) (skymodules.Streamer, error) { 503 if err := r.tg.Add(); err != nil { 504 return nil, err 505 } 506 defer r.tg.Done() 507 508 // Grab the current SiaPath of the FileNode and then create a snapshot. 509 sp := r.staticFileSystem.FileSiaPath(node) 510 snap, err := node.Snapshot(sp) 511 if err != nil { 512 return nil, err 513 } 514 s := r.managedStreamer(snap, disableLocalFetch) 515 return s, nil 516 } 517 518 // managedStreamer creates a streamer from a siafile snapshot and starts filling 519 // its cache. 520 func (r *Renter) managedStreamer(snapshot *siafile.Snapshot, disableLocalFetch bool) skymodules.Streamer { 521 s := &streamer{ 522 staticFile: snapshot, 523 staticRenter: r, 524 525 activateCache: make(chan struct{}), 526 cacheReady: make(chan struct{}), 527 staticDisableLocalFetch: disableLocalFetch, 528 targetCacheSize: initialStreamerCacheSize, 529 } 530 go s.threadedFillCache() 531 return s 532 }