gitlab.com/SiaPrime/SiaPrime@v1.4.1/modules/renter/downloadstreamer.go (about) 1 package renter 2 3 import ( 4 "bytes" 5 "io" 6 "sync" 7 "time" 8 9 "gitlab.com/NebulousLabs/errors" 10 11 "gitlab.com/SiaPrime/SiaPrime/modules" 12 "gitlab.com/SiaPrime/SiaPrime/modules/renter/siafile" 13 ) 14 15 type ( 16 // streamer is a modules.Streamer that can be used to stream downloads from 17 // the sia network. 18 streamer struct { 19 // Reader variables. The snapshot is a snapshot of the file as it 20 // existed when it was opened, something that we do to give the streamer 21 // a consistent view of the file even if the file is being actively 22 // updated. Having this snapshot also isolates the reader from events 23 // such as name changes and deletions. 24 // 25 // We also keep the full file entry as it allows us to update metadata 26 // items in the file such as the access time. 27 staticFile *siafile.Snapshot 28 offset int64 29 r *Renter 30 31 // The cache itself is a []byte that is managed by threadedFillCache. The 32 // 'cacheOffset' indicates the starting location of the cache within the 33 // file, and all of the data in the []byte will be the actual file data 34 // that follows that offset. If the cache is empty, the length will be 35 // 0. 36 // 37 // Because the cache gets filled asynchronously, errors need to be 38 // recorded and then delivered to the user later. The errors get stored 39 // in readErr. 40 // 41 // cacheReady is a rotating channel which is used to signal to threads 42 // that the cache has been updated. When a Read call is made, the first 43 // action required is to grab a lock and then check if the cache has the 44 // requested data. If not, while still holding the lock the Read thread 45 // will grab a copy of cacheReady, and then release the lock. When the 46 // threadedFillCache thread has finished updating the cache, the thread 47 // will grab the lock and then the cacheReady channel will be closed and 48 // replaced with a new channel. This allows any number of Read threads 49 // to simultaneously block while waiting for cacheReady to be closed, 50 // and once cacheReady is closed they know to check the cache again. 51 // 52 // Multiple asynchronous calls to fill the cache may be sent out at 53 // once. To prevent race conditions, the 'cacheActive' channel is used 54 // to ensure that only one instance of 'threadedFillCache' is running at 55 // a time. If another instance of 'threadedFillCache' is active, the new 56 // call will immediately return. 57 cache []byte 58 activateCache chan struct{} 59 cacheOffset int64 60 cacheReady chan struct{} 61 readErr error 62 targetCacheSize int64 63 64 // Mutex to protect the offset variable, and all of the cacheing 65 // variables. 66 mu sync.Mutex 67 } 68 ) 69 70 // managedFillCache will determine whether or not the cache of the streamer 71 // needs to be filled, and if it does it will add data to the streamer. 72 func (s *streamer) managedFillCache() bool { 73 // Before creating a download request to fill out the cache, check whether 74 // the cache is actually in need of being filled. The cache will only fill 75 // if the current reader approaching the point of running out of data. 76 s.mu.Lock() 77 partialDownloadsSupported := s.staticFile.ErasureCode().SupportsPartialEncoding() 78 chunkSize := s.staticFile.ChunkSize() 79 cacheOffset := int64(s.cacheOffset) 80 streamOffset := s.offset 81 cacheLen := int64(len(s.cache)) 82 streamReadErr := s.readErr 83 fileSize := int64(s.staticFile.Size()) 84 targetCacheSize := s.targetCacheSize 85 s.mu.Unlock() 86 // If there has been a read error in the stream, abort. 87 if streamReadErr != nil { 88 return false 89 } 90 // Check whether the cache has reached the end of the file and also the 91 // streamOffset is contained within the cache. If so, no updates are needed. 92 if cacheOffset <= streamOffset && cacheOffset+cacheLen == fileSize { 93 return false 94 } 95 // If partial downloads are supported and the stream offset is in the first 96 // half of the cache, then no fetching is required. 97 // 98 // An extra check that there is any data in the cache needs to be made so 99 // that the cache fill function runs immediately after initialization. 100 if partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset-cacheOffset < cacheLen/2 { 101 return false 102 } 103 // If partial downloads are not supported, the full chunk containing the 104 // current offset should be the cache. If the cache is the full chunk that 105 // contains current offset, then nothing needs to be done as the cache is 106 // already prepared. 107 // 108 // This should be functionally nearly identical to the previous cache that 109 // we were using which has since been disabled. 110 if !partialDownloadsSupported && cacheOffset <= streamOffset && streamOffset < cacheOffset+cacheLen && cacheLen > 0 { 111 return false 112 } 113 114 // Defer a function to rotate out the cacheReady channel, to notify all 115 // calls blocking for more cache that more data is now available. 116 defer func() { 117 s.mu.Lock() 118 close(s.cacheReady) 119 s.cacheReady = make(chan struct{}) 120 s.mu.Unlock() 121 }() 122 123 // Determine what data needs to be fetched. 124 // 125 // If there is no support for partial downloads, a whole chunk needs to be 126 // fetched, and the cache will be set equal to the chunk that currently 127 // contains the stream offset. This is because that amount of data will need 128 // to be fetched anyway, so we may as well use the full amount of data in 129 // the cache. 130 // 131 // If there is support for partial downloads but the stream offset is not 132 // contained within the existing cache, we need to fully replace the cache. 133 // At initialization, this will be the case (cacheLen of 0 cannot contain 134 // the stream offset byte within it, because it contains no bytes at all), 135 // so a check for 0-size cache is made. The full cache replacement will 136 // consist of a partial download the size of the cache starting from the 137 // stream offset. 138 // 139 // The final case is that the stream offset is contained within the current 140 // cache, but the stream offset is not the first byte of the cache. This 141 // means that we need to drop all of the bytes prior to the stream offset 142 // and then more bytes so that the cache remains the same size. 143 var fetchOffset, fetchLen int64 144 if !partialDownloadsSupported { 145 // Request a full chunk of data. 146 chunkIndex, _ := s.staticFile.ChunkIndexByOffset(uint64(streamOffset)) 147 fetchOffset = int64(chunkIndex * chunkSize) 148 fetchLen = int64(chunkSize) 149 } else if streamOffset < cacheOffset || streamOffset >= cacheOffset+cacheLen { 150 // Grab enough data to fill the cache entirely starting from the current 151 // stream offset. 152 fetchOffset = streamOffset 153 fetchLen = targetCacheSize 154 } else { 155 // Set the fetch offset to the end of the current cache, and set the 156 // length equal to the number of bytes that the streamOffset has already 157 // consumed, so that the cache remains the same size after we drop all 158 // of the consumed bytes and extend the cache with new data. 159 fetchOffset = cacheOffset + cacheLen 160 fetchLen = targetCacheSize - (streamOffset - cacheOffset) 161 } 162 163 // Finally, check if the fetchOffset and fetchLen goes beyond the boundaries 164 // of the file. If so, the fetchLen will be truncated so that the cache only 165 // goes up to the end of the file. 166 if fetchOffset+fetchLen > fileSize { 167 fetchLen = fileSize - fetchOffset 168 } 169 170 // Perform the actual download. 171 buffer := bytes.NewBuffer([]byte{}) 172 ddw := newDownloadDestinationWriter(buffer) 173 d, err := s.r.managedNewDownload(downloadParams{ 174 destination: ddw, 175 destinationType: destinationTypeSeekStream, 176 destinationString: "httpresponse", 177 file: s.staticFile, 178 179 latencyTarget: 50 * time.Millisecond, // TODO: low default until full latency support is added. 180 length: uint64(fetchLen), 181 needsMemory: true, 182 offset: uint64(fetchOffset), 183 overdrive: 5, // TODO: high default until full overdrive support is added. 184 priority: 1000, // TODO: high default until full priority support is added. 185 }) 186 if err != nil { 187 closeErr := ddw.Close() 188 s.mu.Lock() 189 readErr := errors.Compose(s.readErr, err, closeErr) 190 s.readErr = readErr 191 s.mu.Unlock() 192 s.r.log.Println("Error downloading for stream file:", readErr) 193 return false 194 } 195 // Register some cleanup for when the download is done. 196 d.OnComplete(func(_ error) error { 197 // close the destination buffer to avoid deadlocks. 198 return ddw.Close() 199 }) 200 // Set the in-memory buffer to nil just to be safe in case of a memory 201 // leak. 202 defer func() { 203 d.destination = nil 204 }() 205 // Block until the download has completed. 206 select { 207 case <-d.completeChan: 208 err := d.Err() 209 if err != nil { 210 completeErr := errors.AddContext(err, "download failed") 211 s.mu.Lock() 212 readErr := errors.Compose(s.readErr, completeErr) 213 s.readErr = readErr 214 s.mu.Unlock() 215 s.r.log.Println("Error during stream download:", readErr) 216 return false 217 } 218 case <-s.r.tg.StopChan(): 219 stopErr := errors.New("download interrupted by shutdown") 220 s.mu.Lock() 221 readErr := errors.Compose(s.readErr, stopErr) 222 s.readErr = readErr 223 s.mu.Unlock() 224 s.r.log.Debugln(stopErr) 225 return false 226 } 227 228 // Update the cache. 229 s.mu.Lock() 230 defer s.mu.Unlock() 231 232 // Before updating the cache, check if the stream has caught up in the 233 // current cache. If the stream has caught up, the cache is not filling fast 234 // enough and the target cache size should be increased. 235 // 236 // streamOffsetInTail checks if the stream offset is in the final quarter of 237 // the cache. If it is, we consider the cache to be not filling fast enough, 238 // and we extend the size of the cache. 239 // 240 // A final check for cacheExists is performed, because if there currently is 241 // no cache at all, this must be the first fetch, and there is no reason to 242 // extend the cache size. 243 cacheLen = int64(len(s.cache)) 244 streamOffsetInCache := s.cacheOffset <= s.offset && s.offset <= s.cacheOffset+cacheLen // NOTE: it's '<=' so that we also count being 1 byte beyond the cache 245 streamOffsetInTail := streamOffsetInCache && s.offset >= s.cacheOffset+(cacheLen/4)+(cacheLen/2) 246 targetCacheUnderLimit := s.targetCacheSize < maxStreamerCacheSize 247 cacheExists := cacheLen > 0 248 if cacheExists && partialDownloadsSupported && targetCacheUnderLimit && streamOffsetInTail { 249 if s.targetCacheSize*2 > maxStreamerCacheSize { 250 s.targetCacheSize = maxStreamerCacheSize 251 } else { 252 s.targetCacheSize *= 2 253 } 254 } 255 256 // Update the cache based on whether the entire cache needs to be replaced 257 // or whether only some of the cache is being replaced. The whole cache 258 // needs to be replaced in the even that partial downloads are not 259 // supported, and also in the event that the stream offset is complete 260 // outside the previous cache. 261 if !partialDownloadsSupported || streamOffset >= cacheOffset+cacheLen || streamOffset < cacheOffset { 262 s.cache = buffer.Bytes() 263 s.cacheOffset = fetchOffset 264 } else { 265 s.cache = s.cache[streamOffset-cacheOffset:] 266 s.cache = append(s.cache, buffer.Bytes()...) 267 s.cacheOffset = streamOffset 268 } 269 270 // Return true, indicating that this function should be called again, 271 // because there may be more cache that has been requested or used since the 272 // previous request. 273 return true 274 } 275 276 // threadedFillCache is a background thread that keeps the cache full as data is 277 // read out of the cache. The Read and Seek functions have access to a channel 278 // that they can use to signal that the cache should be refilled. To ensure that 279 // the cache is always being filled, 'managedFillCache' will return a value 280 // indicating whether it should be called again after completion based on 281 // whether the cache was emptied further since the previous call. 282 func (s *streamer) threadedFillCache() { 283 // Add this thread to the renter's threadgroup. 284 err := s.r.tg.Add() 285 if err != nil { 286 s.r.log.Debugln("threadedFillCache terminating early because renter has stopped") 287 } 288 defer s.r.tg.Done() 289 290 // Kick things off by filling the cache for the first time. 291 fetchMore := s.managedFillCache() 292 for fetchMore { 293 fetchMore = s.managedFillCache() 294 } 295 296 for { 297 // Block until receiving notice that the cache needs to be updated, 298 // shutting down if a shutdown signal is received. 299 select { 300 case <-s.activateCache: 301 case <-s.r.tg.StopChan(): 302 return 303 } 304 305 // Update the cache. Sometimes the cache will know that it is already 306 // out of date by the time it is returning, in those cases call the 307 // function again. 308 fetchMore = s.managedFillCache() 309 for fetchMore { 310 fetchMore = s.managedFillCache() 311 } 312 } 313 } 314 315 // Close closes the streamer. 316 func (s *streamer) Close() error { 317 return nil 318 } 319 320 // Read will check the stream cache for the data that is being requested. If the 321 // data is fully or partially there, Read will return what data is available 322 // without error. If the data is not there, Read will issue a call to fill the 323 // cache and then block until the data is at least partially available. 324 func (s *streamer) Read(p []byte) (int, error) { 325 // Wait in a loop until the requested data is available, or until an error 326 // is recovered. The loop needs to release the lock between iterations, but 327 // the lock that it grabs needs to be held after the loops termination if 328 // the right conditions are met, resulting in an ugly/complex locking 329 // strategy. 330 for { 331 // Grab the lock and check that the cache has data which we want. If the 332 // cache does have data that we want, we will keep the lock and exit the 333 // loop. If there's an error, we will drop the lock and return the 334 // error. If the cache does not have the data we want but there is no 335 // error, we will drop the lock and spin up a thread to fill the cache, 336 // and then block until the cache has been updated. 337 s.mu.Lock() 338 // Get the file's size and check for EOF. 339 fileSize := int64(s.staticFile.Size()) 340 if s.offset >= fileSize { 341 s.mu.Unlock() 342 return 0, io.EOF 343 } 344 345 // If there is a cache error, drop the lock and return. This check 346 // should happen before anything else. 347 if s.readErr != nil { 348 err := s.readErr 349 s.mu.Unlock() 350 return 0, err 351 } 352 353 // Do a check that the cache size is at least twice as large as the read 354 // size, to ensure that data is being fetched sufficiently far in 355 // advance. 356 twiceReadLen := int64(len(p) * 2) 357 if s.targetCacheSize < twiceReadLen { 358 if twiceReadLen > maxStreamerCacheSize { 359 s.targetCacheSize = maxStreamerCacheSize 360 } else { 361 s.targetCacheSize = twiceReadLen 362 } 363 } 364 365 // Check if the cache contains data that we are interested in. If so, 366 // break out of the cache-fetch loop while still holding the lock. 367 if s.cacheOffset <= s.offset && s.offset < s.cacheOffset+int64(len(s.cache)) { 368 break 369 } 370 371 // There is no error, but the data that we want is also unavailable. 372 // Grab the cacheReady channel to detect when the cache has been 373 // updated, and then drop the lock and block until there has been a 374 // cache update. 375 // 376 // Notably, it should not be necessary to spin up a new cache thread. 377 // There are four conditions which may cause the stream offset to be 378 // located outside of the existing cache, and all conditions will result 379 // with a thread being spun up regardless. The first condition is 380 // initialization, where no cache exists. A fill cache thread is spun up 381 // upon initialization. The second condition is after a Seek, which may 382 // move the offset outside of the current cache. The call to Seek also 383 // spins up a cache filling thread. The third condition is after a read, 384 // which adjusts the stream offset. A new cache fill thread gets spun up 385 // in this case as well, immediately after the stream offset is 386 // adjusted. Finally, there is the case where a cache fill thread was 387 // spun up, but then immediately spun down due to another cache fill 388 // thread already running. But this case is handled as well, because a 389 // cache fill thread will spin up another cache fill thread when it 390 // finishes specifically to cover this case. 391 cacheReady := s.cacheReady 392 s.mu.Unlock() 393 <-cacheReady 394 395 // Upon iterating, the lock is not held, so the call to grab the lock at 396 // the top of the function should not cause a deadlock. 397 } 398 // This code should only be reachable if the lock is still being held and 399 // there is also data in the cache for us. Defer releasing the lock. 400 defer s.mu.Unlock() 401 402 dataStart := int(s.offset - s.cacheOffset) 403 dataEnd := dataStart + len(p) 404 // If the read request extends beyond the cache, truncate it to include 405 // only up to where the cache ends. 406 if dataEnd > len(s.cache) { 407 dataEnd = len(s.cache) 408 } 409 copy(p, s.cache[dataStart:dataEnd]) 410 s.offset += int64(dataEnd - dataStart) 411 412 // Now that data has been consumed, request more data. 413 select { 414 case s.activateCache <- struct{}{}: 415 default: 416 } 417 418 return dataEnd - dataStart, nil 419 } 420 421 // Seek sets the offset for the next Read to offset, interpreted 422 // according to whence: SeekStart means relative to the start of the file, 423 // SeekCurrent means relative to the current offset, and SeekEnd means relative 424 // to the end. Seek returns the new offset relative to the start of the file 425 // and an error, if any. 426 func (s *streamer) Seek(offset int64, whence int) (int64, error) { 427 s.mu.Lock() 428 defer s.mu.Unlock() 429 430 var newOffset int64 431 switch whence { 432 case io.SeekStart: 433 newOffset = 0 434 case io.SeekCurrent: 435 newOffset = s.offset 436 case io.SeekEnd: 437 newOffset = int64(s.staticFile.Size()) 438 } 439 newOffset += offset 440 if newOffset < 0 { 441 return s.offset, errors.New("cannot seek to negative offset") 442 } 443 444 // Reset the target cache size upon seek to be the default again. This is in 445 // place because some programs will rapidly consume the cache to build up 446 // their own buffer. This can result in the cache growing very large, which 447 // hurts seek times. By resetting the cache size upon seek, we ensure that 448 // the user gets a consistent experience when seeking. In a perfect world, 449 // we'd have an easy way to measure the bitrate of the file being streamed, 450 // so that we could set a target cache size according to that, but at the 451 // moment we don't have an easy way to get that information. 452 s.targetCacheSize = initialStreamerCacheSize 453 454 // Update the offset of the stream and immediately send a thread to update 455 // the cache. 456 s.offset = newOffset 457 458 // Now that data has been consumed, request more data. 459 select { 460 case s.activateCache <- struct{}{}: 461 default: 462 } 463 464 return newOffset, nil 465 } 466 467 // Streamer creates a modules.Streamer that can be used to stream downloads from 468 // the sia network. 469 func (r *Renter) Streamer(siaPath modules.SiaPath) (string, modules.Streamer, error) { 470 if err := r.tg.Add(); err != nil { 471 return "", nil, err 472 } 473 defer r.tg.Done() 474 // Lookup the file associated with the nickname. 475 entry, err := r.staticFileSet.Open(siaPath) 476 if err != nil { 477 return "", nil, err 478 } 479 defer entry.Close() 480 481 // Create the streamer 482 snap, err := entry.Snapshot() 483 if err != nil { 484 return "", nil, err 485 } 486 s := r.managedStreamer(snap) 487 return r.staticFileSet.SiaPath(entry).String(), s, nil 488 } 489 490 // managedStreamer creates a streamer from a siafile snapshot and starts filling 491 // its cache. 492 func (r *Renter) managedStreamer(snapshot *siafile.Snapshot) modules.Streamer { 493 s := &streamer{ 494 staticFile: snapshot, 495 r: r, 496 497 activateCache: make(chan struct{}), 498 cacheReady: make(chan struct{}), 499 targetCacheSize: initialStreamerCacheSize, 500 } 501 go s.threadedFillCache() 502 return s 503 }