github.com/anacrolix/torrent@v1.61.0/reader.go (about) 1 package torrent 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "log/slog" 9 "sync" 10 11 "github.com/anacrolix/log" 12 "github.com/anacrolix/missinggo/v2" 13 "github.com/anacrolix/missinggo/v2/panicif" 14 ) 15 16 // Accesses Torrent data via a Client. Reads block until the data is available. Seeks and readahead 17 // also drive Client behaviour. Not safe for concurrent use. There are Torrent, File and Piece 18 // constructors for this. 19 type Reader interface { 20 // Set the context for reads. When done, reads should get cancelled so they don't get stuck 21 // waiting for data. 22 SetContext(context.Context) 23 // Read/Seek and not ReadAt because we want to return data as soon as it's available, and 24 // because we want a single read head. 25 io.ReadSeekCloser 26 // Deprecated: This prevents type asserting for optional interfaces because a wrapper is 27 // required to adapt back to io.Reader. 28 missinggo.ReadContexter 29 // Configure the number of bytes ahead of a read that should also be prioritized in preparation 30 // for further reads. Overridden by non-nil readahead func, see SetReadaheadFunc. 31 SetReadahead(int64) 32 // If non-nil, the provided function is called when the implementation needs to know the 33 // readahead for the current reader. Calls occur during Reads and Seeks, and while the Client is 34 // locked. 35 SetReadaheadFunc(ReadaheadFunc) 36 // Don't wait for pieces to complete and be verified. Read calls return as soon as they can when 37 // the underlying chunks become available. May be deprecated, although BitTorrent v2 will mean 38 // we can support this without piece hashing. 39 SetResponsive() 40 } 41 42 // Piece range by piece index, [begin, end). 43 type pieceRange struct { 44 begin, end pieceIndex 45 } 46 47 type ReadaheadContext struct { 48 ContiguousReadStartPos int64 49 CurrentPos int64 50 } 51 52 // Returns the desired readahead for a Reader. 
53 type ReadaheadFunc func(ReadaheadContext) int64 54 55 type reader struct { 56 t *Torrent 57 // Adjust the read/seek window to handle Readers locked to File extents and the like. 58 offset, length int64 59 60 storageReader storageReader 61 62 // Function to dynamically calculate readahead. If nil, readahead is static. 63 readaheadFunc ReadaheadFunc 64 65 // This is not protected by a lock because you should be coordinating setting this. If you want 66 // different contexts, you should have different Readers. 67 ctx context.Context 68 69 // Required when modifying pos and readahead. 70 mu sync.Locker 71 72 readahead, pos int64 73 // Position that reads have continued contiguously from. 74 contiguousReadStartPos int64 75 // The cached piece range this reader wants downloaded. The zero value corresponds to nothing. 76 // We cache this so that changes can be detected, and bubbled up to the Torrent only as 77 // required. 78 pieces pieceRange 79 80 // Reads have been initiated since the last seek. This is used to prevent readaheads occurring 81 // after a seek or with a new reader at the starting position. 82 reading bool 83 responsive bool 84 } 85 86 func (r *reader) SetContext(ctx context.Context) { 87 r.ctx = ctx 88 } 89 90 var _ io.ReadSeekCloser = (*reader)(nil) 91 92 func (r *reader) SetResponsive() { 93 r.responsive = true 94 r.t.cl.event.Broadcast() 95 } 96 97 // Disable responsive mode. TODO: Remove? 98 func (r *reader) SetNonResponsive() { 99 r.responsive = false 100 r.t.cl.event.Broadcast() 101 } 102 103 func (r *reader) SetReadahead(readahead int64) { 104 r.mu.Lock() 105 r.readahead = readahead 106 r.readaheadFunc = nil 107 r.posChanged() 108 r.mu.Unlock() 109 } 110 111 func (r *reader) SetReadaheadFunc(f ReadaheadFunc) { 112 r.mu.Lock() 113 r.readaheadFunc = f 114 r.posChanged() 115 r.mu.Unlock() 116 } 117 118 // How many bytes are available to read. Max is the most we could require. 
119 func (r *reader) available(off, max int64) (ret int64) { 120 off += r.offset 121 for max > 0 { 122 req, ok := r.t.offsetRequest(off) 123 if !ok { 124 break 125 } 126 if !r.responsive && !r.t.pieceComplete(pieceIndex(req.Index)) { 127 break 128 } 129 if !r.t.haveChunk(req) { 130 break 131 } 132 len1 := int64(req.Length) - (off - r.t.requestOffset(req)) 133 max -= len1 134 ret += len1 135 off += len1 136 } 137 // Ensure that ret hasn't exceeded our original max. 138 if max < 0 { 139 ret += max 140 } 141 return 142 } 143 144 // Calculates the pieces this reader wants downloaded, ignoring the cached value at r.pieces. 145 func (r *reader) piecesUncached() (ret pieceRange) { 146 ra := r.readahead 147 if r.readaheadFunc != nil { 148 ra = r.readaheadFunc(ReadaheadContext{ 149 ContiguousReadStartPos: r.contiguousReadStartPos, 150 CurrentPos: r.pos, 151 }) 152 } 153 if ra < 1 { 154 // Needs to be at least 1, because [x, x) means we don't want 155 // anything. 156 ra = 1 157 } 158 if !r.reading { 159 ra = 0 160 } 161 if ra > r.length-r.pos { 162 ra = r.length - r.pos 163 } 164 ret.begin, ret.end = r.t.byteRegionPieces(r.torrentOffset(r.pos), ra) 165 return 166 } 167 168 func (r *reader) Read(b []byte) (n int, err error) { 169 return r.read(b) 170 } 171 172 func (r *reader) read(b []byte) (n int, err error) { 173 return r.readContext(r.ctx, b) 174 } 175 176 // Deprecated: Use SetContext and Read. TODO: I've realised this breaks the ability to pass through 177 // optional interfaces like io.WriterTo and io.ReaderFrom. Go sux. Context should be provided 178 // somewhere else. 179 func (r *reader) ReadContext(ctx context.Context, b []byte) (n int, err error) { 180 r.ctx = ctx 181 return r.Read(b) 182 } 183 184 // We still pass ctx here, although it's a reader field now. 
185 func (r *reader) readContext(ctx context.Context, b []byte) (n int, err error) { 186 if len(b) > 0 { 187 r.reading = true 188 // TODO: Rework reader piece priorities so we don't have to push updates in to the Client 189 // and take the lock here. 190 r.mu.Lock() 191 r.posChanged() 192 r.mu.Unlock() 193 } 194 n, err = r.readAt(ctx, b, r.pos) 195 if n == 0 { 196 if err == nil && len(b) > 0 { 197 panic("expected error") 198 } else { 199 return 200 } 201 } 202 203 r.mu.Lock() 204 r.pos += int64(n) 205 r.posChanged() 206 r.mu.Unlock() 207 if r.pos >= r.length { 208 err = io.EOF 209 } else if err == io.EOF { 210 err = io.ErrUnexpectedEOF 211 } 212 return 213 } 214 215 var closedChan = make(chan struct{}) 216 217 func init() { 218 close(closedChan) 219 } 220 221 // Wait until some data should be available to read. Tickles the client if it isn't. Returns how 222 // much should be readable without blocking. `block` is whether to block if nothing is available, 223 // for successive reads for example. 
func (r *reader) waitAvailable(
	ctx context.Context,
	pos, wanted int64,
	block bool,
) (avail int64, err error) {
	t := r.t
	for {
		t.cl.rLock()
		avail = r.available(pos, wanted)
		// Capture the signal channel for the piece at pos while still holding the lock, so a
		// wakeup can't be missed between unlocking and the select below.
		readerCond := t.piece(int((r.offset + pos) / t.info.PieceLength)).readerCond.Signaled()
		t.cl.rUnlock()
		if avail != 0 {
			return
		}
		var dontWait <-chan struct{}
		if !block || wanted == 0 {
			// Make the select fall through immediately instead of waiting.
			dontWait = closedChan
		}
		select {
		case <-readerCond:
			continue
		case <-r.t.closed.Done():
			err = errTorrentClosed
		case <-ctx.Done():
			err = ctx.Err()
		case <-r.t.dataDownloadDisallowed.On():
			err = errors.New("torrent data downloading disabled")
		case <-r.t.networkingEnabled.Off():
			err = errors.New("torrent networking disabled")
		case <-dontWait:
			// Non-blocking: return avail == 0 with no error.
		}
		return
	}
}

// Adds the reader's torrent offset to the reader object offset (for example the reader might be
// constrained to a particular file within the torrent).
func (r *reader) torrentOffset(readerPos int64) int64 {
	return r.offset + readerPos
}

// Performs at most one successful read to torrent storage.
func (r *reader) readOnceAt(ctx context.Context, b []byte, pos int64) (n int, err error) {
	var avail int64
	// n is still zero here, so this always blocks (unless b is empty).
	avail, err = r.waitAvailable(ctx, pos, int64(len(b)), n == 0)
	if avail == 0 || err != nil {
		return
	}
	firstPieceIndex := pieceIndex(r.torrentOffset(pos) / r.t.info.PieceLength)
	firstPieceOffset := r.torrentOffset(pos) % r.t.info.PieceLength
	// Don't read past what waitAvailable said is ready.
	b1 := b[:min(int64(len(b)), avail)]
	// I think we can get EOF here due to the ReadAt contract. Previously we were forgetting to
	// return an error so it wasn't noticed. We now try again if there's a storage cap otherwise
	// convert it to io.UnexpectedEOF.
	r.initStorageReader()
	n, err = r.storageReader.ReadAt(b1, r.torrentOffset(pos))
	//n, err = r.t.readAt(b1, r.torrentOffset(pos))
	if n != 0 {
		// A partial read is still a success; drop any accompanying error per the method contract.
		err = nil
		return
	}
	// n == 0 without an error would violate the ReadAt contract for a non-empty b1.
	panicif.Nil(err)
	if r.t.closed.IsSet() {
		err = fmt.Errorf("reading from closed torrent: %w", err)
		return
	}
	attrs := [...]any{
		"piece", firstPieceIndex,
		"offset", firstPieceOffset,
		"bytes", len(b1),
		"err", err,
	}
	// With a storage cap, read errors are expected (data may have been evicted), so log quietly.
	if r.t.hasStorageCap() {
		r.slogger().Debug("error reading from capped storage", attrs[:]...)
	} else {
		r.slogger().Error("error reading", attrs[:]...)
	}
	return
}

// Performs at most one successful read to torrent storage. Try reading, first with the storage
// reader we already have, then after resetting it (in case data moved for
// completed/incomplete/promoted etc.). Then try resetting the piece completions. Then after all
// that if the storage is supposed to be flaky, try all over again. TODO: Filter errors and set log
// levels appropriately.
func (r *reader) readAt(ctx context.Context, b []byte, pos int64) (n int, err error) {
	if pos >= r.length {
		err = io.EOF
		return
	}
	// Attempt 1: with whatever storage reader we already have.
	n, err = r.readOnceAt(ctx, b, pos)
	if err == nil {
		return
	}
	r.slogger().Error("initial read failed", "err", err)

	// Attempt 2: with a fresh storage reader, in case the data moved.
	err = r.clearStorageReader()
	if err != nil {
		err = fmt.Errorf("closing storage reader after first read failed: %w", err)
		return
	}
	r.storageReader = nil

	n, err = r.readOnceAt(ctx, b, pos)
	if err == nil {
		return
	}
	r.slogger().Error("read failed after reader reset", "err", err)

	// Attempt 3: after resyncing piece completion state with storage.
	r.updatePieceCompletion(pos)

	n, err = r.readOnceAt(ctx, b, pos)
	if err == nil {
		return
	}
	r.slogger().Error("read failed after completion resync", "err", err)

	if r.t.hasStorageCap() {
		// Ensure params weren't modified (Go sux). Recurse to detect infinite loops. TODO: I expect
		// only some errors should pass through here, this might cause us to get stuck if we retry
		// for any error.
		return r.readAt(ctx, b, pos)
	}

	// There should have been something available, avail != 0 here.
	if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	return
}

// We pass pos in case we go ahead and implement multiple reads per ReadAt.
func (r *reader) updatePieceCompletion(pos int64) {
	firstPieceIndex := pieceIndex(r.torrentOffset(pos) / r.t.info.PieceLength)
	r.t.cl.lock()
	// I think there's a panic here caused by the Client being closed before obtaining this
	// lock. TestDropTorrentWithMmapStorageWhileHashing seems to tickle occasionally in CI.
	// Just add exceptions already.
	defer r.t.cl.unlock()
	if r.t.closed.IsSet() {
		// Can't update because Torrent's piece order is removed from Client.
		return
	}
	// TODO: Just reset pieces in the readahead window. This might help
	// prevent thrashing with small caches and file and piece priorities.
	if !r.t.updatePieceCompletion(firstPieceIndex) {
		r.logger().Levelf(log.Debug, "piece %d completion unchanged", firstPieceIndex)
	}
	// Update the rest of the piece completions in the readahead window, without alerting to
	// changes (since only the first piece, the one above, could have generated the read error
	// we're currently handling).
	// Invariant: the cached piece range starts at the piece currently being read.
	if r.pieces.begin != firstPieceIndex {
		panic(fmt.Sprint(r.pieces.begin, firstPieceIndex))
	}
	for index := r.pieces.begin + 1; index < r.pieces.end; index++ {
		r.t.updatePieceCompletion(index)
	}
}

// Hodor
func (r *reader) Close() error {
	r.t.cl.lock()
	r.t.deleteReader(r)
	r.t.cl.unlock()
	return r.clearStorageReader()
}

// Recomputes the wanted piece range and, only if it changed, pushes the update to the Torrent.
func (r *reader) posChanged() {
	to := r.piecesUncached()
	from := r.pieces
	if to == from {
		return
	}
	r.pieces = to
	// log.Printf("reader pos changed %v->%v", from, to)
	r.t.readerPosChanged(from, to)
}

func (r *reader) Seek(off int64, whence int) (newPos int64, err error) {
	switch whence {
	case io.SeekStart:
		newPos = off
		r.mu.Lock()
	case io.SeekCurrent:
		// Lock before reading r.pos.
		r.mu.Lock()
		newPos = r.pos + off
	case io.SeekEnd:
		newPos = r.length + off
		r.mu.Lock()
	default:
		return 0, errors.New("bad whence")
	}
	if newPos != r.pos {
		// A seek breaks read contiguity, which suppresses readahead until the next read.
		r.reading = false
		r.pos = newPos
		r.contiguousReadStartPos = newPos
		r.posChanged()
	}
	r.mu.Unlock()
	return
}

func (r *reader) logger() log.Logger {
	return r.t.logger
}

// Implementation inspired by https://news.ycombinator.com/item?id=27019613.
func defaultReadaheadFunc(r ReadaheadContext) int64 {
	return r.CurrentPos - r.ContiguousReadStartPos
}

func (r *reader) slogger() *slog.Logger {
	return r.t.slogger()
}

// Lazily creates the storage reader if one isn't already held.
func (r *reader) initStorageReader() {
	if r.storageReader == nil {
		r.storageReader = r.t.storageReader()
	}
}

// Closes and drops the storage reader, if any. The reader field is left set if Close fails.
func (r *reader) clearStorageReader() (err error) {
	if r.storageReader != nil {
		err = r.storageReader.Close()
		if err != nil {
			return
		}
	}
	r.storageReader = nil
	return
}