github.com/anacrolix/torrent@v1.61.0/reader.go (about)

     1  package torrent
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"log/slog"
     9  	"sync"
    10  
    11  	"github.com/anacrolix/log"
    12  	"github.com/anacrolix/missinggo/v2"
    13  	"github.com/anacrolix/missinggo/v2/panicif"
    14  )
    15  
    16  // Accesses Torrent data via a Client. Reads block until the data is available. Seeks and readahead
    17  // also drive Client behaviour. Not safe for concurrent use. There are Torrent, File and Piece
    18  // constructors for this.
// Accesses Torrent data via a Client. Reads block until the data is available. Seeks and readahead
// also drive Client behaviour. Not safe for concurrent use. There are Torrent, File and Piece
// constructors for this.
type Reader interface {
	// Set the context for reads. When done, reads should get cancelled so they don't get stuck
	// waiting for data. Not synchronized with in-flight reads.
	SetContext(context.Context)
	// Read/Seek and not ReadAt because we want to return data as soon as it's available, and
	// because we want a single read head.
	io.ReadSeekCloser
	// Deprecated: This prevents type asserting for optional interfaces because a wrapper is
	// required to adapt back to io.Reader.
	missinggo.ReadContexter
	// Configure the number of bytes ahead of a read that should also be prioritized in preparation
	// for further reads. Overridden by non-nil readahead func, see SetReadaheadFunc.
	SetReadahead(int64)
	// If non-nil, the provided function is called when the implementation needs to know the
	// readahead for the current reader. Calls occur during Reads and Seeks, and while the Client is
	// locked.
	SetReadaheadFunc(ReadaheadFunc)
	// Don't wait for pieces to complete and be verified. Read calls return as soon as they can when
	// the underlying chunks become available. May be deprecated, although BitTorrent v2 will mean
	// we can support this without piece hashing.
	SetResponsive()
}
    41  
// Piece range by piece index, [begin, end). The zero value is the empty range, used to mean
// "no pieces wanted".
type pieceRange struct {
	begin, end pieceIndex
}
    46  
// ReadaheadContext is passed to a ReadaheadFunc so it can size the readahead window.
type ReadaheadContext struct {
	// Position that reads have continued contiguously from (reset on Seek to a new position).
	ContiguousReadStartPos int64
	// The reader's current position.
	CurrentPos int64
}
    51  
// Returns the desired readahead for a Reader. Called during Reads and Seeks while the Client is
// locked, so it must not block or re-enter the Client.
type ReadaheadFunc func(ReadaheadContext) int64
    54  
// reader is the concrete Reader implementation, backed by a Torrent. Not safe for concurrent use.
type reader struct {
	t *Torrent
	// Adjust the read/seek window to handle Readers locked to File extents and the like. offset is
	// the reader's start within the torrent; length is the readable extent.
	offset, length int64

	// Lazily created via initStorageReader; dropped and recreated on read failures.
	storageReader storageReader

	// Function to dynamically calculate readahead. If nil, readahead is static.
	readaheadFunc ReadaheadFunc

	// This is not protected by a lock because you should be coordinating setting this. If you want
	// different contexts, you should have different Readers.
	ctx context.Context

	// Required when modifying pos and readahead.
	mu sync.Locker

	readahead, pos int64
	// Position that reads have continued contiguously from.
	contiguousReadStartPos int64
	// The cached piece range this reader wants downloaded. The zero value corresponds to nothing.
	// We cache this so that changes can be detected, and bubbled up to the Torrent only as
	// required.
	pieces pieceRange

	// Reads have been initiated since the last seek. This is used to prevent readaheads occurring
	// after a seek or with a new reader at the starting position.
	reading    bool
	responsive bool
}
    85  
// SetContext sets the context used by subsequent reads so they can be cancelled. Deliberately
// unsynchronized: callers coordinate this themselves (see the ctx field comment).
func (r *reader) SetContext(ctx context.Context) {
	r.ctx = ctx
}
    89  
    90  var _ io.ReadSeekCloser = (*reader)(nil)
    91  
// SetResponsive enables responsive mode: reads may return data from chunks that are present but
// whose pieces aren't verified complete yet. Broadcasts the client event so blocked readers
// re-evaluate availability.
func (r *reader) SetResponsive() {
	r.responsive = true
	r.t.cl.event.Broadcast()
}
    96  
// Disable responsive mode: reads wait for verified-complete pieces again. TODO: Remove?
func (r *reader) SetNonResponsive() {
	r.responsive = false
	r.t.cl.event.Broadcast()
}
   102  
   103  func (r *reader) SetReadahead(readahead int64) {
   104  	r.mu.Lock()
   105  	r.readahead = readahead
   106  	r.readaheadFunc = nil
   107  	r.posChanged()
   108  	r.mu.Unlock()
   109  }
   110  
   111  func (r *reader) SetReadaheadFunc(f ReadaheadFunc) {
   112  	r.mu.Lock()
   113  	r.readaheadFunc = f
   114  	r.posChanged()
   115  	r.mu.Unlock()
   116  }
   117  
// How many bytes are available to read. Max is the most we could require. Walks contiguous chunk
// requests starting at reader offset off, counting bytes whose chunks are present (and whose
// pieces are complete, unless responsive). Called with the client read-locked (see waitAvailable).
func (r *reader) available(off, max int64) (ret int64) {
	// Translate the reader-relative offset to a torrent offset.
	off += r.offset
	for max > 0 {
		req, ok := r.t.offsetRequest(off)
		if !ok {
			break
		}
		// Without responsive mode, only verified-complete pieces count as readable.
		if !r.responsive && !r.t.pieceComplete(pieceIndex(req.Index)) {
			break
		}
		if !r.t.haveChunk(req) {
			break
		}
		// Bytes remaining in this chunk from off onward. Note the first chunk may be entered
		// mid-way, so len1 can be less than req.Length.
		len1 := int64(req.Length) - (off - r.t.requestOffset(req))
		max -= len1
		ret += len1
		off += len1
	}
	// Ensure that ret hasn't exceeded our original max (the last chunk may overshoot; max went
	// negative by the excess).
	if max < 0 {
		ret += max
	}
	return
}
   143  
   144  // Calculates the pieces this reader wants downloaded, ignoring the cached value at r.pieces.
   145  func (r *reader) piecesUncached() (ret pieceRange) {
   146  	ra := r.readahead
   147  	if r.readaheadFunc != nil {
   148  		ra = r.readaheadFunc(ReadaheadContext{
   149  			ContiguousReadStartPos: r.contiguousReadStartPos,
   150  			CurrentPos:             r.pos,
   151  		})
   152  	}
   153  	if ra < 1 {
   154  		// Needs to be at least 1, because [x, x) means we don't want
   155  		// anything.
   156  		ra = 1
   157  	}
   158  	if !r.reading {
   159  		ra = 0
   160  	}
   161  	if ra > r.length-r.pos {
   162  		ra = r.length - r.pos
   163  	}
   164  	ret.begin, ret.end = r.t.byteRegionPieces(r.torrentOffset(r.pos), ra)
   165  	return
   166  }
   167  
// Read implements io.Reader, using the context previously set via SetContext.
func (r *reader) Read(b []byte) (n int, err error) {
	return r.read(b)
}
   171  
// read delegates to readContext with the reader's stored context.
func (r *reader) read(b []byte) (n int, err error) {
	return r.readContext(r.ctx, b)
}
   175  
// Deprecated: Use SetContext and Read. TODO: I've realised this breaks the ability to pass through
// optional interfaces like io.WriterTo and io.ReaderFrom. Go sux. Context should be provided
// somewhere else. Note this overwrites any context previously set via SetContext.
func (r *reader) ReadContext(ctx context.Context, b []byte) (n int, err error) {
	r.ctx = ctx
	return r.Read(b)
}
   183  
// We still pass ctx here, although it's a reader field now. Performs a single read at the current
// position, advances pos, and translates storage-level EOF at the window boundary.
func (r *reader) readContext(ctx context.Context, b []byte) (n int, err error) {
	if len(b) > 0 {
		// Mark that reads have begun since the last seek, enabling readahead, and push the new
		// wanted-piece range to the Client.
		r.reading = true
		// TODO: Rework reader piece priorities so we don't have to push updates in to the Client
		// and take the lock here.
		r.mu.Lock()
		r.posChanged()
		r.mu.Unlock()
	}
	n, err = r.readAt(ctx, b, r.pos)
	if n == 0 {
		if err == nil && len(b) > 0 {
			// A zero-byte read with no error would violate readAt's contract for non-empty b.
			panic("expected error")
		} else {
			return
		}
	}

	r.mu.Lock()
	r.pos += int64(n)
	r.posChanged()
	r.mu.Unlock()
	// Reaching the end of the reader's window is EOF; an early EOF from storage before the window
	// end means the data was shorter than expected.
	if r.pos >= r.length {
		err = io.EOF
	} else if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	return
}
   214  
// closedChan is an always-ready channel, used as a select case when waiting should not block.
var closedChan = make(chan struct{})

func init() {
	close(closedChan)
}
   220  
// Wait until some data should be available to read. Tickles the client if it isn't. Returns how
// much should be readable without blocking. `block` is whether to block if nothing is available,
// for successive reads for example.
func (r *reader) waitAvailable(
	ctx context.Context,
	pos, wanted int64,
	block bool,
) (avail int64, err error) {
	t := r.t
	for {
		t.cl.rLock()
		avail = r.available(pos, wanted)
		// Capture the piece's condition channel while still holding the lock, so a signal between
		// unlock and select is not missed.
		readerCond := t.piece(int((r.offset + pos) / t.info.PieceLength)).readerCond.Signaled()
		t.cl.rUnlock()
		if avail != 0 {
			return
		}
		// An already-closed channel makes the select non-blocking when blocking isn't wanted.
		var dontWait <-chan struct{}
		if !block || wanted == 0 {
			dontWait = closedChan
		}
		select {
		case <-readerCond:
			// Piece state changed; re-check availability.
			continue
		case <-r.t.closed.Done():
			err = errTorrentClosed
		case <-ctx.Done():
			err = ctx.Err()
		case <-r.t.dataDownloadDisallowed.On():
			err = errors.New("torrent data downloading disabled")
		case <-r.t.networkingEnabled.Off():
			err = errors.New("torrent networking disabled")
		case <-dontWait:
			// Non-blocking mode: return avail == 0 with nil error.
		}
		return
	}
}
   258  
   259  // Adds the reader's torrent offset to the reader object offset (for example the reader might be
   260  // constrained to a particular file within the torrent).
   261  func (r *reader) torrentOffset(readerPos int64) int64 {
   262  	return r.offset + readerPos
   263  }
   264  
// Performs at most one successful read to torrent storage. Waits for data to become available,
// then reads from the storage reader. On a zero-byte read with an error, logs at a level chosen by
// whether the storage has a cap (capped storage errors are expected and retried by readAt).
func (r *reader) readOnceAt(ctx context.Context, b []byte, pos int64) (n int, err error) {
	var avail int64
	// NOTE(review): n == 0 is always true at this point (n is an unassigned named return), so
	// block is always true — presumably a remnant of an earlier retry loop.
	avail, err = r.waitAvailable(ctx, pos, int64(len(b)), n == 0)
	if avail == 0 || err != nil {
		return
	}
	firstPieceIndex := pieceIndex(r.torrentOffset(pos) / r.t.info.PieceLength)
	firstPieceOffset := r.torrentOffset(pos) % r.t.info.PieceLength
	// Limit the read to what waitAvailable reported as present.
	b1 := b[:min(int64(len(b)), avail)]
	// I think we can get EOF here due to the ReadAt contract. Previously we were forgetting to
	// return an error so it wasn't noticed. We now try again if there's a storage cap otherwise
	// convert it to io.UnexpectedEOF.
	r.initStorageReader()
	n, err = r.storageReader.ReadAt(b1, r.torrentOffset(pos))
	//n, err = r.t.readAt(b1, r.torrentOffset(pos))
	if n != 0 {
		// Partial success counts as success; discard any accompanying error.
		err = nil
		return
	}
	// Zero bytes read must come with an error per the ReadAt contract.
	panicif.Nil(err)
	if r.t.closed.IsSet() {
		err = fmt.Errorf("reading from closed torrent: %w", err)
		return
	}
	attrs := [...]any{
		"piece", firstPieceIndex,
		"offset", firstPieceOffset,
		"bytes", len(b1),
		"err", err,
	}
	if r.t.hasStorageCap() {
		// Capped storage can legitimately evict data; readAt will retry.
		r.slogger().Debug("error reading from capped storage", attrs[:]...)
	} else {
		r.slogger().Error("error reading", attrs[:]...)
	}
	return
}
   303  
// Performs at most one successful read to torrent storage. Try reading, first with the storage
// reader we already have, then after resetting it (in case data moved for
// completed/incomplete/promoted etc.). Then try resetting the piece completions. Then after all
// that if the storage is supposed to be flaky, try all over again. TODO: Filter errors and set log
// levels appropriately.
func (r *reader) readAt(ctx context.Context, b []byte, pos int64) (n int, err error) {
	if pos >= r.length {
		err = io.EOF
		return
	}
	// Attempt 1: existing storage reader.
	n, err = r.readOnceAt(ctx, b, pos)
	if err == nil {
		return
	}
	r.slogger().Error("initial read failed", "err", err)

	// Attempt 2: drop the storage reader so readOnceAt recreates it.
	err = r.clearStorageReader()
	if err != nil {
		err = fmt.Errorf("closing storage reader after first read failed: %w", err)
		return
	}
	// Redundant on success (clearStorageReader already nils it), but harmless.
	r.storageReader = nil

	n, err = r.readOnceAt(ctx, b, pos)
	if err == nil {
		return
	}
	r.slogger().Error("read failed after reader reset", "err", err)

	// Attempt 3: resync piece completion state with storage, then retry once more.
	r.updatePieceCompletion(pos)

	n, err = r.readOnceAt(ctx, b, pos)
	if err == nil {
		return
	}
	r.slogger().Error("read failed after completion resync", "err", err)

	if r.t.hasStorageCap() {
		// Ensure params weren't modified (Go sux). Recurse to detect infinite loops. TODO: I expect
		// only some errors should pass through here, this might cause us to get stuck if we retry
		// for any error.
		return r.readAt(ctx, b, pos)
	}

	// There should have been something available, avail != 0 here.
	if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	return
}
   354  
// We pass pos in case we go ahead and implement multiple reads per ReadAt. Resyncs piece
// completion state with storage for the piece at pos and the rest of the cached readahead window,
// under the client lock.
func (r *reader) updatePieceCompletion(pos int64) {
	firstPieceIndex := pieceIndex(r.torrentOffset(pos) / r.t.info.PieceLength)
	r.t.cl.lock()
	// I think there's a panic here caused by the Client being closed before obtaining this
	// lock. TestDropTorrentWithMmapStorageWhileHashing seems to tickle occasionally in CI.
	// Just add exceptions already.
	defer r.t.cl.unlock()
	if r.t.closed.IsSet() {
		// Can't update because Torrent's piece order is removed from Client.
		return
	}
	// TODO: Just reset pieces in the readahead window. This might help
	// prevent thrashing with small caches and file and piece priorities.
	if !r.t.updatePieceCompletion(firstPieceIndex) {
		r.logger().Levelf(log.Debug, "piece %d completion unchanged", firstPieceIndex)
	}
	// Update the rest of the piece completions in the readahead window, without alerting to
	// changes (since only the first piece, the one above, could have generated the read error
	// we're currently handling).
	if r.pieces.begin != firstPieceIndex {
		// Invariant: the cached window must start at the piece being read.
		panic(fmt.Sprint(r.pieces.begin, firstPieceIndex))
	}
	for index := r.pieces.begin + 1; index < r.pieces.end; index++ {
		r.t.updatePieceCompletion(index)
	}
}
   382  
   383  // Hodor
   384  func (r *reader) Close() error {
   385  	r.t.cl.lock()
   386  	r.t.deleteReader(r)
   387  	r.t.cl.unlock()
   388  	return r.clearStorageReader()
   389  }
   390  
   391  func (r *reader) posChanged() {
   392  	to := r.piecesUncached()
   393  	from := r.pieces
   394  	if to == from {
   395  		return
   396  	}
   397  	r.pieces = to
   398  	// log.Printf("reader pos changed %v->%v", from, to)
   399  	r.t.readerPosChanged(from, to)
   400  }
   401  
// Seek implements io.Seeker. The lock is deliberately taken inside each case: SeekCurrent must
// read r.pos under the lock, while the other cases compute newPos first. The bad-whence path
// returns without ever locking, so the trailing Unlock covers exactly the valid branches.
func (r *reader) Seek(off int64, whence int) (newPos int64, err error) {
	switch whence {
	case io.SeekStart:
		newPos = off
		r.mu.Lock()
	case io.SeekCurrent:
		r.mu.Lock()
		newPos = r.pos + off
	case io.SeekEnd:
		newPos = r.length + off
		r.mu.Lock()
	default:
		return 0, errors.New("bad whence")
	}
	if newPos != r.pos {
		// A real move resets the contiguous-read tracking (disabling readahead until the next
		// read) and pushes the new wanted-piece range to the Torrent.
		r.reading = false
		r.pos = newPos
		r.contiguousReadStartPos = newPos
		r.posChanged()
	}
	r.mu.Unlock()
	return
}
   425  
// logger returns the Torrent's (legacy anacrolix/log) logger.
func (r *reader) logger() log.Logger {
	return r.t.logger
}
   429  
   430  // Implementation inspired by https://news.ycombinator.com/item?id=27019613.
   431  func defaultReadaheadFunc(r ReadaheadContext) int64 {
   432  	return r.CurrentPos - r.ContiguousReadStartPos
   433  }
   434  
// slogger returns the Torrent's structured (log/slog) logger.
func (r *reader) slogger() *slog.Logger {
	return r.t.slogger()
}
   438  
// initStorageReader lazily creates the storage reader on first use (it is dropped by
// clearStorageReader after read failures, so this may run more than once).
func (r *reader) initStorageReader() {
	if r.storageReader == nil {
		r.storageReader = r.t.storageReader()
	}
}
   444  
   445  func (r *reader) clearStorageReader() (err error) {
   446  	if r.storageReader != nil {
   447  		err = r.storageReader.Close()
   448  		if err != nil {
   449  			return
   450  		}
   451  	}
   452  	r.storageReader = nil
   453  	return
   454  }