github.com/anacrolix/torrent@v1.61.0/webseed-requesting.go (about)

     1  package torrent
     2  
     3  import (
     4  	"bytes"
     5  	"cmp"
     6  	"context"
     7  	"fmt"
     8  	"iter"
     9  	"log/slog"
    10  	"maps"
    11  	"os"
    12  	"runtime/pprof"
    13  	"slices"
    14  	"strings"
    15  	"sync"
    16  	"time"
    17  	"unique"
    18  
    19  	g "github.com/anacrolix/generics"
    20  	"github.com/anacrolix/generics/heap"
    21  	"github.com/anacrolix/missinggo/v2/panicif"
    22  	"github.com/anacrolix/torrent/internal/extracmp"
    23  	"github.com/davecgh/go-spew/spew"
    24  
    25  	"github.com/anacrolix/torrent/internal/request-strategy"
    26  	"github.com/anacrolix/torrent/metainfo"
    27  	"github.com/anacrolix/torrent/webseed"
    28  )
    29  
// Default is based on experience with CloudFlare.
// Per-host cap on concurrent webseed HTTP requests (see plan.byCost limiting
// in updateWebseedRequests). Overridable via the environment variable.
var webseedHostRequestConcurrency = initIntFromEnv("TORRENT_WEBSEED_HOST_REQUEST_CONCURRENCY", 25, 0)
    32  
type (
	// Key used to group webseed requests for per-host cost/concurrency
	// accounting. NOTE(review): presumably derived from the URL host — confirm
	// where it's constructed.
	webseedHostKey       string
	// Interned form of webseedHostKey; comparable and cheap to copy.
	webseedHostKeyHandle = unique.Handle[webseedHostKey]
	// Interned webseed URL string.
	webseedUrlKey        unique.Handle[string]
)
    38  
    39  func (me webseedUrlKey) Value() string {
    40  	return unique.Handle[string](me).Value()
    41  }
    42  
    43  func (me webseedUrlKey) String() string {
    44  	return me.Value()
    45  }
    46  
// webseedRequestHeapElem is an element of the priority heap used to rank
// candidate and existing webseed requests in updateWebseedRequests: the unique
// request identity plus its ordering metadata.
type webseedRequestHeapElem struct {
	webseedUniqueRequestKey
	webseedRequestOrderValue
	// Whether files touched by the request's range might be incomplete;
	// the heap comparator prefers completing such slices first.
	// Not sure this is even worth it now.
	mightHavePartialFiles bool
}
    53  
    54  /*
    55  - Go through all the requestable pieces in order of priority, availability, whether there are peer requests, partial, infohash.
    56  - For each piece calculate files involved. Record each file not seen before and the piece index.
    57  - Cancel any outstanding requests that don't match a final file/piece-index pair.
    58  - Initiate missing requests that fit into the available limits.
    59  */
func (cl *Client) updateWebseedRequests() {
	// Snapshot currently-active requests. The second iterator walks the
	// per-torrent/per-webseed active sets as a consistency cross-check against
	// the Client-level map.
	existingRequests := maps.Collect(cl.iterCurrentWebseedRequestsFromClient())
	panicif.False(maps.Equal(existingRequests, maps.Collect(cl.iterCurrentWebseedRequests())))

	// Reuse the Client's scratch map to keep, per unique request key, the best
	// candidate value seen so far.
	g.MakeMapIfNil(&cl.aprioriMap)
	aprioriMap := cl.aprioriMap
	clear(aprioriMap)
	for uniqueKey, value := range cl.iterPossibleWebseedRequests() {
		//if len(aprioriMap) >= webseedHostRequestConcurrency {
		//	break
		//}
		// Existing requests are added to the heap separately below.
		if g.MapContains(existingRequests, uniqueKey) {
			continue
		}
		cur, ok := aprioriMap[uniqueKey]
		if ok {
			// Shared in the lookup above.
			t := uniqueKey.t
			// TODO: Change to "slice has requests"
			hasPeerConnRequest := func(reqIndex RequestIndex) bool {
				return t.requestingPeer(reqIndex) != nil
			}
			// Skip the webseed request unless it has a higher priority, is less requested by peer
			// conns, or has a lower start offset. Including peer conn requests here will bump
			// webseed requests in favour of peer conns unless there's nothing else to do.
			if cmp.Or(
				cmp.Compare(value.priority, cur.priority),
				extracmp.CompareBool(hasPeerConnRequest(cur.startRequest), hasPeerConnRequest(value.startRequest)),
				cmp.Compare(cur.startRequest, value.startRequest),
			) <= 0 {
				continue
			}
		}
		aprioriMap[uniqueKey] = value
	}

	// Reuse the Client's heap backing slice, growing it once if needed to fit
	// every candidate plus every existing request.
	heapSlice := cl.heapSlice[:0]
	requiredCap := len(aprioriMap) + len(existingRequests)
	if cap(heapSlice) < requiredCap {
		heapSlice = slices.Grow(heapSlice, requiredCap-cap(heapSlice))
	}
	defer func() {
		// Will this let GC collect values? If not do we need to clear? :(
		cl.heapSlice = heapSlice[:0]
	}()

	// New candidate requests enter the heap with a nil existingWebseedRequest.
	for key, value := range aprioriMap {
		// Should be filtered earlier.
		panicif.True(g.MapContains(existingRequests, key))
		heapSlice = append(heapSlice, webseedRequestHeapElem{
			key,
			webseedRequestOrderValue{
				aprioriMapValue: value,
			},
			key.t.filesInRequestRangeMightBePartial(
				value.startRequest,
				key.t.endRequestForAlignedWebseedResponse(key.sliceIndex),
			),
		})
	}

	// Add remaining existing requests.
	for key, value := range existingRequests {
		// Don't reconsider existing requests that aren't wanted anymore.
		if key.t.dataDownloadDisallowed.IsSet() {
			continue
		}
		wr := value.existingWebseedRequest
		heapSlice = append(heapSlice, webseedRequestHeapElem{
			key,
			value,
			key.t.filesInRequestRangeMightBePartial(wr.next, wr.end),
		})
	}

	aprioriHeap := heap.InterfaceForSlice(
		&heapSlice,
		func(l webseedRequestHeapElem, r webseedRequestHeapElem) bool {
			// Not stable ordering but being sticky to existing webseeds should be enough.
			ret := cmp.Or(
				// Prefer highest priority
				-cmp.Compare(l.priority, r.priority),
				// Then existing requests
				extracmp.CompareBool(l.existingWebseedRequest == nil, r.existingWebseedRequest == nil),
				// Prefer not competing with active peer connections.
				cmp.Compare(len(l.t.conns), len(r.t.conns)),
				// Try to complete partial slices first.
				-extracmp.CompareBool(l.mightHavePartialFiles, r.mightHavePartialFiles),
				// No need to prefer longer files anymore now that we're using slices?
				//// Longer files first.
				//-cmp.Compare(l.longestFile().Unwrap(), r.longestFile().Unwrap()),
				// Easier to debug than infohashes...
				cmp.Compare(l.t.info.Name, r.t.info.Name),
				bytes.Compare(l.t.canonicalShortInfohash()[:], r.t.canonicalShortInfohash()[:]),
				// Doing earlier chunks first means more compact files for partial file hashing.
				cmp.Compare(l.sliceIndex, r.sliceIndex),
			)
			// Requests should be unique unless they're for different URLs.
			if ret == 0 && l.url == r.url {
				cfg := spew.NewDefaultConfig()
				cfg.Dump(l)
				cfg.Dump(r)
				panic("webseed request heap ordering is not stable")
			}
			return ret < 0
		},
	)

	// Anything still in this map after planning gets cancelled below.
	unwantedExistingRequests := maps.Clone(existingRequests)

	heap.Init(aprioriHeap)
	var plan webseedRequestPlan
	// Could also return early here if all known costKeys are fully assigned.
	for aprioriHeap.Len() > 0 {
		elem := heap.Pop(aprioriHeap)
		// Pulling the pregenerated form avoids unique.Handle, and possible URL parsing and error
		// handling overhead. Need the value to avoid looking this up again.
		costKey := elem.costKey
		panicif.Zero(costKey)
		if elem.existingWebseedRequest == nil {
			// Existing requests might be within the allowed discard range.
			panicif.Eq(elem.priority, PiecePriorityNone)
		}
		panicif.True(elem.t.dataDownloadDisallowed.IsSet())
		panicif.True(elem.t.closed.IsSet())
		// Enforce the per-host concurrency cap; reading a nil map is fine here.
		if len(plan.byCost[costKey]) >= webseedHostRequestConcurrency {
			continue
		}
		g.MakeMapIfNil(&plan.byCost)
		requestKey := elem.webseedUniqueRequestKey
		plan.byCost[costKey] = append(plan.byCost[costKey], plannedWebseedRequest{
			url:        elem.url,
			t:          elem.t,
			startIndex: elem.startRequest,
			sliceIndex: elem.sliceIndex,
		})
		// Planned requests that were already running must not be cancelled.
		delete(unwantedExistingRequests, requestKey)
	}

	// Cancel any existing requests that are no longer wanted.
	for key, value := range unwantedExistingRequests {
		// Should we skip cancelling requests that are ended and just haven't cleaned up yet?
		value.existingWebseedRequest.Cancel("deprioritized", key.t)
	}

	// Lazily print the plan at most once, only if we actually spawn something.
	printPlan := sync.OnceFunc(func() {
		if webseed.PrintDebug {
			//fmt.Println(plan)
			//fmt.Println(formatMap(existingRequests))
		}
	})

	// TODO: Do we deduplicate requests across different webseeds?

	// Spawn the planned requests that aren't already running.
	for costKey, plannedRequests := range plan.byCost {
		for _, request := range plannedRequests {
			// This could happen if a request is cancelled but hasn't removed itself from the active
			// list yet. This helps with backpressure as the requests can sleep to rate limit.
			if !cl.underWebSeedHttpRequestLimit(costKey) {
				break
			}
			existingRequestKey := request.toChunkedWebseedRequestKey()
			if g.MapContains(existingRequests, existingRequestKey) {
				// A request exists to the webseed slice already. This doesn't check the request
				// indexes match.

				// Check we didn't just cancel the same request.
				panicif.True(g.MapContains(unwantedExistingRequests, existingRequestKey))
				continue
			}
			t := request.t
			peer := t.webSeeds[request.url]
			panicif.NotEq(peer.hostKey, costKey)
			printPlan()

			debugLogger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
				Level:     slog.LevelDebug,
				AddSource: true,
			})).With(
				"webseedUrl", request.url,
				"webseedChunkIndex", request.sliceIndex)

			begin := request.startIndex
			// TODO: Requests aren't limited by the pieces a peer has.
			end := t.getWebseedRequestEnd(begin, request.sliceIndex, debugLogger)
			panicif.LessThanOrEqual(end, begin)

			peer.spawnRequest(begin, end, debugLogger)
		}
	}
}
   251  
   252  var shortenWebseedRequests = true
   253  
   254  func init() {
   255  	s, ok := os.LookupEnv("TORRENT_SHORTEN_WEBSEED_REQUESTS")
   256  	if !ok {
   257  		return
   258  	}
   259  	shortenWebseedRequests = s != ""
   260  }
   261  
   262  func (t *Torrent) getWebseedRequestEnd(begin RequestIndex, slice webseedSliceIndex, debugLogger *slog.Logger) RequestIndex {
   263  	chunkEnd := t.endRequestForAlignedWebseedResponse(slice)
   264  	if !shortenWebseedRequests {
   265  		// Pending fix to pendingPieces matching piece request order due to missing initial pieces
   266  		// checks?
   267  		return chunkEnd
   268  	}
   269  	// Shorten webseed requests to avoid being penalized by webseeds for cancelling requests.
   270  	panicif.False(t.wantReceiveChunk(begin))
   271  	var end = begin + 1
   272  	for ; end < chunkEnd && t.wantReceiveChunk(end); end++ {
   273  	}
   274  	panicif.GreaterThan(end, chunkEnd)
   275  	if webseed.PrintDebug && end != chunkEnd {
   276  		debugLogger.Debug(
   277  			"shortened webseed request",
   278  			"from", endExclusiveString(begin, chunkEnd),
   279  			"to", endExclusiveString(begin, end))
   280  	}
   281  	return end
   282  }
   283  
// Cloudflare caches up to 512 MB responses by default. This is also an alignment. Making this
// smaller will allow requests to complete a smaller set of files faster.
// NOTE(review): presumably in bytes (it's divided by the torrent chunk size in
// bytes) — confirm against initUIntFromEnv usage.
var webseedRequestChunkSize = initUIntFromEnv[uint64]("TORRENT_WEBSEED_REQUEST_CHUNK_SIZE", 64<<20, 64)
   287  
   288  // Can return the same as start if the request is at the end of the torrent.
   289  func (t *Torrent) endRequestForAlignedWebseedResponse(slice webseedSliceIndex) RequestIndex {
   290  	end := min(
   291  		t.maxEndRequest(),
   292  		RequestIndex(slice+1)*t.chunksPerAlignedWebseedResponse())
   293  	return end
   294  }
   295  
   296  func (t *Torrent) chunksPerAlignedWebseedResponse() RequestIndex {
   297  	// This is the same as webseedRequestChunkSize, but in terms of RequestIndex.
   298  	return RequestIndex(webseedRequestChunkSize / t.chunkSize.Uint64())
   299  }
   300  
   301  func (t *Torrent) requestIndexToWebseedSliceIndex(requestIndex RequestIndex) webseedSliceIndex {
   302  	return webseedSliceIndex(requestIndex / t.chunksPerAlignedWebseedResponse())
   303  }
   304  
   305  func (cl *Client) dumpCurrentWebseedRequests() {
   306  	if webseed.PrintDebug {
   307  		fmt.Println("current webseed requests:")
   308  		for key, value := range cl.iterCurrentWebseedRequests() {
   309  			fmt.Printf("\t%v: %v, priority %v\n", key, value.existingWebseedRequest, value.priority)
   310  		}
   311  	}
   312  }
   313  
// webseedRequestPlan is the set of webseed requests the Client wants active,
// grouped by host cost key (requests sharing a cost key share the per-host
// concurrency limit).
type webseedRequestPlan struct {
	byCost map[webseedHostKeyHandle][]plannedWebseedRequest
}
   317  
// Needed components to generate a webseed request.
type plannedWebseedRequest struct {
	url        webseedUrlKey
	t          *Torrent
	sliceIndex webseedSliceIndex
	// First request index to fetch within the slice.
	startIndex RequestIndex
}
   325  
   326  func (me *plannedWebseedRequest) toChunkedWebseedRequestKey() webseedUniqueRequestKey {
   327  	return webseedUniqueRequestKey{
   328  		url:        me.url,
   329  		t:          me.t,
   330  		sliceIndex: me.sliceIndex,
   331  	}
   332  }
   333  
   334  func (me webseedRequestPlan) String() string {
   335  	var sb strings.Builder
   336  	for costKey, requestKeys := range me.byCost {
   337  		fmt.Fprintf(&sb, "%v\n", costKey.Value())
   338  		for _, requestKey := range requestKeys {
   339  			fmt.Fprintf(&sb, "\t%v\n", requestKey)
   340  		}
   341  	}
   342  	return strings.TrimSuffix(sb.String(), "\n")
   343  }
   344  
// Distinct webseed request data when different offsets are not allowed.
//
// Identifies a webseed request by torrent, webseed URL, and aligned slice.
// Start offsets within the slice are deliberately excluded so requests to the
// same slice dedupe to one key.
type webseedUniqueRequestKey struct {
	url        webseedUrlKey
	t          *Torrent
	sliceIndex webseedSliceIndex
}
   351  
// aprioriMapValue is the candidate-request metadata tracked per unique request
// key while choosing the best piece to fetch from each webseed slice.
type aprioriMapValue struct {
	// Host-level key used for per-host request accounting.
	costKey      webseedHostKeyHandle
	priority     PiecePriority
	// First request index the webseed request would start at.
	startRequest RequestIndex
}
   357  
   358  func (me webseedUniqueRequestKey) String() string {
   359  	return fmt.Sprintf("torrent %v: webseed %v: slice %v", me.t, me.url, me.sliceIndex)
   360  }
   361  
// Non-distinct proposed webseed request data.
type webseedRequestOrderValue struct {
	aprioriMapValue
	// Used for cancellation if this is deprioritized. Also, a faster way to sort for existing
	// requests. Nil for candidate requests that haven't been spawned yet.
	existingWebseedRequest *webseedRequest
}
   369  
   370  func (me webseedRequestOrderValue) String() string {
   371  	return fmt.Sprintf("%#v", me)
   372  }
   373  
// Yields possible webseed requests by piece. Caller should filter and prioritize these.
//
// Walks the Client's piece request order(s) and, for each requestable piece on
// a torrent with webseeds, yields one candidate per eligible webseed URL.
func (cl *Client) iterPossibleWebseedRequests() iter.Seq2[webseedUniqueRequestKey, aprioriMapValue] {
	return func(yield func(webseedUniqueRequestKey, aprioriMapValue) bool) {
		for key, value := range cl.pieceRequestOrder {
			input := key.getRequestStrategyInput(cl)
			if !requestStrategy.GetRequestablePieces(
				input,
				value.pieces,
				func(ih metainfo.Hash, pieceIndex int, orderState requestStrategy.PieceRequestOrderState) bool {
					t := cl.torrentsByShortHash[ih]
					// No webseeds on this torrent: nothing to yield.
					if len(t.webSeeds) == 0 {
						return true
					}
					p := t.piece(pieceIndex)
					// Only pieces with at least one chunk still to fetch are candidates.
					cleanOpt := p.firstCleanChunk()
					if !cleanOpt.Ok {
						return true
					}
					// Pretty sure we want this and not the order state priority. That one is for
					// client piece request order and ignores other states like hashing, marking
					// etc. Order state priority would be faster otherwise.
					priority := p.effectivePriority()
					firstRequest := p.requestIndexBegin() + cleanOpt.Value
					panicif.GreaterThanOrEqual(firstRequest, t.maxEndRequest())
					webseedSliceIndex := t.requestIndexToWebseedSliceIndex(firstRequest)
					for url, ws := range t.webSeeds {
						if ws.suspended() {
							continue
						}
						if !ws.peer.peerHasPiece(pieceIndex) {
							continue
						}
						// Return value from this function (RequestPieceFunc) doesn't terminate
						// iteration, so propagate that to not handling the yield return value.
						if !yield(
							webseedUniqueRequestKey{
								t:          t,
								sliceIndex: webseedSliceIndex,
								url:        url,
							},
							aprioriMapValue{
								priority:     priority,
								costKey:      ws.hostKey,
								startRequest: firstRequest,
							},
						) {
							return false
						}
					}
					return true
				},
			) {
				break
			}
		}
	}
}
   431  
// updateWebseedRequestsWithReason schedules an immediate webseed request
// update, recording why it was requested (surfaced later in pprof labels and
// debug output).
func (cl *Client) updateWebseedRequestsWithReason(reason updateRequestReason) {
	// Should we wrap this with pprof labels?
	cl.scheduleImmediateWebseedRequestUpdate(reason)
}
   436  
// This has awful naming, I'm not quite sure what to call this.
//
// yieldKeyAndValue converts an active webseedRequest into the
// (webseedUniqueRequestKey, webseedRequestOrderValue) pair used for request
// ordering and passes it to yield, returning yield's result so callers can
// stop iterating. Note the yielded key is rebuilt from the request's begin
// index rather than using the key parameter's sliceIndex directly.
func (cl *Client) yieldKeyAndValue(
	yield func(webseedUniqueRequestKey, webseedRequestOrderValue) bool,
	key webseedUniqueRequestKey,
	ar *webseedRequest,
) bool {
	t := key.t
	url := key.url
	hostKey := t.webSeeds[url].hostKey
	// Don't spawn requests before old requests are cancelled.
	// Deliberately disabled: kept for reference while the behaviour is decided.
	if false {
		if ar.cancelled.Load() {
			cl.slogger.Debug("iter current webseed requests: skipped cancelled webseed request")
			// This should prevent overlapping webseed requests that are just filling
			// slots waiting to cancel from conflicting.
			return true
		}
	}
	// A fully-consumed request (next >= end) is reported with no priority.
	priority := PiecePriorityNone
	if ar.next < ar.end {
		p := t.piece(t.pieceIndexOfRequestIndex(ar.next))
		priority = p.effectivePriority()
	}
	return yield(
		webseedUniqueRequestKey{
			t:          t,
			sliceIndex: t.requestIndexToWebseedSliceIndex(ar.begin),
			url:        url,
		},
		webseedRequestOrderValue{
			aprioriMapValue{
				priority:     priority,
				costKey:      hostKey,
				startRequest: ar.next,
			},
			ar,
		},
	)
}
   476  
// iterCurrentWebseedRequestsFromClient yields the Client's active webseed
// requests with their current ordering metadata, sourced from the Client-level
// active request map.
func (cl *Client) iterCurrentWebseedRequestsFromClient() iter.Seq2[webseedUniqueRequestKey, webseedRequestOrderValue] {
	return func(yield func(webseedUniqueRequestKey, webseedRequestOrderValue) bool) {
		for key, ar := range cl.activeWebseedRequests {
			if !cl.yieldKeyAndValue(yield, key, ar) {
				return
			}
		}
	}
}
   486  
// This exists to compare old behaviour with Client active requests.
//
// Walks every torrent's webseeds and their active request sets, yielding the
// same key/value form as iterCurrentWebseedRequestsFromClient. Used by
// updateWebseedRequests as a consistency cross-check between the two sources.
func (cl *Client) iterCurrentWebseedRequests() iter.Seq2[webseedUniqueRequestKey, webseedRequestOrderValue] {
	return func(yield func(webseedUniqueRequestKey, webseedRequestOrderValue) bool) {
		for t := range cl.torrents {
			for url, ws := range t.webSeeds {
				for ar := range ws.activeRequests {
					key := webseedUniqueRequestKey{
						t:          t,
						sliceIndex: t.requestIndexToWebseedSliceIndex(ar.begin),
						url:        url,
					}
					if !cl.yieldKeyAndValue(yield, key, ar) {
						return
					}
				}
			}
		}
	}
}
   506  
// scheduleImmediateWebseedRequestUpdate arranges for the webseed request
// update to run as soon as possible by re-arming the update timer with a zero
// duration. Expects the Client lock to be held (per the reasoning below).
func (cl *Client) scheduleImmediateWebseedRequestUpdate(reason updateRequestReason) {
	if !cl.webseedRequestTimer.Stop() {
		// Timer function already running, let it do its thing.
		return
	}
	// Set the timer to fire right away (this will coalesce consecutive updates without forcing an
	// update on every call to this method). Since we're holding the Client lock, and we cancelled
	// the timer, and it wasn't active, nobody else should have reset it before us. Do we need to
	// introduce a "reason" field here, (albeit Client-level?).
	cl.webseedUpdateReason = cmp.Or(cl.webseedUpdateReason, reason)
	panicif.True(cl.webseedRequestTimer.Reset(0))
}
   519  
   520  func (cl *Client) updateWebseedRequestsTimerFunc() {
   521  	if cl.closed.IsSet() {
   522  		return
   523  	}
   524  	// This won't get set elsewhere if the timer has fired, which it has for us to be here.
   525  	cl.webseedUpdateReason = cmp.Or(cl.webseedUpdateReason, "timer")
   526  	cl.lock()
   527  	defer cl.unlock()
   528  	cl.updateWebseedRequestsAndResetTimer()
   529  }
   530  
// updateWebseedRequestsAndResetTimer runs a full webseed request update under
// pprof labels recording the triggering reason, then re-arms the periodic
// update timer. The stored reason is consumed before the update runs.
func (cl *Client) updateWebseedRequestsAndResetTimer() {
	pprof.Do(context.Background(), pprof.Labels(
		"reason", string(cl.webseedUpdateReason),
	), func(_ context.Context) {
		started := time.Now()
		// Consume the reason so the next trigger starts fresh.
		reason := cl.webseedUpdateReason
		cl.webseedUpdateReason = ""
		cl.updateWebseedRequests()
		// The update itself must not schedule another update.
		panicif.NotZero(cl.webseedUpdateReason)
		if webseed.PrintDebug {
			now := time.Now()
			fmt.Printf("%v: updateWebseedRequests took %v (reason: %v)\n", now, now.Sub(started), reason)
		}
	})
	// Timer should always be stopped before the last call. TODO: Don't reset timer if there's
	// nothing to do (no possible requests in update).
	panicif.True(cl.webseedRequestTimer.Reset(webseedRequestUpdateTimerInterval))

}
   550  
   551  type endExclusive[T any] struct {
   552  	start, end T
   553  }
   554  
   555  func (me endExclusive[T]) String() string {
   556  	return fmt.Sprintf("[%v-%v)", me.start, me.end)
   557  }
   558  
   559  func endExclusiveString[T any](start, end T) string {
   560  	return endExclusive[T]{start, end}.String()
   561  }