package torrent

import (
	"bytes"
	"cmp"
	"context"
	"fmt"
	"iter"
	"log/slog"
	"maps"
	"os"
	"runtime/pprof"
	"slices"
	"strings"
	"sync"
	"time"
	"unique"

	g "github.com/anacrolix/generics"
	"github.com/anacrolix/generics/heap"
	"github.com/anacrolix/missinggo/v2/panicif"
	"github.com/anacrolix/torrent/internal/extracmp"
	"github.com/davecgh/go-spew/spew"

	"github.com/anacrolix/torrent/internal/request-strategy"
	"github.com/anacrolix/torrent/metainfo"
	"github.com/anacrolix/torrent/webseed"
)

// Maximum planned requests per webseed host ("cost key"). Default is based on
// experience with CloudFlare.
var webseedHostRequestConcurrency = initIntFromEnv("TORRENT_WEBSEED_HOST_REQUEST_CONCURRENCY", 25, 0)

type (
	// Groups webseed URLs that share a request budget (presumably keyed by
	// host — TODO confirm against where hostKey is assigned).
	webseedHostKey string
	// Interned form of webseedHostKey, used as the plan/cost map key.
	webseedHostKeyHandle = unique.Handle[webseedHostKey]
	// Interned webseed URL. Distinct type (not an alias) so it can carry the
	// Value/String methods below.
	webseedUrlKey unique.Handle[string]
)

// Value returns the underlying URL string for the interned key.
func (me webseedUrlKey) Value() string {
	return unique.Handle[string](me).Value()
}

// String implements fmt.Stringer so keys format as their URL.
func (me webseedUrlKey) String() string {
	return me.Value()
}

// webseedRequestHeapElem is the unit ordered by the request-planning heap in
// updateWebseedRequests: the unique request key plus its ordering data.
type webseedRequestHeapElem struct {
	webseedUniqueRequestKey
	webseedRequestOrderValue
	// Whether files overlapping the request range may be partially complete.
	// Used only as a heap tiebreaker. Not sure this is even worth it now.
	mightHavePartialFiles bool
}

/*
  - Go through all the requestable pieces in order of priority, availability, whether there are peer requests, partial, infohash.
  - For each piece calculate files involved. Record each file not seen before and the piece index.
  - Cancel any outstanding requests that don't match a final file/piece-index pair.
  - Initiate missing requests that fit into the available limits.
*/
59 */ 60 func (cl *Client) updateWebseedRequests() { 61 existingRequests := maps.Collect(cl.iterCurrentWebseedRequestsFromClient()) 62 panicif.False(maps.Equal(existingRequests, maps.Collect(cl.iterCurrentWebseedRequests()))) 63 64 g.MakeMapIfNil(&cl.aprioriMap) 65 aprioriMap := cl.aprioriMap 66 clear(aprioriMap) 67 for uniqueKey, value := range cl.iterPossibleWebseedRequests() { 68 //if len(aprioriMap) >= webseedHostRequestConcurrency { 69 // break 70 //} 71 if g.MapContains(existingRequests, uniqueKey) { 72 continue 73 } 74 cur, ok := aprioriMap[uniqueKey] 75 if ok { 76 // Shared in the lookup above. 77 t := uniqueKey.t 78 // TODO: Change to "slice has requests" 79 hasPeerConnRequest := func(reqIndex RequestIndex) bool { 80 return t.requestingPeer(reqIndex) != nil 81 } 82 // Skip the webseed request unless it has a higher priority, is less requested by peer 83 // conns, or has a lower start offset. Including peer conn requests here will bump 84 // webseed requests in favour of peer conns unless there's nothing else to do. 85 if cmp.Or( 86 cmp.Compare(value.priority, cur.priority), 87 extracmp.CompareBool(hasPeerConnRequest(cur.startRequest), hasPeerConnRequest(value.startRequest)), 88 cmp.Compare(cur.startRequest, value.startRequest), 89 ) <= 0 { 90 continue 91 } 92 } 93 aprioriMap[uniqueKey] = value 94 } 95 96 heapSlice := cl.heapSlice[:0] 97 requiredCap := len(aprioriMap) + len(existingRequests) 98 if cap(heapSlice) < requiredCap { 99 heapSlice = slices.Grow(heapSlice, requiredCap-cap(heapSlice)) 100 } 101 defer func() { 102 // Will this let GC collect values? If not do we need to clear? :( 103 cl.heapSlice = heapSlice[:0] 104 }() 105 106 for key, value := range aprioriMap { 107 // Should be filtered earlier. 
108 panicif.True(g.MapContains(existingRequests, key)) 109 heapSlice = append(heapSlice, webseedRequestHeapElem{ 110 key, 111 webseedRequestOrderValue{ 112 aprioriMapValue: value, 113 }, 114 key.t.filesInRequestRangeMightBePartial( 115 value.startRequest, 116 key.t.endRequestForAlignedWebseedResponse(key.sliceIndex), 117 ), 118 }) 119 } 120 121 // Add remaining existing requests. 122 for key, value := range existingRequests { 123 // Don't reconsider existing requests that aren't wanted anymore. 124 if key.t.dataDownloadDisallowed.IsSet() { 125 continue 126 } 127 wr := value.existingWebseedRequest 128 heapSlice = append(heapSlice, webseedRequestHeapElem{ 129 key, 130 value, 131 key.t.filesInRequestRangeMightBePartial(wr.next, wr.end), 132 }) 133 } 134 135 aprioriHeap := heap.InterfaceForSlice( 136 &heapSlice, 137 func(l webseedRequestHeapElem, r webseedRequestHeapElem) bool { 138 // Not stable ordering but being sticky to existing webseeds should be enough. 139 ret := cmp.Or( 140 // Prefer highest priority 141 -cmp.Compare(l.priority, r.priority), 142 // Then existing requests 143 extracmp.CompareBool(l.existingWebseedRequest == nil, r.existingWebseedRequest == nil), 144 // Prefer not competing with active peer connections. 145 cmp.Compare(len(l.t.conns), len(r.t.conns)), 146 // Try to complete partial slices first. 147 -extracmp.CompareBool(l.mightHavePartialFiles, r.mightHavePartialFiles), 148 // No need to prefer longer files anymore now that we're using slices? 149 //// Longer files first. 150 //-cmp.Compare(l.longestFile().Unwrap(), r.longestFile().Unwrap()), 151 // Easier to debug than infohashes... 152 cmp.Compare(l.t.info.Name, r.t.info.Name), 153 bytes.Compare(l.t.canonicalShortInfohash()[:], r.t.canonicalShortInfohash()[:]), 154 // Doing earlier chunks first means more compact files for partial file hashing. 155 cmp.Compare(l.sliceIndex, r.sliceIndex), 156 ) 157 // Requests should be unique unless they're for different URLs. 
158 if ret == 0 && l.url == r.url { 159 cfg := spew.NewDefaultConfig() 160 cfg.Dump(l) 161 cfg.Dump(r) 162 panic("webseed request heap ordering is not stable") 163 } 164 return ret < 0 165 }, 166 ) 167 168 unwantedExistingRequests := maps.Clone(existingRequests) 169 170 heap.Init(aprioriHeap) 171 var plan webseedRequestPlan 172 // Could also return early here if all known costKeys are fully assigned. 173 for aprioriHeap.Len() > 0 { 174 elem := heap.Pop(aprioriHeap) 175 // Pulling the pregenerated form avoids unique.Handle, and possible URL parsing and error 176 // handling overhead. Need the value to avoid looking this up again. 177 costKey := elem.costKey 178 panicif.Zero(costKey) 179 if elem.existingWebseedRequest == nil { 180 // Existing requests might be within the allowed discard range. 181 panicif.Eq(elem.priority, PiecePriorityNone) 182 } 183 panicif.True(elem.t.dataDownloadDisallowed.IsSet()) 184 panicif.True(elem.t.closed.IsSet()) 185 if len(plan.byCost[costKey]) >= webseedHostRequestConcurrency { 186 continue 187 } 188 g.MakeMapIfNil(&plan.byCost) 189 requestKey := elem.webseedUniqueRequestKey 190 plan.byCost[costKey] = append(plan.byCost[costKey], plannedWebseedRequest{ 191 url: elem.url, 192 t: elem.t, 193 startIndex: elem.startRequest, 194 sliceIndex: elem.sliceIndex, 195 }) 196 delete(unwantedExistingRequests, requestKey) 197 } 198 199 // Cancel any existing requests that are no longer wanted. 200 for key, value := range unwantedExistingRequests { 201 // Should we skip cancelling requests that are ended and just haven't cleaned up yet? 202 value.existingWebseedRequest.Cancel("deprioritized", key.t) 203 } 204 205 printPlan := sync.OnceFunc(func() { 206 if webseed.PrintDebug { 207 //fmt.Println(plan) 208 //fmt.Println(formatMap(existingRequests)) 209 } 210 }) 211 212 // TODO: Do we deduplicate requests across different webseeds? 
213 214 for costKey, plannedRequests := range plan.byCost { 215 for _, request := range plannedRequests { 216 // This could happen if a request is cancelled but hasn't removed itself from the active 217 // list yet. This helps with backpressure as the requests can sleep to rate limit. 218 if !cl.underWebSeedHttpRequestLimit(costKey) { 219 break 220 } 221 existingRequestKey := request.toChunkedWebseedRequestKey() 222 if g.MapContains(existingRequests, existingRequestKey) { 223 // A request exists to the webseed slice already. This doesn't check the request 224 // indexes match. 225 226 // Check we didn't just cancel the same request. 227 panicif.True(g.MapContains(unwantedExistingRequests, existingRequestKey)) 228 continue 229 } 230 t := request.t 231 peer := t.webSeeds[request.url] 232 panicif.NotEq(peer.hostKey, costKey) 233 printPlan() 234 235 debugLogger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ 236 Level: slog.LevelDebug, 237 AddSource: true, 238 })).With( 239 "webseedUrl", request.url, 240 "webseedChunkIndex", request.sliceIndex) 241 242 begin := request.startIndex 243 // TODO: Requests aren't limited by the pieces a peer has. 244 end := t.getWebseedRequestEnd(begin, request.sliceIndex, debugLogger) 245 panicif.LessThanOrEqual(end, begin) 246 247 peer.spawnRequest(begin, end, debugLogger) 248 } 249 } 250 } 251 252 var shortenWebseedRequests = true 253 254 func init() { 255 s, ok := os.LookupEnv("TORRENT_SHORTEN_WEBSEED_REQUESTS") 256 if !ok { 257 return 258 } 259 shortenWebseedRequests = s != "" 260 } 261 262 func (t *Torrent) getWebseedRequestEnd(begin RequestIndex, slice webseedSliceIndex, debugLogger *slog.Logger) RequestIndex { 263 chunkEnd := t.endRequestForAlignedWebseedResponse(slice) 264 if !shortenWebseedRequests { 265 // Pending fix to pendingPieces matching piece request order due to missing initial pieces 266 // checks? 
267 return chunkEnd 268 } 269 // Shorten webseed requests to avoid being penalized by webseeds for cancelling requests. 270 panicif.False(t.wantReceiveChunk(begin)) 271 var end = begin + 1 272 for ; end < chunkEnd && t.wantReceiveChunk(end); end++ { 273 } 274 panicif.GreaterThan(end, chunkEnd) 275 if webseed.PrintDebug && end != chunkEnd { 276 debugLogger.Debug( 277 "shortened webseed request", 278 "from", endExclusiveString(begin, chunkEnd), 279 "to", endExclusiveString(begin, end)) 280 } 281 return end 282 } 283 284 // Cloudflare caches up to 512 MB responses by default. This is also an alignment. Making this 285 // smaller will allow requests to complete a smaller set of files faster. 286 var webseedRequestChunkSize = initUIntFromEnv[uint64]("TORRENT_WEBSEED_REQUEST_CHUNK_SIZE", 64<<20, 64) 287 288 // Can return the same as start if the request is at the end of the torrent. 289 func (t *Torrent) endRequestForAlignedWebseedResponse(slice webseedSliceIndex) RequestIndex { 290 end := min( 291 t.maxEndRequest(), 292 RequestIndex(slice+1)*t.chunksPerAlignedWebseedResponse()) 293 return end 294 } 295 296 func (t *Torrent) chunksPerAlignedWebseedResponse() RequestIndex { 297 // This is the same as webseedRequestChunkSize, but in terms of RequestIndex. 
298 return RequestIndex(webseedRequestChunkSize / t.chunkSize.Uint64()) 299 } 300 301 func (t *Torrent) requestIndexToWebseedSliceIndex(requestIndex RequestIndex) webseedSliceIndex { 302 return webseedSliceIndex(requestIndex / t.chunksPerAlignedWebseedResponse()) 303 } 304 305 func (cl *Client) dumpCurrentWebseedRequests() { 306 if webseed.PrintDebug { 307 fmt.Println("current webseed requests:") 308 for key, value := range cl.iterCurrentWebseedRequests() { 309 fmt.Printf("\t%v: %v, priority %v\n", key, value.existingWebseedRequest, value.priority) 310 } 311 } 312 } 313 314 type webseedRequestPlan struct { 315 byCost map[webseedHostKeyHandle][]plannedWebseedRequest 316 } 317 318 // Needed components to generate a webseed request. 319 type plannedWebseedRequest struct { 320 url webseedUrlKey 321 t *Torrent 322 sliceIndex webseedSliceIndex 323 startIndex RequestIndex 324 } 325 326 func (me *plannedWebseedRequest) toChunkedWebseedRequestKey() webseedUniqueRequestKey { 327 return webseedUniqueRequestKey{ 328 url: me.url, 329 t: me.t, 330 sliceIndex: me.sliceIndex, 331 } 332 } 333 334 func (me webseedRequestPlan) String() string { 335 var sb strings.Builder 336 for costKey, requestKeys := range me.byCost { 337 fmt.Fprintf(&sb, "%v\n", costKey.Value()) 338 for _, requestKey := range requestKeys { 339 fmt.Fprintf(&sb, "\t%v\n", requestKey) 340 } 341 } 342 return strings.TrimSuffix(sb.String(), "\n") 343 } 344 345 // Distinct webseed request data when different offsets are not allowed. 346 type webseedUniqueRequestKey struct { 347 url webseedUrlKey 348 t *Torrent 349 sliceIndex webseedSliceIndex 350 } 351 352 type aprioriMapValue struct { 353 costKey webseedHostKeyHandle 354 priority PiecePriority 355 startRequest RequestIndex 356 } 357 358 func (me webseedUniqueRequestKey) String() string { 359 return fmt.Sprintf("torrent %v: webseed %v: slice %v", me.t, me.url, me.sliceIndex) 360 } 361 362 // Non-distinct proposed webseed request data. 
363 type webseedRequestOrderValue struct { 364 aprioriMapValue 365 // Used for cancellation if this is deprioritized. Also, a faster way to sort for existing 366 // requests. 367 existingWebseedRequest *webseedRequest 368 } 369 370 func (me webseedRequestOrderValue) String() string { 371 return fmt.Sprintf("%#v", me) 372 } 373 374 // Yields possible webseed requests by piece. Caller should filter and prioritize these. 375 func (cl *Client) iterPossibleWebseedRequests() iter.Seq2[webseedUniqueRequestKey, aprioriMapValue] { 376 return func(yield func(webseedUniqueRequestKey, aprioriMapValue) bool) { 377 for key, value := range cl.pieceRequestOrder { 378 input := key.getRequestStrategyInput(cl) 379 if !requestStrategy.GetRequestablePieces( 380 input, 381 value.pieces, 382 func(ih metainfo.Hash, pieceIndex int, orderState requestStrategy.PieceRequestOrderState) bool { 383 t := cl.torrentsByShortHash[ih] 384 if len(t.webSeeds) == 0 { 385 return true 386 } 387 p := t.piece(pieceIndex) 388 cleanOpt := p.firstCleanChunk() 389 if !cleanOpt.Ok { 390 return true 391 } 392 // Pretty sure we want this and not the order state priority. That one is for 393 // client piece request order and ignores other states like hashing, marking 394 // etc. Order state priority would be faster otherwise. 395 priority := p.effectivePriority() 396 firstRequest := p.requestIndexBegin() + cleanOpt.Value 397 panicif.GreaterThanOrEqual(firstRequest, t.maxEndRequest()) 398 webseedSliceIndex := t.requestIndexToWebseedSliceIndex(firstRequest) 399 for url, ws := range t.webSeeds { 400 if ws.suspended() { 401 continue 402 } 403 if !ws.peer.peerHasPiece(pieceIndex) { 404 continue 405 } 406 // Return value from this function (RequestPieceFunc) doesn't terminate 407 // iteration, so propagate that to not handling the yield return value. 
408 if !yield( 409 webseedUniqueRequestKey{ 410 t: t, 411 sliceIndex: webseedSliceIndex, 412 url: url, 413 }, 414 aprioriMapValue{ 415 priority: priority, 416 costKey: ws.hostKey, 417 startRequest: firstRequest, 418 }, 419 ) { 420 return false 421 } 422 } 423 return true 424 }, 425 ) { 426 break 427 } 428 } 429 } 430 } 431 432 func (cl *Client) updateWebseedRequestsWithReason(reason updateRequestReason) { 433 // Should we wrap this with pprof labels? 434 cl.scheduleImmediateWebseedRequestUpdate(reason) 435 } 436 437 // This has awful naming, I'm not quite sure what to call this. 438 func (cl *Client) yieldKeyAndValue( 439 yield func(webseedUniqueRequestKey, webseedRequestOrderValue) bool, 440 key webseedUniqueRequestKey, 441 ar *webseedRequest, 442 ) bool { 443 t := key.t 444 url := key.url 445 hostKey := t.webSeeds[url].hostKey 446 // Don't spawn requests before old requests are cancelled. 447 if false { 448 if ar.cancelled.Load() { 449 cl.slogger.Debug("iter current webseed requests: skipped cancelled webseed request") 450 // This should prevent overlapping webseed requests that are just filling 451 // slots waiting to cancel from conflicting. 
452 return true 453 } 454 } 455 priority := PiecePriorityNone 456 if ar.next < ar.end { 457 p := t.piece(t.pieceIndexOfRequestIndex(ar.next)) 458 priority = p.effectivePriority() 459 } 460 return yield( 461 webseedUniqueRequestKey{ 462 t: t, 463 sliceIndex: t.requestIndexToWebseedSliceIndex(ar.begin), 464 url: url, 465 }, 466 webseedRequestOrderValue{ 467 aprioriMapValue{ 468 priority: priority, 469 costKey: hostKey, 470 startRequest: ar.next, 471 }, 472 ar, 473 }, 474 ) 475 } 476 477 func (cl *Client) iterCurrentWebseedRequestsFromClient() iter.Seq2[webseedUniqueRequestKey, webseedRequestOrderValue] { 478 return func(yield func(webseedUniqueRequestKey, webseedRequestOrderValue) bool) { 479 for key, ar := range cl.activeWebseedRequests { 480 if !cl.yieldKeyAndValue(yield, key, ar) { 481 return 482 } 483 } 484 } 485 } 486 487 // This exists to compare old behaviour with Client active requests. 488 func (cl *Client) iterCurrentWebseedRequests() iter.Seq2[webseedUniqueRequestKey, webseedRequestOrderValue] { 489 return func(yield func(webseedUniqueRequestKey, webseedRequestOrderValue) bool) { 490 for t := range cl.torrents { 491 for url, ws := range t.webSeeds { 492 for ar := range ws.activeRequests { 493 key := webseedUniqueRequestKey{ 494 t: t, 495 sliceIndex: t.requestIndexToWebseedSliceIndex(ar.begin), 496 url: url, 497 } 498 if !cl.yieldKeyAndValue(yield, key, ar) { 499 return 500 } 501 } 502 } 503 } 504 } 505 } 506 507 func (cl *Client) scheduleImmediateWebseedRequestUpdate(reason updateRequestReason) { 508 if !cl.webseedRequestTimer.Stop() { 509 // Timer function already running, let it do its thing. 510 return 511 } 512 // Set the timer to fire right away (this will coalesce consecutive updates without forcing an 513 // update on every call to this method). Since we're holding the Client lock, and we cancelled 514 // the timer, and it wasn't active, nobody else should have reset it before us. 
Do we need to 515 // introduce a "reason" field here, (albeit Client-level?). 516 cl.webseedUpdateReason = cmp.Or(cl.webseedUpdateReason, reason) 517 panicif.True(cl.webseedRequestTimer.Reset(0)) 518 } 519 520 func (cl *Client) updateWebseedRequestsTimerFunc() { 521 if cl.closed.IsSet() { 522 return 523 } 524 // This won't get set elsewhere if the timer has fired, which it has for us to be here. 525 cl.webseedUpdateReason = cmp.Or(cl.webseedUpdateReason, "timer") 526 cl.lock() 527 defer cl.unlock() 528 cl.updateWebseedRequestsAndResetTimer() 529 } 530 531 func (cl *Client) updateWebseedRequestsAndResetTimer() { 532 pprof.Do(context.Background(), pprof.Labels( 533 "reason", string(cl.webseedUpdateReason), 534 ), func(_ context.Context) { 535 started := time.Now() 536 reason := cl.webseedUpdateReason 537 cl.webseedUpdateReason = "" 538 cl.updateWebseedRequests() 539 panicif.NotZero(cl.webseedUpdateReason) 540 if webseed.PrintDebug { 541 now := time.Now() 542 fmt.Printf("%v: updateWebseedRequests took %v (reason: %v)\n", now, now.Sub(started), reason) 543 } 544 }) 545 // Timer should always be stopped before the last call. TODO: Don't reset timer if there's 546 // nothing to do (no possible requests in update). 547 panicif.True(cl.webseedRequestTimer.Reset(webseedRequestUpdateTimerInterval)) 548 549 } 550 551 type endExclusive[T any] struct { 552 start, end T 553 } 554 555 func (me endExclusive[T]) String() string { 556 return fmt.Sprintf("[%v-%v)", me.start, me.end) 557 } 558 559 func endExclusiveString[T any](start, end T) string { 560 return endExclusive[T]{start, end}.String() 561 }