gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/projectdownloadworker.go

package renter

import (
	"fmt"
	"math"
	"reflect"
	"sort"
	"sync"
	"time"

	"github.com/opentracing/opentracing-go"
	"gitlab.com/NebulousLabs/errors"
	"gitlab.com/SkynetLabs/skyd/build"
	"gitlab.com/SkynetLabs/skyd/skymodules"
	"gitlab.com/SkynetLabs/skyd/skymodules/gouging"
	"go.sia.tech/siad/modules"
	"go.sia.tech/siad/types"
)

// DOWNLOAD CODE IMPROVEMENTS:
//
// The current design of the download algorithm has some key places where
// there's still room for improvement. Below we list some of the ideas that
// could contribute to a faster and more robust algorithm.
//
// 1. optimize the amount of memory allocations by using a sync.Pool: while
// benchmarking and profiling the current download algorithm we found that the
// downloads are usually bottlenecked by CPU, mostly coming from
// runtime.scanobject, which indicates the GC is triggered too often. By
// looking at the memory profile, both using -inuse_objects and -alloc_objects,
// we can see that `managedHandleResponse`, `managedExecuteProgram`,
// `managedHasSector`, ... all allocate a bunch of memory. In a lot of these
// areas we can use a sync.Pool with preallocated memory that gets recycled,
// avoiding needless reallocation of new memory. (A sketch of this pattern
// follows this list.)
//
// 2. add a fixed cost to account for our own bandwidth expenses: because the
// host network on Sia possibly has a lot of very cheap hosts, we should be
// offsetting the cost with a fixed cost to account for our own bandwidth
// expenses. E.g. if we use a fixed cost of $2/TB, so ~100SC, then a worker
// that costs 1SC becomes 101SC and a worker that costs 100SC becomes 200SC.
// That makes the difference between those workers 2x instead of the 100x it
// was before the fixed cost.
//
// 3. add a mechanism to slow down the DTs or switch to different DTs if we
// have too many jobs launched at once: the distribution tracker does not take
// worker load into account, which means its chance values become too
// optimistic. That might hurt the performance of the algorithm when not taken
// into account.
//
// 4. play with the 50% number, and account for the cost of being unlucky: a
// worker set's total chance has to be greater than 50% in order for it to be
// accepted as a viable worker set. This 50% number is essentially arbitrary,
// and even at 50% there's still a 50% chance we fall on the other side of the
// fence. This should/could be taken into account.
//
// 5. fix the algorithm that chooses which worker to replace in your set: the
// algorithm that decides which worker to replace with a cheaper worker in the
// current working set is implemented somewhat naively. Figuring out which
// worker to replace can be a very complex algorithm. There's definitely room
// for improvement here.
//
// 6. fix the algorithm that constructs chimeras: chimeras are currently
// constructed for every bucket duration, however we could also rebuild
// chimeras, or partially rebuild chimeras, when we are swapping out cheaper
// workers in the working set. The current algorithm considers the chimera
// worker and its cost as fixed, but that does not necessarily have to be the
// case. We can further improve this by swapping out series of workers for
// cheaper workers inside of the chimera itself.
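
// To make improvement (1) above concrete, the following is a minimal sketch
// of the sync.Pool pattern it suggests. The pool variable and the recycled
// response type are hypothetical, not part of this package; the point is only
// that Get/Put recycles buffers instead of letting every response become
// garbage for the GC to scan:
//
//	var respPool = sync.Pool{
//		New: func() interface{} {
//			return new(jobReadResponse)
//		},
//	}
//
//	// in a hot path, e.g. a response handler:
//	resp := respPool.Get().(*jobReadResponse)
//	// ... populate and consume resp ...
//	*resp = jobReadResponse{} // clear it before recycling
//	respPool.Put(resp)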

const (
	// bucketIndexScanStep defines the step size with which we increment the
	// bucket index when we try and find the best set the first time around.
	// This works more or less in a binary search fashion where we try and
	// quickly approximate the bucket index, and then scan 10 buckets before
	// and after the index we found.
	// NOTE: bucketIndexScanStep needs to cleanly divide the number of total
	// buckets in the distribution tracker. Otherwise we might miss buckets
	// at the end.
	bucketIndexScanStep = 10

	// chimeraAvailabilityRateThreshold defines the number that must be reached
	// when composing a chimera from unresolved workers. If the sum of the
	// availability rates of each worker reaches this threshold we build a
	// chimera out of them.
	chimeraAvailabilityRateThreshold = float64(2)
)

var (
	// maxWaitUnresolvedWorkerUpdate defines the maximum amount of time we want
	// to wait for unresolved workers to become resolved before trying to
	// recreate the worker set.
	//
	// maxWaitUpdateWorkers defines the maximum amount of time we want to wait
	// for workers to be updated.
	//
	// NOTE: these variables are currently lowered in the test environment to
	// avoid a large amount of parallel downloads. We've found that the host is
	// currently facing a locking issue causing slow reads on the CI when
	// there's a lot of parallel reads taking place. This issue is tackled
	// (partially) by the following PR
	// https://github.com/SiaFoundation/siad/pull/50 and thus this build var
	// should be removed again when that is merged and rolled out fully.
	maxWaitUpdateWorkers = build.Select(build.Var{
		Standard: 25 * time.Millisecond,
		Dev:      25 * time.Millisecond,
		Testing:  250 * time.Millisecond,
	}).(time.Duration)
	maxWaitUnresolvedWorkerUpdate = build.Select(build.Var{
		Standard: 50 * time.Millisecond,
		Dev:      50 * time.Millisecond,
		Testing:  250 * time.Millisecond,
	}).(time.Duration)
)

// NOTE: all of the following defined types are used by the PDC, which is
// inherently not thread safe; that means these types don't need to be thread
// safe either. If fields are marked `static` it is meant to signal they won't
// change after being set.
type (
	// downloadWorker is an interface implemented by both the individual and
	// chimera workers that represents a worker that can be used for downloads.
	downloadWorker interface {
		// completeChanceCached returns the chance this download worker
		// completes a read within a certain duration. This value is cached and
		// gets recomputed for every bucket index (which represents a duration).
		// The value returned here is recomputed by 'recalculateCompleteChance'.
		completeChanceCached() float64

		// cost returns the expected job cost for downloading a piece. If the
		// worker has already been launched, its cost will be zero.
		cost() float64

		// getPieceForDownload returns the piece to download next.
		getPieceForDownload() uint64

		// identifier returns a unique identifier for the download worker, this
		// identifier can be used as a key when mapping the download worker.
		identifier() uint32

		// markPieceForDownload allows specifying what piece to download for
		// this worker in the case the worker resolved multiple pieces.
		markPieceForDownload(pieceIndex uint64)

		// pieces returns all piece indices this worker can resolve.
		pieces(pdc *projectDownloadChunk) []uint64

		// worker returns the underlying worker.
		worker() *worker
	}

	// chimeraWorker is a worker that's built from unresolved workers until the
	// chance it has a piece is at least 'chimeraAvailabilityRateThreshold'. At
	// that point we can treat the chimera worker the same as a resolved worker
	// in the download algorithm that constructs the best worker set.
	chimeraWorker struct {
		// staticChanceComplete is the chance this worker completes after the
		// duration at which this chimera worker was built.
		staticChanceComplete float64

		// staticCost returns the cost of the chimera worker, which is the
		// average cost taken across all workers this chimera worker is
		// comprised of. It is static because it never gets updated after the
		// chimera is finalized and this field is calculated.
		staticCost float64

		// staticIdentifier uniquely identifies the chimera worker.
		staticIdentifier uint32
	}

	// individualWorker represents a single worker object, both resolved and
	// unresolved workers in the pdc can be represented by an individual
	// worker. An individual worker can be used to build a chimera worker with.
	// For every useful worker in the workerpool, an individual worker is
	// created, and this worker is updated as the download progresses with
	// information from the PCWS (resolved status and pieces).
	individualWorker struct {
		// the following fields are cached and recalculated at exact times
		// within the download algorithm
		//
		// cachedCompleteChance is the chance the worker completes after the
		// current duration with which this value was recalculated
		//
		// cachedLookupIndex is the index corresponding to the estimated
		// duration of the lookup DT
		//
		// cachedReadDTChances is the cached chances value of the read DT
		//
		// cachedReadDTChancesInitialized is used to prevent needlessly
		// recalculating the read DT chances; if a worker is resolved but not
		// launched, its read DT chances do not change as they don't shift
		cachedCompleteChance           float64
		cachedLookupIndex              int
		cachedReadDTChances            skymodules.Chances
		cachedReadDTChancesInitialized bool

		// the following fields are continuously updated on the worker, all
		// individual workers are not resolved initially, when a worker
		// resolves the piece indices are updated and the resolved status is
		// adjusted
		pieceIndices []uint64
		resolved     bool

		// onCoolDown is a flag that indicates whether this worker's HS or RS
		// queues are on cooldown. A worker with a queue on cooldown is not
		// necessarily discounted as not useful for downloading; instead it's
		// marked as on cooldown and only used if it comes off of cooldown.
		onCoolDown bool

		// currentPiece is the piece that was marked by the download algorithm
		// as the piece to download next, this is used to ensure that workers
		// in the worker set are not selected for duplicate piece indices.
		currentPiece           uint64
		currentPieceLaunchedAt time.Time

		// static fields on the individual worker
		staticAvailabilityRate   float64
		staticCost               float64
		staticDownloadLaunchTime time.Time
		staticIdentifier         uint32
		staticLookupDistribution skymodules.Distribution
		staticReadDistribution   skymodules.Distribution
		staticWorker             *worker
	}

	// workerSet is a collection of workers that may or may not have been
	// launched yet in order to fulfil a download.
	workerSet struct {
		workers []downloadWorker

		staticBucketDuration time.Duration
		staticBucketIndex    int
		staticMinPieces      int
		staticNumOverdrive   int

		staticPDC *projectDownloadChunk
	}

	// coinflips is a collection of chances where every item is the chance the
	// coin will turn up heads. We use the concept of a coin because it allows
	// us to more easily reason about chance calculations.
	coinflips []float64
)

// bufferedDownloadState is a helper type which contains fields which we only
// want to allocate once and then reuse between iterations of the download
// algorithm for optimization reasons.
type bufferedDownloadState struct {
	downloadWorkers       []downloadWorker
	mostLikely            []downloadWorker
	lessLikely            []downloadWorker
	pieces                map[uint64]struct{}
	added                 map[uint32]struct{}
	sortedDownloadWorkers sortedDownloadWorkers
}

// Reset resets the download state without freeing its memory for the next
// iteration of the loop.
func (ds *bufferedDownloadState) Reset() {
	ds.downloadWorkers = ds.downloadWorkers[:0]
	ds.mostLikely = ds.mostLikely[:0]
	ds.lessLikely = ds.lessLikely[:0]
	ds.sortedDownloadWorkers = ds.sortedDownloadWorkers[:0]
	ds.pieces = make(map[uint64]struct{}, len(ds.pieces))
	ds.added = make(map[uint32]struct{}, len(ds.added))
}
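
// Reset relies on the `s[:0]` idiom: truncating a slice keeps its backing
// array, so subsequent appends reuse the existing capacity instead of
// allocating. A minimal illustration (not part of this package):
//
//	buf := make([]int, 0, 128)
//	buf = append(buf, 1, 2, 3)
//	buf = buf[:0]        // len 0, cap still 128
//	buf = append(buf, 4) // reuses the original backing array
//
// The maps are recreated instead of cleared, since deleting keys one by one
// would require iterating the map; the size hint keeps the reallocation
// cheap.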

// pdcGougingCache is a helper type to cache pdc gouging results for workers.
type pdcGougingCache struct {
	staticCache map[string]pdcGougingResult
	mu          sync.Mutex
}

// pdcGougingResult contains a project download chunk gouging result for a
// given allowance and price table ID.
type pdcGougingResult struct {
	staticAllowance skymodules.Allowance
	staticPTID      modules.UniqueID
	staticIsGouging error
}

// pcwsGougingCache is a helper type to cache pcws gouging results for workers.
type pcwsGougingCache struct {
	staticCache map[string]map[int]pcwsGougingResult
	mu          sync.Mutex
}

// pcwsGougingResult contains a project chunk workerset gouging result for a
// given allowance, number of workers and price table.
type pcwsGougingResult struct {
	staticAllowance  skymodules.Allowance
	staticNumWorkers int
	staticPTID       modules.UniqueID
	staticIsGouging  error
}

// sortedDownloadWorker is a helper type for sorting workers by completeChance.
type sortedDownloadWorker struct {
	originalIndex  int
	completeChance float64
}

// sortedDownloadWorkers is a helper type to implement the sort.Interface
// interface.
type sortedDownloadWorkers []sortedDownloadWorker

// Len returns the length of the slice.
func (sdw sortedDownloadWorkers) Len() int { return len(sdw) }

// Less returns whether the completeChance at index i is greater than the one
// at index j, sorting the workers from most to least likely.
func (sdw sortedDownloadWorkers) Less(i, j int) bool {
	return sdw[i].completeChance > sdw[j].completeChance
}

// Swap swaps two workers in the slice.
func (sdw sortedDownloadWorkers) Swap(i, j int) {
	sdw[i], sdw[j] = sdw[j], sdw[i]
}

// sortedIndividualWorkers is a helper type to implement the sort.Interface
// interface.
type sortedIndividualWorkers []*individualWorker

// Len returns the length of the slice.
func (siw sortedIndividualWorkers) Len() int { return len(siw) }

// Less returns whether the cachedCompleteChance at index i is greater than
// the one at index j, sorting the workers from most to least likely.
func (siw sortedIndividualWorkers) Less(i, j int) bool {
	eRTI := siw[i].cachedCompleteChance
	eRTJ := siw[j].cachedCompleteChance
	return eRTI > eRTJ
}

// Swap swaps two workers in the slice.
func (siw sortedIndividualWorkers) Swap(i, j int) {
	siw[i], siw[j] = siw[j], siw[i]
}

// NewChimeraWorker returns a new chimera worker object.
func NewChimeraWorker(workers []*individualWorker, identifier uint32) *chimeraWorker {
	// calculate the average cost and average (weighted) complete chance
	var totalCompleteChance float64
	var totalCost float64
	for _, w := range workers {
		// sanity check the worker is unresolved
		if w.isResolved() {
			build.Critical("developer error, a chimera is built using unresolved workers only")
		}

		totalCompleteChance += w.cachedCompleteChance * w.staticAvailabilityRate
		totalCost += w.staticCost
	}

	totalWorkers := float64(len(workers))
	avgChance := totalCompleteChance / totalWorkers
	avgCost := totalCost / totalWorkers

	return &chimeraWorker{
		staticChanceComplete: avgChance,
		staticCost:           avgCost,
		staticIdentifier:     identifier,
	}
}

// completeChanceCached returns the chance this chimera completes.
func (cw *chimeraWorker) completeChanceCached() float64 {
	return cw.staticChanceComplete
}

// cost returns the cost for this chimera worker, this method can only be
// called on a chimera that is finalized.
func (cw *chimeraWorker) cost() float64 {
	return cw.staticCost
}

// getPieceForDownload returns the piece to download next. For a chimera worker
// this is always 0 and should never be called, which is why we add a
// build.Critical to signal developer error.
func (cw *chimeraWorker) getPieceForDownload() uint64 {
	build.Critical("developer error, should not get called on a chimera worker")
	return 0
}

// identifier returns a unique identifier for this worker.
func (cw *chimeraWorker) identifier() uint32 {
	return cw.staticIdentifier
}

// markPieceForDownload takes a piece index and marks it as the piece to
// download for this worker. In the case of a chimera worker this method is
// essentially a no-op since chimera workers are never launched.
func (cw *chimeraWorker) markPieceForDownload(pieceIndex uint64) {
	// this is a no-op
}

// pieces returns the piece indices this worker can download. Chimera workers
// return all pieces as we don't know yet what pieces they can resolve; note
// that all possible piece indices are defined on the pdc to avoid unnecessary
// slice allocations for every chimera.
func (cw *chimeraWorker) pieces(pdc *projectDownloadChunk) []uint64 {
	return pdc.staticPieceIndices
}

// worker returns the worker, for chimeras this is always nil since it's a
// combination of multiple workers.
func (cw *chimeraWorker) worker() *worker {
	return nil
}

// cost returns the cost for this worker, depending on whether it is launched
// or not it will return either 0, or the static cost variable.
func (iw *individualWorker) cost() float64 {
	if iw.isLaunched() {
		return 0
	}
	return iw.staticCost
}

// recalculateDistributionChances gets called when the download algorithm
// decides it has to recalculate the chances that are based on the worker's
// distributions. This function will apply the necessary shifts and
// recalculate the cached fields.
func (iw *individualWorker) recalculateDistributionChances() {
	// if the read dt chances are not initialized, initialize them first
	if !iw.cachedReadDTChancesInitialized {
		iw.cachedReadDTChances = iw.staticReadDistribution.ChancesAfter()
		iw.cachedReadDTChancesInitialized = true
	}

	// if the worker is launched, we want to shift the read dt
	if iw.isLaunched() {
		readDT := iw.staticReadDistribution.Clone()
		readDT.Shift(time.Since(iw.currentPieceLaunchedAt))
		iw.cachedReadDTChances = readDT.ChancesAfter()
	}

	// if the worker is not resolved yet, we want to always shift the lookup dt
	// and use that to recalculate the expected duration index
	if !iw.isResolved() {
		shift := time.Since(iw.staticDownloadLaunchTime)
		ed := iw.staticLookupDistribution.ExpectedDurationWithShift(shift)
		iw.cachedLookupIndex = skymodules.DistributionBucketIndexForDuration(ed)
	}
}

// recalculateCompleteChance calculates the chance this worker completes at the
// given index. This chance is a combination of the chance it resolves and the
// chance it completes the read by the given index. The resolve (or lookup)
// chance only plays a part for workers that have not resolved yet.
//
// This function calculates the complete chance by approximation, meaning if we
// request the complete chance at 200ms, for an unresolved worker, we will
// offset the read chance with the expected duration of the lookup DT. E.g. if
// the lookup DT's expected duration is 40ms, we return the complete chance at
// 160ms. Instead of durations though, we use the indices that correspond to
// the durations.
func (iw *individualWorker) recalculateCompleteChance(index int) {
	// if the worker is resolved, simply return the read chance at given index
	if iw.isResolved() {
		iw.cachedCompleteChance = iw.cachedReadDTChances[index]
		return
	}

	// if it's not resolved, and the index is smaller than our cached lookup
	// index, we return a complete chance of zero because it has no chance of
	// completing since it's not expected to have been resolved yet
	if index < iw.cachedLookupIndex {
		iw.cachedCompleteChance = 0
		return
	}

	// otherwise return the read chance offset by the lookup index
	iw.cachedCompleteChance = iw.cachedReadDTChances[index-iw.cachedLookupIndex]
}

// completeChanceCached returns the chance this worker will complete.
func (iw *individualWorker) completeChanceCached() float64 {
	return iw.cachedCompleteChance
}

// getPieceForDownload returns the piece to download next.
func (iw *individualWorker) getPieceForDownload() uint64 {
	return iw.currentPiece
}

// identifier returns a unique identifier for this worker.
func (iw *individualWorker) identifier() uint32 {
	return iw.staticIdentifier
}

// isLaunched returns true when this worker has been launched.
func (iw *individualWorker) isLaunched() bool {
	return !iw.currentPieceLaunchedAt.IsZero()
}

// isOnCooldown returns whether this individual worker is on cooldown.
func (iw *individualWorker) isOnCooldown() bool {
	return iw.onCoolDown
}

// isResolved returns whether this individual worker has resolved.
func (iw *individualWorker) isResolved() bool {
	return iw.resolved
}

// markPieceForDownload takes a piece index and marks it as the piece to
// download next for this worker.
func (iw *individualWorker) markPieceForDownload(pieceIndex uint64) {
	// sanity check the given piece is a piece present in the worker's pieces
	if build.Release == "testing" {
		var found bool
		for _, availPieceIndex := range iw.pieceIndices {
			if pieceIndex == availPieceIndex {
				found = true
				break
			}
		}
		if !found {
			build.Critical(fmt.Sprintf("markPieceForDownload is marking a piece that is not present in the worker's piece indices, %v does not include %v", iw.pieceIndices, pieceIndex))
		}
	}
	iw.currentPiece = pieceIndex
}

// pieces returns the piece indices this worker can download.
func (iw *individualWorker) pieces(_ *projectDownloadChunk) []uint64 {
	return iw.pieceIndices
}

// worker returns the worker.
func (iw *individualWorker) worker() *worker {
	return iw.staticWorker
}

// clone returns a shallow copy of the worker set.
func (ws *workerSet) clone() *workerSet {
	return &workerSet{
		workers: append([]downloadWorker{}, ws.workers...),

		staticBucketDuration: ws.staticBucketDuration,
		staticBucketIndex:    ws.staticBucketIndex,
		staticMinPieces:      ws.staticMinPieces,
		staticNumOverdrive:   ws.staticNumOverdrive,

		staticPDC: ws.staticPDC,
	}
}

// cheaperSetFromCandidate returns a new worker set if the given candidate
// worker can improve the cost of the worker set. The worker that gets swapped
// out for the candidate is the most expensive worker possible, which is not
// necessarily the most expensive worker in the set because we have to take
// into account the pieces the worker can download.
func (ws *workerSet) cheaperSetFromCandidate(candidate downloadWorker) *workerSet {
	// convenience variables
	pdc := ws.staticPDC

	// build two maps for fast lookups
	originalIndexMap := make(map[uint32]int)
	piecesToIndexMap := make(map[uint64]int)
	for i, w := range ws.workers {
		originalIndexMap[w.identifier()] = i
		if _, ok := w.(*individualWorker); ok {
			piecesToIndexMap[w.getPieceForDownload()] = i
		}
	}

	// sort the workers by cost, most expensive to cheapest
	byCostDesc := append([]downloadWorker{}, ws.workers...)
	sort.Slice(byCostDesc, func(i, j int) bool {
		wCostI := byCostDesc[i].cost()
		wCostJ := byCostDesc[j].cost()
		return wCostI > wCostJ
	})

	// range over the workers
	swapIndex := -1
LOOP:
	for _, w := range byCostDesc {
		// if the candidate is not cheaper than this worker we can stop looking
		// to build a cheaper set since the workers are sorted by cost
		if candidate.cost() >= w.cost() {
			break
		}

		// if the current worker is launched, don't swap it out
		expensiveWorkerPiece, launched, _ := pdc.workerProgress(w)
		if launched {
			continue
		}

		// if the current worker is a chimera worker, and we're cheaper, swap
		expensiveWorkerIndex := originalIndexMap[w.identifier()]
		if _, ok := w.(*chimeraWorker); ok {
			swapIndex = expensiveWorkerIndex
			break LOOP
		}

		// range over the candidate's pieces and see whether we can swap
		for _, piece := range candidate.pieces(pdc) {
			// if the candidate can download the same piece as the expensive
			// worker, swap it out because it's cheaper
			if piece == expensiveWorkerPiece {
				swapIndex = expensiveWorkerIndex
				break LOOP
			}

			// if the candidate can download a piece that is currently not
			// being downloaded by anyone else, swap it as well
			_, workerForPiece := piecesToIndexMap[piece]
			if !workerForPiece {
				swapIndex = expensiveWorkerIndex
				break LOOP
			}
		}
	}

	if swapIndex > -1 {
		cheaperSet := ws.clone()
		cheaperSet.workers[swapIndex] = candidate
		return cheaperSet
	}
	return nil
}

// adjustedDuration returns the cost adjusted expected duration of the worker
// set using the given price per ms.
func (ws *workerSet) adjustedDuration(ppms types.Currency) time.Duration {
	// calculate the total cost of the worker set
	var totalCost float64
	for _, w := range ws.workers {
		totalCost += w.cost()
	}

	// calculate the cost penalty using the given price per ms and apply it to
	// the worker set's expected duration.
	totalCostCurr := types.NewCurrency64(uint64(totalCost))
	return addCostPenalty(ws.staticBucketDuration, totalCostCurr, ppms)
}

// chancesAfter is a small helper function that returns a list of every
// worker's cached chance that it has completed after the worker set's bucket
// duration.
func (ws *workerSet) chancesAfter() coinflips {
	chances := make(coinflips, len(ws.workers))
	for i, w := range ws.workers {
		chances[i] = w.completeChanceCached()
	}
	return chances
}

// chanceGreaterThanHalf returns whether the total chance this worker set
// completes the download before the given duration is more than 50%.
//
// NOTE: this function abstracts the chance a worker resolves after the given
// duration as a coinflip to make it easier to reason about the problem given
// that the workerset consists of one or more overdrive workers.
func (ws *workerSet) chanceGreaterThanHalf() bool {
	// convert every worker into a coinflip
	coinflips := ws.chancesAfter()

	var chance float64
	switch ws.staticNumOverdrive {
	case 0:
		// if we don't have to consider any overdrive workers, the chance it's
		// all heads is the chance that needs to be greater than half
		chance = coinflips.chanceAllHeads()
	case 1:
		// if there is 1 overdrive worker, we can essentially have one of the
		// coinflips come up as tails, as long as all the others are heads
		chance = coinflips.chanceHeadsAllowOneTails()
	case 2:
		// if there are 2 overdrive workers, we can have two of them come up as
		// tails, as long as all the others are heads
		chance = coinflips.chanceHeadsAllowTwoTails()
	default:
		// if there are a lot of overdrive workers, we use an approximation by
		// summing all coinflips to see whether we are expected to be able to
		// download min pieces within the given duration
		return coinflips.chanceSum() > float64(ws.staticMinPieces)
	}

	return chance > 0.5
}

// chanceAllHeads returns the chance all coins show heads.
func (cf coinflips) chanceAllHeads() float64 {
	if len(cf) == 0 {
		return 0
	}

	chanceAllHeads := float64(1)
	for _, chanceHead := range cf {
		chanceAllHeads *= chanceHead
	}
	return chanceAllHeads
}

// chanceHeadsAllowOneTails returns the chance at least n-1 coins show heads
// where n is the number of coins.
func (cf coinflips) chanceHeadsAllowOneTails() float64 {
	chanceAllHeads := cf.chanceAllHeads()

	totalChance := chanceAllHeads
	for _, chanceHead := range cf {
		chanceTails := 1 - chanceHead
		totalChance += (chanceAllHeads / chanceHead * chanceTails)
	}
	return totalChance
}

// chanceHeadsAllowTwoTails returns the chance at least n-2 coins show heads
// where n is the number of coins.
func (cf coinflips) chanceHeadsAllowTwoTails() float64 {
	chanceAllHeads := cf.chanceAllHeads()
	totalChance := cf.chanceHeadsAllowOneTails()

	for i := 0; i < len(cf)-1; i++ {
		chanceIHeads := cf[i]
		chanceITails := 1 - chanceIHeads
		chanceOnlyITails := chanceAllHeads / chanceIHeads * chanceITails
		for jj := i + 1; jj < len(cf); jj++ {
			chanceJHeads := cf[jj]
			chanceJTails := 1 - chanceJHeads
			chanceOnlyIAndJJTails := chanceOnlyITails / chanceJHeads * chanceJTails
			totalChance += chanceOnlyIAndJJTails
		}
	}
	return totalChance
}

// chanceSum returns the sum of all chances.
func (cf coinflips) chanceSum() float64 {
	var sum float64
	for _, flip := range cf {
		sum += flip
	}
	return sum
}
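
// A quick worked example of the coinflip helpers above (the numbers are
// illustrative only). For cf = {0.9, 0.8, 0.7}:
//
//	chanceAllHeads           = 0.9 * 0.8 * 0.7 = 0.504
//	chanceHeadsAllowOneTails = 0.504
//	                         + 0.504/0.9*0.1   // only 1st tails, 0.056
//	                         + 0.504/0.8*0.2   // only 2nd tails, 0.126
//	                         + 0.504/0.7*0.3   // only 3rd tails, 0.216
//	                         = 0.902
//
// So a set of three such workers with one overdrive worker would comfortably
// pass the chanceGreaterThanHalf check, while without overdrive (0.504) it
// would only barely pass.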

// updateWorkers will update the given set of workers in-place. We update the
// workers instead of recreating them because we found that the process of
// creating an individualWorker involves some CPU-intensive steps, like the
// gouging checks. By updating them, rather than recreating them, we avoid
// doing these computations in every iteration of the download algorithm.
func (pdc *projectDownloadChunk) updateWorkers(workers []*individualWorker) []*individualWorker {
	ws := pdc.workerState
	ws.mu.Lock()
	defer ws.mu.Unlock()

	// make a map of all resolved workers to their piece indices
	resolved := make(map[string][]uint64, len(workers))
	for _, rw := range ws.resolvedWorkers {
		resolved[rw.worker.staticHostPubKeyStr] = rw.pieceIndices
	}

	// loop over all workers and update the resolved status and piece indices
	for i := 0; i < len(workers); i++ {
		w := workers[i]

		pieceIndices, resolved := resolved[w.staticWorker.staticHostPubKeyStr]
		if !w.isResolved() && resolved {
			w.resolved = true
			w.pieceIndices = pieceIndices
			if len(w.pieceIndices) == 0 {
				// if the worker resolved and doesn't have any pieces, remove
				// it from the workers by swapping it to the end and shrinking
				// the slice by 1.
				workers[i], workers[len(workers)-1] = workers[len(workers)-1], workers[i]
				workers = workers[:len(workers)-1]
				i--
				continue
			}
		}

		// check whether the worker is on cooldown
		hsq := w.staticWorker.staticJobHasSectorQueue
		rjq := w.staticWorker.staticJobReadQueue
		w.onCoolDown = hsq.callOnCooldown() || rjq.callOnCooldown()

		// recalculate the distributions
		w.recalculateDistributionChances()
	}
	return workers
}

// workers returns both resolved and unresolved workers as a single slice of
// individual workers.
func (pdc *projectDownloadChunk) workers() []*individualWorker {
	ws := pdc.workerState
	ws.mu.Lock()
	defer ws.mu.Unlock()

	workers := make([]*individualWorker, 0, len(ws.resolvedWorkers)+len(ws.unresolvedWorkers))

	// convenience variables
	ec := pdc.workerSet.staticErasureCoder
	length := pdc.pieceLength
	numPieces := ec.NumPieces()

	// allocate all individual workers in a single backing slice to avoid one
	// small allocation per worker
	iws := make([]individualWorker, cap(workers))

	// add all resolved workers that are deemed good for downloading
	var ldt *skymodules.DistributionTracker
	var rdt *skymodules.DistributionTracker
	var jrq *jobReadQueue
	var hsq *jobHasSectorQueue
	var iw *individualWorker
	var cost float64
	for _, rw := range ws.resolvedWorkers {
		if !isGoodForDownload(rw.worker, rw.pieceIndices) {
			continue
		}

		jrq = rw.worker.staticJobReadQueue
		rdt = jrq.staticStats.distributionTrackerForLength(length)
		cost, _ = jrq.callExpectedJobCost(length).Float64()
		hsq = rw.worker.staticJobHasSectorQueue
		ldt = hsq.staticDT

		iw = &iws[len(workers)] //staticPoolIndividualWorkers.Get()
		iw.resolved = true
		iw.pieceIndices = rw.pieceIndices
		iw.onCoolDown = jrq.callOnCooldown() || hsq.callOnCooldown()
		iw.staticAvailabilityRate = hsq.callAvailabilityRate(numPieces)
		iw.staticCost = cost
		iw.staticDownloadLaunchTime = time.Now()
		iw.staticIdentifier = uint32(len(workers))
		iw.staticLookupDistribution = ldt.Distribution(0)
		iw.staticReadDistribution = rdt.Distribution(0)
		iw.staticWorker = rw.worker
		workers = append(workers, iw)
	}

	// add all unresolved workers that are deemed good for downloading
	for _, uw := range ws.unresolvedWorkers {
		// exclude workers that are not useful
		w := uw.staticWorker
		if !isGoodForDownload(w, pdc.staticPieceIndices) {
			continue
		}

		jrq = w.staticJobReadQueue
		rdt = jrq.staticStats.distributionTrackerForLength(length)
		hsq = w.staticJobHasSectorQueue
		ldt = hsq.staticDT

		iw = &iws[len(workers)] //staticPoolIndividualWorkers.Get()
		cost, _ = jrq.callExpectedJobCost(length).Float64()
		iw.resolved = false
		iw.pieceIndices = pdc.staticPieceIndices
		iw.onCoolDown = jrq.callOnCooldown() || hsq.callOnCooldown()

		iw.staticAvailabilityRate = hsq.callAvailabilityRate(numPieces)
		iw.staticCost = cost
		iw.staticDownloadLaunchTime = time.Now()
		iw.staticIdentifier = uint32(len(workers))
		iw.staticLookupDistribution = ldt.Distribution(0)
		iw.staticReadDistribution = rdt.Distribution(0)
		iw.staticWorker = w
		workers = append(workers, iw)
	}

	return workers
}

// workerProgress returns the piece that was marked on the worker to download
// next, alongside two booleans that indicate whether it was launched and
// whether it completed.
func (pdc *projectDownloadChunk) workerProgress(w downloadWorker) (uint64, bool, bool) {
	// return defaults if the worker is a chimera worker, those are not
	// downloading by definition
	iw, ok := w.(*individualWorker)
	if !ok {
		return 0, false, false
	}

	// get the marked piece for this worker
	currentPiece := w.getPieceForDownload()

	// fetch the worker's download progress, if that does not exist, it's
	// neither launched nor completed.
	workerProgress, exists := pdc.workerProgressMap[iw.identifier()]
	if !exists {
		return currentPiece, false, false
	}

	_, launched := workerProgress.launchedPieces[currentPiece]
	_, completed := workerProgress.completedPieces[currentPiece]
	return currentPiece, launched, completed
}

// launchWorkerSet will range over the workers in the given worker set and will
// try to launch every worker that has not yet been launched and is ready to
// launch.
func (pdc *projectDownloadChunk) launchWorkerSet(ws *workerSet) {
	// convenience variables
	minPieces := pdc.workerSet.staticErasureCoder.MinPieces()

	// range over all workers in the set and launch if possible
	for _, w := range ws.workers {
		// continue if the worker is a chimera worker
		iw, ok := w.(*individualWorker)
		if !ok {
			continue
		}

		// continue if the worker is already launched
		piece, isLaunched, _ := pdc.workerProgress(w)
		if isLaunched {
			continue
		}

		// launch the worker
		isOverdrive := len(pdc.launchedWorkers) >= minPieces
		_, gotLaunched := pdc.launchWorker(iw, piece, isOverdrive)

		// log the event in case we launched a worker
		if gotLaunched {
			if span := opentracing.SpanFromContext(pdc.ctx); span != nil {
				span.LogKV(
					"aWorkerLaunched", w.identifier(),
					"piece", piece,
					"overdriveWorker", isOverdrive,
					"wsDuration", ws.staticBucketDuration,
					"wsIndex", ws.staticBucketIndex,
				)
			}
		}
	}
}

// threadedLaunchProjectDownload performs the main download loop; every
// iteration we update the pdc's available pieces, construct a new worker set
// and launch every worker that can be launched from that set. Every iteration
// we check whether the download was finished.
func (pdc *projectDownloadChunk) threadedLaunchProjectDownload() {
	// grab some variables
	ws := pdc.workerState
	ec := pdc.workerSet.staticErasureCoder

	// grab the workers from the pdc, every iteration we will update this set
	// of workers to avoid needlessly performing gouging checks on every
	// iteration
	workers := pdc.workers()

	// verify we have enough workers to complete the download
	if len(workers) < ec.MinPieces() {
		pdc.fail(errors.Compose(ErrRootNotFound, errors.AddContext(errNotEnoughWorkers, fmt.Sprintf("%v < %v", len(workers), ec.MinPieces()))))
		return
	}

	// Allocate some memory outside of the loop to reduce the number of
	// allocations within.
	ds := &bufferedDownloadState{
		downloadWorkers:       make([]downloadWorker, 0, len(workers)),
		mostLikely:            make([]downloadWorker, 0, maxOverdriveWorkers+pdc.workerSet.staticErasureCoder.MinPieces()),
		lessLikely:            make([]downloadWorker, 0, len(workers)),
		pieces:                make(map[uint64]struct{}, pdc.workerSet.staticErasureCoder.NumPieces()),
		added:                 make(map[uint32]struct{}, len(workers)),
		sortedDownloadWorkers: make([]sortedDownloadWorker, 0, len(workers)),
	}

	// register for a worker update chan
	workerUpdateChan := ws.managedRegisterForWorkerUpdate()
	prevWorkerUpdate := time.Now()

	var maxTimer *time.Timer
	defer func() {
		if maxTimer != nil {
			if !maxTimer.Stop() {
				select {
				case <-maxTimer.C:
				default:
				}
			}
		}
	}()

	for {
		// update the pieces
		updated := pdc.updatePieces()

		// update the workers
		if updated || time.Since(prevWorkerUpdate) > maxWaitUpdateWorkers {
			workers = pdc.updateWorkers(workers)
			prevWorkerUpdate = time.Now()
		}

		// create a worker set and launch it
		workerSet, err := pdc.createWorkerSet(workers, ds)
		if err != nil {
			pdc.fail(err)
			return
		}
		if workerSet != nil {
			pdc.launchWorkerSet(workerSet)
		}

		// Drain and reset timer if necessary.
		if maxTimer != nil {
			if !maxTimer.Stop() {
				select {
				case <-maxTimer.C:
				default:
				}
			}
			maxTimer.Reset(maxWaitUnresolvedWorkerUpdate)
		} else {
			maxTimer = time.NewTimer(maxWaitUnresolvedWorkerUpdate)
		}

		// iterate
		select {
		case <-maxTimer.C:
			// recreate the workerset after maxwait
		case <-workerUpdateChan:
			// replace the worker update channel
			workerUpdateChan = ws.managedRegisterForWorkerUpdate()
		case jrr := <-pdc.workerResponseChan:
			pdc.handleJobReadResponse(jrr)
		case <-pdc.ctx.Done():
			pdc.fail(ErrProjectTimedOut)
			return
		}

		// check whether the download is completed
		completed, err := pdc.finished()
		if completed {
			pdc.finalize()
			return
		}
		if err != nil {
			pdc.fail(err)
			return
		}
	}
}
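
// The Stop-then-drain sequence around maxTimer above is the standard pattern
// for reusing a time.Timer: Stop reports false when the timer already fired
// (or was stopped), in which case its channel may still hold a pending tick
// that has to be drained before Reset, otherwise a stale tick could wake the
// select immediately. In isolation the pattern looks like this:
//
//	if !t.Stop() {
//		select {
//		case <-t.C: // drain a pending tick
//		default: // channel already empty
//		}
//	}
//	t.Reset(d)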

// createWorkerSet tries to create a worker set from the pdc's resolved and
// unresolved workers, the maximum amount of overdrive workers in the set is
// bounded by 'maxOverdriveWorkers'.
func (pdc *projectDownloadChunk) createWorkerSet(workers []*individualWorker, ds *bufferedDownloadState) (*workerSet, error) {
	// can't create a workerset without download workers
	if len(workers) == 0 {
		return nil, nil
	}

	// convenience variables
	ppms := pdc.pricePerMS
	minPieces := pdc.workerSet.staticErasureCoder.MinPieces()

	// loop state
	var bestSet *workerSet
	var numOverdrive int
	var bI int

	// start numOverdrive at 1 if the dependency is set
	if pdc.workerState.staticDeps.Disrupt("OverdriveDownload") {
		numOverdrive = 1
	}

	// approximate the bucket index by iterating over all bucket indices using
	// a step size greater than 1; once we've found the best set, we range over
	// bI-stepSize|bI+stepSize to find the best bucket index
OUTER:
	for ; numOverdrive <= maxOverdriveWorkers; numOverdrive++ {
		for bI = 0; bI <= skymodules.DistributionTrackerTotalBuckets; bI += bucketIndexScanStep {
			if bI == skymodules.DistributionTrackerTotalBuckets {
				bI--
			}
			// create the worker set
			bDur := skymodules.DistributionDurationForBucketIndex(bI)
			mostLikelySet, escape := pdc.createWorkerSetInner(workers, minPieces, numOverdrive, bI, bDur, ds)
			if escape {
				break OUTER
			}
			if mostLikelySet == nil {
				continue
			}

			// perform price per ms comparison
			if bestSet == nil {
				bestSet = mostLikelySet
			} else if mostLikelySet.adjustedDuration(ppms) < bestSet.adjustedDuration(ppms) {
				bestSet = mostLikelySet
			}

			// exit early if ppms in combination with the bucket duration
			// already exceeds the adjusted cost of the current best set,
			// workers would be too slow by definition
			if bestSet != nil && bDur > bestSet.adjustedDuration(ppms) {
				break OUTER
			}
		}
	}

	// if we haven't found a set, no need to try and find the optimal index
	if bestSet == nil {
		return nil, nil
	}

	// after we've found one, range over bI-bucketIndexScanStep ->
	// bI+bucketIndexScanStep to find the optimal bucket index
	bIMin, bIMax := bucketIndexRange(bI)
	for bI = bIMin; bI < bIMax; bI++ {
		// create the worker set
		bDur := skymodules.DistributionDurationForBucketIndex(bI)
		mostLikelySet, escape := pdc.createWorkerSetInner(workers, minPieces, numOverdrive, bI, bDur, ds)
		if escape {
			break
		}
		if mostLikelySet == nil {
			continue
		}

		// perform price per ms comparison
		if bestSet == nil {
			bestSet = mostLikelySet
		} else if mostLikelySet.adjustedDuration(ppms) < bestSet.adjustedDuration(ppms) {
			bestSet = mostLikelySet
		}

		// exit early if ppms in combination with the bucket duration
		// already exceeds the adjusted cost of the current best set,
		// workers would be too slow by definition
		if bestSet != nil && bDur > bestSet.adjustedDuration(ppms) {
			break
		}
	}

	return bestSet, nil
}
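
// To illustrate the two-phase scan above with concrete numbers (illustrative
// only): with bucketIndexScanStep = 10 the coarse pass evaluates bucket
// indices 0, 10, 20, ... and suppose it settles on bI = 40. The fine pass
// then calls bucketIndexRange(40), which yields the range [30, 50), and
// re-evaluates every index 30..49 to find the optimal bucket, so the total
// number of evaluations stays far below scanning every bucket once.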

// createWorkerSetInner is the inner loop that is called by createWorkerSet, it
// tries to create a worker set from the given list of workers, taking into
// account the given amount of workers and overdrive workers, but also the
// given bucket duration. It returns a worker set, and a boolean that indicates
// whether we want to break out of the (outer) loop that surrounds this
// function call.
func (pdc *projectDownloadChunk) createWorkerSetInner(workers []*individualWorker, minPieces, numOverdrive, bI int, bDur time.Duration, ds *bufferedDownloadState) (*workerSet, bool) {
	// reset the buffered state
	ds.Reset()

	workersNeeded := minPieces + numOverdrive

	// recalculate the complete chance at given index
	for _, w := range workers {
		w.recalculateCompleteChance(bI)
	}

	// build the download workers
	downloadWorkers := pdc.buildDownloadWorkers(workers, ds)

	// divide the workers in most likely and less likely
	mostLikely, lessLikely := pdc.splitMostlikelyLessLikely(downloadWorkers, workersNeeded, ds)

	// if there aren't any likely workers, escape early
	if len(mostLikely) == 0 {
		return nil, true
	}

	// build the most likely set
	mostLikelySet := &workerSet{
		workers: mostLikely,

		staticBucketDuration: bDur,
		staticBucketIndex:    bI,
		staticNumOverdrive:   numOverdrive,
		staticMinPieces:      minPieces,

		staticPDC: pdc,
	}

	// if the chance of the most likely set does not exceed 50%, it is not high
	// enough to continue, no need to continue this iteration, we need to try a
	// slower and thus more likely bucket
	//
	// NOTE: this 50% value is arbitrary, it actually even means that in 50% of
	// all cases we fall at the other side of the fence... tweaking this value
	// and calculating how often we run a bad worker set is part of the
	// download improvements listed at the top of this file.
	if !mostLikelySet.chanceGreaterThanHalf() {
		return nil, false
	}

	// now loop the less likely workers and try and swap them with the most
	// expensive workers in the most likely set
	for _, w := range lessLikely {
		cheaperSet := mostLikelySet.cheaperSetFromCandidate(w)
		if cheaperSet == nil {
			continue
		}

		// if the cheaper set's chance of completing before the given duration
		// is not greater than half we can break because the `lessLikely`
		// workers were sorted by chance
		if !cheaperSet.chanceGreaterThanHalf() {
			break
		}

		mostLikelySet = cheaperSet
	}

	return mostLikelySet, false
}

// addCostPenalty takes a certain job time and adds a penalty to it depending
// on the job cost and the pdc's price per MS.
func addCostPenalty(jobTime time.Duration, jobCost, pricePerMS types.Currency) time.Duration {
	// If the pricePerMS is higher or equal than the cost of the job, simply
	// return without penalty.
	if pricePerMS.Cmp(jobCost) >= 0 {
		return jobTime
	}

	// Otherwise, add a penalty
	var adjusted time.Duration
	penalty, err := jobCost.Div(pricePerMS).Uint64()

	// because we multiply the penalty with milliseconds and add the jobtime we
	// have to check for overflows quite extensively, define a max penalty
	// which we'll then compare with the job time to see whether we can safely
	// calculate the adjusted duration
	penaltyMaxCheck := math.MaxInt64 / int64(time.Millisecond)
	if err != nil || penalty > math.MaxInt64 {
		adjusted = time.Duration(math.MaxInt64)
	} else if reduced := penaltyMaxCheck - int64(penalty); int64(jobTime) > reduced {
		adjusted = time.Duration(math.MaxInt64)
	} else {
		adjusted = jobTime + (time.Duration(penalty) * time.Millisecond)
	}
	return adjusted
}
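
// A small usage example for addCostPenalty (the values are illustrative
// only): with a job time of 80ms, a job cost of 300H and a price per ms of
// 100H, the penalty is 300/100 = 3ms and the adjusted duration becomes 83ms:
//
//	adjusted := addCostPenalty(
//		80*time.Millisecond,
//		types.NewCurrency64(300), // jobCost
//		types.NewCurrency64(100), // pricePerMS
//	)
//	// adjusted == 83 * time.Millisecond
//
// If pricePerMS were 300H or more, the job time would be returned unchanged.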

// buildChimeraWorkers turns a list of individual workers into chimera workers.
func (pdc *projectDownloadChunk) buildChimeraWorkers(unresolvedWorkers []*individualWorker, lowestChimeraIdentifier uint32) []downloadWorker {
	// sort workers by the chance they complete
	sort.Sort(sortedIndividualWorkers(unresolvedWorkers))

	// create a slice that will hold all chimera workers
	chimeras := make([]downloadWorker, 0, len(unresolvedWorkers))

	// create some loop state
	currAvail := float64(0)
	start := 0

	// loop over the unresolved workers
	for curr := 0; curr < len(unresolvedWorkers); curr++ {
		currAvail += unresolvedWorkers[curr].staticAvailabilityRate
		if currAvail >= chimeraAvailabilityRateThreshold {
			end := curr + 1
			chimera := NewChimeraWorker(unresolvedWorkers[start:end], lowestChimeraIdentifier)
			lowestChimeraIdentifier++
			chimeras = append(chimeras, chimera)

			// reset loop state
			start = end
			currAvail = 0
		}
	}
	return chimeras
}
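
// A worked example of the grouping above (the rates are illustrative only):
// with chimeraAvailabilityRateThreshold = 2 and unresolved workers whose
// availability rates are, in sorted order of complete chance,
// {0.9, 0.7, 0.5, 0.8, 0.3}, the cumulative sum reaches 2.1 at the third
// worker, so the first three workers form one chimera and the sum resets.
// The remaining workers only add up to 1.1, never reach the threshold, and
// therefore do not form a chimera in this iteration of the download loop.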

// buildDownloadWorkers is a helper function that takes a list of individual
// workers and turns them into download workers.
func (pdc *projectDownloadChunk) buildDownloadWorkers(workers []*individualWorker, ds *bufferedDownloadState) []downloadWorker {
	// create a slice of download workers
	downloadWorkers := ds.downloadWorkers

	// split the workers into resolved and unresolved workers, the resolved
	// workers can be added directly to the slice of download workers
	resolvedWorkers, unresolvedWorkers := splitResolvedUnresolved(workers)
	for _, rw := range resolvedWorkers {
		downloadWorkers = append(downloadWorkers, rw)
	}

	// the unresolved workers are used to build chimeras with
	chimeraWorkers := pdc.buildChimeraWorkers(unresolvedWorkers, uint32(len(workers)))
	return append(downloadWorkers, chimeraWorkers...)
}

// splitMostlikelyLessLikely takes a list of download workers alongside a
// duration and an amount of workers that are needed for the most likely set
// of workers to complete a download (this is not necessarily equal to
// 'minPieces' workers but also takes into account an amount of overdrive
// workers). This method will split the given workers slice into a list of
// most likely workers, and a list of less likely workers.
func (pdc *projectDownloadChunk) splitMostlikelyLessLikely(workers []downloadWorker, workersNeeded int, ds *bufferedDownloadState) ([]downloadWorker, []downloadWorker) {
	// prepare two slices that hold the workers which are most likely and the
	// ones that are less likely
	mostLikely := ds.mostLikely
	lessLikely := ds.lessLikely

	// define some state variables to ensure we select workers in a way the
	// pieces are unique and we are not using a worker twice
	pieces := ds.pieces
	added := ds.added

	// addWorker is a helper function that adds a worker to either the most
	// likely or less likely worker slice and updates our state variables
	addWorker := func(w downloadWorker, pieceIndex uint64) {
		if len(mostLikely) < workersNeeded {
			mostLikely = append(mostLikely, w)
		} else {
			lessLikely = append(lessLikely, w)
		}

		added[w.identifier()] = struct{}{}
		pieces[pieceIndex] = struct{}{}
		w.markPieceForDownload(pieceIndex)
	}

	// sort the workers by percentage chance they complete after the current
	// bucket duration, essentially sorting them from most to least likely
	sdw := ds.sortedDownloadWorkers
	for i := range workers {
		sdw = append(sdw, sortedDownloadWorker{
			originalIndex:  i,
			completeChance: workers[i].completeChanceCached(),
		})
	}
	sort.Sort(&sdw)

	// loop over the workers and try to add them
	for _, sw := range sdw {
		w := workers[sw.originalIndex]
		// workers that have in-progress downloads are re-added as long as we
		// don't already have a worker for the piece they are downloading
		currPiece, launched, completed := pdc.workerProgress(w)
		if launched && !completed {
			_, exists := pieces[currPiece]
			if !exists {
				addWorker(w, currPiece)
				continue
			}
		}

		// loop the worker's pieces to see whether it can download a piece for
		// which we don't have a worker yet or which we haven't downloaded yet
		for _, pieceIndex := range w.pieces(pdc) {
			if pdc.piecesInfo[pieceIndex].downloaded {
				continue
			}

			_, exists := pieces[pieceIndex]
			if exists {
				continue
			}

			addWorker(w, pieceIndex)
			break // only use a worker once
		}
	}

	// loop over the workers again to fill both the most likely and less
	// likely slice with the remainder of the workers, still ensuring a worker
	// is only used once, this time we don't assert the piece indices are
	// unique as this makes it possible to overdrive on the same piece
	for _, sw := range sdw {
		w := workers[sw.originalIndex]
		_, added := added[w.identifier()]
		if added {
			continue
		}

		for _, pieceIndex := range w.pieces(pdc) {
			if pdc.piecesInfo[pieceIndex].downloaded {
				continue
			}

			addWorker(w, pieceIndex)
			break // only use a worker once
		}
	}

	return mostLikely, lessLikely
}

// bucketIndexRange is a small helper function that returns the bucket index
// range we want to loop over after finding the first bucket index
// approximation.
func bucketIndexRange(bI int) (int, int) {
	var bIMin int
	if bI-bucketIndexScanStep >= 0 {
		bIMin = bI - bucketIndexScanStep
	}

	bIMax := skymodules.DistributionTrackerTotalBuckets - 1
	if bI+bucketIndexScanStep <= skymodules.DistributionTrackerTotalBuckets {
		bIMax = bI + bucketIndexScanStep
	}

	return bIMin, bIMax
}

// checkGougingAndUpdateCache checks if a worker is pcws gouging and updates
// the cache with the result.
func (c *pcwsGougingCache) checkGougingAndUpdateCache(hpks string, pt modules.RPCPriceTable, allowance skymodules.Allowance, numWorkers, numRoots int) error {
	err := gouging.CheckPCWS(allowance, pt, numWorkers, numRoots)

	results, exist := staticPCWSGougingCache.staticCache[hpks]
	if !exist {
		results = make(map[int]pcwsGougingResult)
		staticPCWSGougingCache.staticCache[hpks] = results
	}
	results[numRoots] = pcwsGougingResult{
		staticAllowance:  allowance,
		staticNumWorkers: numWorkers,
		staticPTID:       pt.UID,
		staticIsGouging:  err,
	}
	return err
}

// IsGouging performs the checkPCWSGouging check but will return a cached
// result if possible.
func (c *pcwsGougingCache) IsGouging(hpks string, pt modules.RPCPriceTable, allowance skymodules.Allowance, numWorkers, numRoots int) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	results, exist := staticPCWSGougingCache.staticCache[hpks]
	if !exist {
		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
	}
	result, exist := results[numRoots]
	if !exist {
		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
	}
	if pt.UID != result.staticPTID {
		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
	}
	if numWorkers != result.staticNumWorkers {
		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
	}
	if !reflect.DeepEqual(allowance, result.staticAllowance) {
		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
	}
	return result.staticIsGouging
}

// IsGouging performs the checkProjectDownloadGouging check but will return a
// cached result if possible.
func (c *pdcGougingCache) IsGouging(hpks string, pt modules.RPCPriceTable, allowance skymodules.Allowance) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	cachedResult := c.staticCache[hpks]
	if cachedResult.staticPTID == pt.UID && reflect.DeepEqual(cachedResult.staticAllowance, allowance) {
		return cachedResult.staticIsGouging
	}

	err := gouging.CheckProjectDownload(allowance, pt)

	c.staticCache[hpks] = pdcGougingResult{
		staticAllowance: allowance,
		staticPTID:      pt.UID,
		staticIsGouging: err,
	}
	return err
}

// PruneWorker removes the cached results for a given worker.
func (c *pcwsGougingCache) PruneWorker(hpks string) {
	c.mu.Lock()
	delete(c.staticCache, hpks)
	c.mu.Unlock()
}

// PruneWorker removes the cached results for a given worker.
func (c *pdcGougingCache) PruneWorker(hpks string) {
	c.mu.Lock()
	delete(c.staticCache, hpks)
	c.mu.Unlock()
}

var staticDownloadGougingCache = &pdcGougingCache{
	staticCache: make(map[string]pdcGougingResult),
}

var staticPCWSGougingCache = &pcwsGougingCache{
	staticCache: make(map[string]map[int]pcwsGougingResult),
}

// isGoodForDownload is a helper function that returns true if and only if the
// worker meets a certain set of criteria that make it useful for downloads.
// It's only useful if it is not on any type of cooldown, if it's async ready
// and if it's not price gouging.
func isGoodForDownload(w *worker, pieces []uint64) bool {
	// workers that can't download any pieces are ignored
	if len(pieces) == 0 {
		return false
	}

	// workers on cooldown or that are not async ready are not useful
	if w.managedOnMaintenanceCooldown() || !w.managedAsyncReady() {
		return false
	}

	// workers that are price gouging are not useful
	pt := w.staticPriceTable().staticPriceTable
	allowance := w.staticCache().staticRenterAllowance

	// Check cache.
	err := staticDownloadGougingCache.IsGouging(w.staticHostPubKeyStr, pt, allowance)
	return err == nil
}

// partitionWorkers partitions a slice of workers in-place.
func partitionWorkers(iws []*individualWorker, isLeft func(i int) bool) (left, right []*individualWorker) {
	i := 0
	j := len(iws) - 1

	for i <= j {
		if !isLeft(i) {
			iws[i], iws[j] = iws[j], iws[i]
			j--
			continue
		}
		i++
	}
	return iws[:i], iws[i:]
}

// splitResolvedUnresolved is a helper function that splits the given workers
// into resolved and unresolved worker slices. Note that if the worker is on a
// cooldown we exclude it from the returned workers list.
func splitResolvedUnresolved(workers []*individualWorker) ([]*individualWorker, []*individualWorker) {
	// filter out the workers on cooldown first.
	notOnCooldown, _ := partitionWorkers(workers, func(i int) bool {
		return !workers[i].isOnCooldown()
	})
	resolvedWorkers, unresolvedWorkers := partitionWorkers(notOnCooldown, func(i int) bool {
		return workers[i].isResolved()
	})
	return resolvedWorkers, unresolvedWorkers
}
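
// For illustration, here is how partitionWorkers behaves (the worker values
// are hypothetical): given iws = {a, b, c} where a and c are resolved and b
// is not,
//
//	resolved, unresolved := partitionWorkers(iws, func(i int) bool {
//		return iws[i].isResolved()
//	})
//
// returns resolved == {a, c} and unresolved == {b}. Note that the partition
// happens in-place by swapping elements to the back of the slice, so the
// original order of the workers is not preserved.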