gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/projectdownloadworker.go

     1  package renter
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"reflect"
     7  	"sort"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/opentracing/opentracing-go"
    12  	"gitlab.com/NebulousLabs/errors"
    13  	"gitlab.com/SkynetLabs/skyd/build"
    14  	"gitlab.com/SkynetLabs/skyd/skymodules"
    15  	"gitlab.com/SkynetLabs/skyd/skymodules/gouging"
    16  	"go.sia.tech/siad/modules"
    17  	"go.sia.tech/siad/types"
    18  )
    19  
    20  // DOWNLOAD CODE IMPROVEMENTS:
    21  //
    22  // the current design of the download algorithm has some key places where
    23  // there's still room for improvement. Below we list some of the ideas that
    24  // could contribute to a faster and more robust algorithm.
    25  //
    26  // 1. optimize the amount of memory allocations by using a sync.Pool: while
    27  // benchmarking and profiling the current download algorithm we found that the
    28  // downloads are usually bottlenecked by cpu, mostly coming from
    29  // runtime.scanobject which indicates the GC is triggered too often. By looking
    30  // at the memory profile, both using -inuse_objects and -alloc_objects, we can
    31  // see that `managedHandleResponse`, `managedExecuteProgram`,
    32  // `managedHasSector`,... all allocate a bunch of memory. In a lot of these
    33  // areas we can use a sync.Pool with preallocated memory that gets recycled,
    34  // avoiding needless reallocation of new memory (a sketch follows this list).
    35  //
    36  // 2. add a fixed cost to account for our own bandwidth expenses: because the
    37  // host network on Sia possibly has a lot of very cheap hosts, we should be
    38  // offsetting the cost with a fixed cost to account for our own bandwidth
    39  // expenses. E.g. if we use a fixed cost of 2$/TB, so ~100SC, then a worker that
    40  // costs 1SC becomes 101SC and a worker that costs 100SC becomes 200SC. That
    41  // makes it so the difference between those workers is 2x instead of the 100x it
    42  // was before the fixed cost.
    43  //
    44  // 3. add a mechanism to slow down the DTs or switch to different DTs if we have
    45  // too many jobs launched at once: the distribution tracker does not take into
    46  // account worker load, which means their chance values become too optimistic,
    47  // which might hurt the performance of the algorithm when not taken into
    48  // account.
    49  //
    50  // 4. play with the 50% number, and account for the cost of being unlucky: a
    51  // worker set's total chance has to be greater than 50% in order for it to be
    52  // accepted as a viable worker set, this 50% number is essentially arbitrary, and
    53  // even at 50% there's still a 50% chance we fall on the other side of the
    54  // fence. This should/could be taken into account.
    55  //
    56  // 5. fix the algorithm that chooses which worker to replace in your set: the
    57  // algorithm that decides what worker to replace by a cheaper worker in the
    58  // current working set is a bit naively implemented. Figuring out what worker
    59  // to replace can be a very complex algorithm. There's definitely room for
    60  // improvement here.
    61  //
    62  // 6. fix the algorithm that constructs chimeras: chimeras are currently
    63  // constructed for every bucket duration, however we could also rebuild
    64  // chimeras, or partially rebuild chimeras, when we are swapping out cheaper
    65  // workers in the working set. The current algorithm considers the chimera
    66  // worker and its cost as fixed, but that does not necessarily have to be the
    67  // case. We can further improve this by swapping out series of workers for
    68  // cheaper workers inside of the chimera itself.
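        //
        // As a purely illustrative sketch of idea (1) above, and not part of the
        // current codebase: a sync.Pool can hand out preallocated buffers that are
        // recycled between responses instead of allocating a fresh slice every time.
        // The names below are hypothetical:
        //
        //	var responseBufPool = sync.Pool{
        //		New: func() interface{} {
        //			// allocated once, then recycled via Get/Put
        //			return make([]byte, 0, 1<<22)
        //		},
        //	}
        //
        //	func handleResponseSketch(data []byte) {
        //		buf := responseBufPool.Get().([]byte)[:0]
        //		buf = append(buf, data...) // work on the recycled buffer
        //		// ... process buf ...
        //		responseBufPool.Put(buf) // return the buffer for reuse
        //	}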
    69  
    70  const (
    71  	// bucketIndexScanStep defines the step size with which we increment the
    72  	// bucket index when we try and find the best set the first time around.
    73  	// This works more or less in a binary search fashion where we try and
    74  	// quickly approximate the bucket index, and then scan -10|+10 buckets
    75  	// before and after the index we found.
    76  	// NOTE: bucketIndexScanStep needs to cleanly divide the number of total
    77  	// buckets in the distribution tracker. Otherwise we might miss buckets
    78  	// at the end.
    79  	bucketIndexScanStep = 10
    80  
    81  	// chimeraAvailabilityRateThreshold defines the number that must be reached
    82  	// when composing a chimera from unresolved workers. If the sum of the
    83  	// availability rate of each worker reaches this threshold we build a
    84  	// chimera out of them.
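        	//
        	// As a purely illustrative example: four unresolved workers with
        	// availability rates 0.6, 0.5, 0.5 and 0.4 sum to exactly 2.0, reaching
        	// this threshold, so they would be combined into a single chimera worker.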
    85  	chimeraAvailabilityRateThreshold = float64(2)
    86  )
    87  
    88  var (
    89  	// maxWaitUnresolvedWorkerUpdate defines the maximum amount of time we want
    90  	// to wait for unresolved workers to become resolved before trying to
    91  	// recreate the worker set.
    92  	//
    93  	// maxWaitUpdateWorkers defines the maximum amount of time we want to wait
    94  	// for workers to be updated.
    95  	//
    96  	// NOTE: these variables are raised in the testing environment to avoid
    97  	// a large amount of parallel downloads. We've found that the host is
    98  	// currently facing a locking issue causing slow reads on the CI when
    99  	// there's a lot of parallel reads taking place. This issue is tackled by
   100  	// the following PR https://github.com/SiaFoundation/siad/pull/50
   101  	// (partially) and thus this build var should be removed again when that is
   102  	// merged and rolled out fully.
   103  	maxWaitUpdateWorkers = build.Select(build.Var{
   104  		Standard: 25 * time.Millisecond,
   105  		Dev:      25 * time.Millisecond,
   106  		Testing:  250 * time.Millisecond,
   107  	}).(time.Duration)
   108  	maxWaitUnresolvedWorkerUpdate = build.Select(build.Var{
   109  		Standard: 50 * time.Millisecond,
   110  		Dev:      50 * time.Millisecond,
   111  		Testing:  250 * time.Millisecond,
   112  	}).(time.Duration)
   113  )
   114  
   115  // NOTE: all of the following defined types are used by the PDC, which is
   116  // inherently not thread safe, meaning these types do not need to be thread
   117  // safe either. If fields are marked `static` it is meant to signal that they
   118  // won't change after being set.
   119  type (
   120  	// downloadWorker is an interface implemented by both the individual and
   121  	// chimera workers that represents a worker that can be used for downloads.
   122  	downloadWorker interface {
   123  		// completeChanceCached returns the chance this download worker
   124  		// completes a read within a certain duration. This value is cached and
   125  		// gets recomputed for every bucket index (which represents a duration).
   126  		// The value returned here is recomputed by 'recalculateCompleteChance'.
   127  		completeChanceCached() float64
   128  
   129  		// cost returns the expected job cost for downloading a piece. If the
   130  		// worker has already been launched, its cost will be zero.
   131  		cost() float64
   132  
   133  		// getPieceForDownload returns the piece to download next
   134  		getPieceForDownload() uint64
   135  
   136  		// identifier returns a unique identifier for the download worker, this
   137  		// identifier can be used as a key when mapping the download worker
   138  		identifier() uint32
   139  
   140  		// markPieceForDownload allows specifying what piece to download for
   141  		// this worker in the case the worker resolved multiple pieces
   142  		markPieceForDownload(pieceIndex uint64)
   143  
   144  		// pieces returns all piece indices this worker can resolve
   145  		pieces(pdc *projectDownloadChunk) []uint64
   146  
   147  		// worker returns the underlying worker
   148  		worker() *worker
   149  	}
   150  
   151  	// chimeraWorker is a worker that's built from unresolved workers until the
   152  	// chance it has a piece is at least 'chimeraAvailabilityRateThreshold'. At
   153  	// that point we can treat the chimera worker the same as a resolved worker
   154  	// in the download algorithm that constructs the best worker set.
   155  	chimeraWorker struct {
   156  		// staticChanceComplete is the chance this worker completes after the
   157  		// duration at which this chimera worker was built.
   158  		staticChanceComplete float64
   159  
   160  		// staticCost is the cost of the chimera worker, which is the
   161  		// average cost taken across all workers this chimera worker is
   162  		// composed of; it is static because it never gets updated after the
   163  		// chimera is finalized and this field has been calculated
   164  		staticCost float64
   165  
   166  		// staticIdentifier uniquely identifies the chimera worker
   167  		staticIdentifier uint32
   168  	}
   169  
   170  	// individualWorker represents a single worker object, both resolved and
   171  	// unresolved workers in the pdc can be represented by an individual worker.
   172  	// An individual worker can be used to build a chimera worker with. For
   173  	// every useful worker in the workerpool, an individual worker is created,
   174  	// this worker is updated as the download progresses with information from
   175  	// the PCWS (resolved status and pieces).
   176  	individualWorker struct {
   177  		// the following fields are cached and recalculated at exact times
   178  		// within the download code algorithm
   179  		//
   180  		// cachedCompleteChance is the chance the worker completes within the
   181  		// duration at which this value was last recalculated
   182  		//
   183  		// cachedLookupIndex is the index corresponding to the estimated
   184  		// duration of the lookup DT.
   185  		//
   186  		// cachedReadDTChances is the cached chances value of the read DT
   187  		//
   188  		// cachedReadDTChancesInitialized is used to prevent needlessly
   189  		// recalculating the read DT chances; if a worker is resolved but not
   190  		// launched, its read DT chances do not change as they don't shift.
   191  		cachedCompleteChance           float64
   192  		cachedLookupIndex              int
   193  		cachedReadDTChances            skymodules.Chances
   194  		cachedReadDTChancesInitialized bool
   195  
   196  		// the following fields are continuously updated on the worker, all
   197  		// individual workers are not resolved initially, when a worker resolves
   198  		// the piece indices are updated and the resolved status is adjusted
   199  		pieceIndices []uint64
   200  		resolved     bool
   201  
   202  		// onCoolDown is a flag that indicates whether this worker's HS or RS
   203  		// queues are on cooldown, a worker with a queue on cooldown is not
   204  		// necessarily discarded as not useful for downloading; instead it's
   205  		// marked as on cooldown and only used once it comes off of cooldown
   206  		onCoolDown bool
   207  
   208  		// currentPiece is the piece that was marked by the download algorithm
   209  		// as the piece to download next, this is used to ensure that workers
   210  		// in the worker set are not selected for duplicate piece indices.
   211  		currentPiece           uint64
   212  		currentPieceLaunchedAt time.Time
   213  
   214  		// static fields on the individual worker
   215  		staticAvailabilityRate   float64
   216  		staticCost               float64
   217  		staticDownloadLaunchTime time.Time
   218  		staticIdentifier         uint32
   219  		staticLookupDistribution skymodules.Distribution
   220  		staticReadDistribution   skymodules.Distribution
   221  		staticWorker             *worker
   222  	}
   223  
   224  	// workerSet is a collection of workers that may or may not have been
   225  	// launched yet in order to fulfil a download.
   226  	workerSet struct {
   227  		workers []downloadWorker
   228  
   229  		staticBucketDuration time.Duration
   230  		staticBucketIndex    int
   231  		staticMinPieces      int
   232  		staticNumOverdrive   int
   233  
   234  		staticPDC *projectDownloadChunk
   235  	}
   236  
   237  	// coinflips is a collection of chances where every item is the chance the
   238  	// coin will turn up heads. We use the concept of a coin because it allows
   239  	// us to more easily reason about chance calculations.
   240  	coinflips []float64
   241  )
   242  
   243  // bufferedDownloadState is a helper type which contains fields that we only
   244  // want to allocate once and then reuse between iterations of the download
   245  // algorithm for optimization reasons.
   246  type bufferedDownloadState struct {
   247  	downloadWorkers       []downloadWorker
   248  	mostLikely            []downloadWorker
   249  	lessLikely            []downloadWorker
   250  	pieces                map[uint64]struct{}
   251  	added                 map[uint32]struct{}
   252  	sortedDownloadWorkers sortedDownloadWorkers
   253  }
   254  
   255  // Reset resets the download state without freeing its memory for the next
   256  // iteration of the loop.
   257  func (ds *bufferedDownloadState) Reset() {
   258  	ds.downloadWorkers = ds.downloadWorkers[:0]
   259  	ds.mostLikely = ds.mostLikely[:0]
   260  	ds.lessLikely = ds.lessLikely[:0]
   261  	ds.sortedDownloadWorkers = ds.sortedDownloadWorkers[:0]
   262  	ds.pieces = make(map[uint64]struct{}, len(ds.pieces))
   263  	ds.added = make(map[uint32]struct{}, len(ds.added))
   264  }
   265  
   266  // pdcGougingCache is a helper type to cache pdc gouging results for workers.
   267  type pdcGougingCache struct {
   268  	staticCache map[string]pdcGougingResult
   269  	mu          sync.Mutex
   270  }
   271  
   272  // pdcGougingResult contains a project download chunk gouging result for a
   273  // given allowance and price table ID.
   274  type pdcGougingResult struct {
   275  	staticAllowance skymodules.Allowance
   276  	staticPTID      modules.UniqueID
   277  	staticIsGouging error
   278  }
   279  
   280  // pcwsGougingCache is a helper type to cache pcws gouging results for workers.
   281  type pcwsGougingCache struct {
   282  	staticCache map[string]map[int]pcwsGougingResult
   283  	mu          sync.Mutex
   284  }
   285  
   286  // pcwsGougingResult contains a project chunk workerset gouging result for a
   287  // given allowance, number of workers and price table.
   288  type pcwsGougingResult struct {
   289  	staticAllowance  skymodules.Allowance
   290  	staticNumWorkers int
   291  	staticPTID       modules.UniqueID
   292  	staticIsGouging  error
   293  }
   294  
   295  // sortedDownloadWorker is a helper type for sorting workers by completeChance.
   296  type sortedDownloadWorker struct {
   297  	originalIndex  int
   298  	completeChance float64
   299  }
   300  
   301  // sortedDownloadWorkers is a helper type to implement the sort.Interface
   302  // interface.
   303  type sortedDownloadWorkers []sortedDownloadWorker
   304  
   305  // Len returns the length of the slice.
   306  func (sdw sortedDownloadWorkers) Len() int { return len(sdw) }
   307  
   308  // Less returns whether the completeChance at index i exceeds that at index j.
   309  func (sdw sortedDownloadWorkers) Less(i, j int) bool {
   310  	return sdw[i].completeChance > sdw[j].completeChance
   311  }
   312  
   313  // Swap swaps two workers in the slice.
   314  func (sdw sortedDownloadWorkers) Swap(i, j int) {
   315  	sdw[i], sdw[j] = sdw[j], sdw[i]
   316  }
   317  
   318  // sortedIndividualWorkers is a helper type to implement the sort.Interface
   319  // interface.
   320  type sortedIndividualWorkers []*individualWorker
   321  
   322  // Len returns the length of the slice.
   323  func (siw sortedIndividualWorkers) Len() int { return len(siw) }
   324  
   325  // Less returns whether the cachedCompleteChance at index i is greater than at
   326  // index j, sorting the workers from most to least likely to complete.
   327  func (siw sortedIndividualWorkers) Less(i, j int) bool {
   328  	eRTI := siw[i].cachedCompleteChance
   329  	eRTJ := siw[j].cachedCompleteChance
   330  	return eRTI > eRTJ
   331  }
   332  
   333  // Swap swaps two workers in the slice.
   334  func (siw sortedIndividualWorkers) Swap(i, j int) {
   335  	siw[i], siw[j] = siw[j], siw[i]
   336  }
   337  
   338  // NewChimeraWorker returns a new chimera worker object.
   339  func NewChimeraWorker(workers []*individualWorker, identifier uint32) *chimeraWorker {
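        // As a purely illustrative example of the averaging performed below: three
        // unresolved workers with cached complete chances 0.8, 0.6 and 0.4,
        // availability rates 1.0, 0.5 and 0.5, and costs 10, 20 and 30 yield a
        // chimera with staticChanceComplete (0.8*1.0 + 0.6*0.5 + 0.4*0.5) / 3 = 1.3/3
        // (roughly 0.43) and staticCost (10 + 20 + 30) / 3 = 20.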
   340  	// calculate the average cost and average (weighted) complete chance
   341  	var totalCompleteChance float64
   342  	var totalCost float64
   343  	for _, w := range workers {
   344  		// sanity check the worker is unresolved
   345  		if w.isResolved() {
   346  			build.Critical("developer error, a chimera is built using unresolved workers only")
   347  		}
   348  
   349  		totalCompleteChance += w.cachedCompleteChance * w.staticAvailabilityRate
   350  		totalCost += w.staticCost
   351  	}
   352  
   353  	totalWorkers := float64(len(workers))
   354  	avgChance := totalCompleteChance / totalWorkers
   355  	avgCost := totalCost / totalWorkers
   356  
   357  	return &chimeraWorker{
   358  		staticChanceComplete: avgChance,
   359  		staticCost:           avgCost,
   360  		staticIdentifier:     identifier,
   361  	}
   362  }
   363  
   364  // completeChanceCached returns the chance this chimera completes
   365  func (cw *chimeraWorker) completeChanceCached() float64 {
   366  	return cw.staticChanceComplete
   367  }
   368  
   369  // cost returns the cost for this chimera worker, this method can only be called
   370  // on a chimera that is finalized
   371  func (cw *chimeraWorker) cost() float64 {
   372  	return cw.staticCost
   373  }
   374  
   375  // getPieceForDownload returns the piece to download next, for a chimera worker
   376  // this is always 0 and should never be called, which is why we add a
   377  // build.Critical to signal developer error.
   378  func (cw *chimeraWorker) getPieceForDownload() uint64 {
   379  	build.Critical("developer error, should not get called on a chimera worker")
   380  	return 0
   381  }
   382  
   383  // identifier returns a unique identifier for this worker.
   384  func (cw *chimeraWorker) identifier() uint32 {
   385  	return cw.staticIdentifier
   386  }
   387  
   388  // markPieceForDownload takes a piece index and marks it as the piece to
   389  // download for this worker. In the case of a chimera worker this method is
   390  // essentially a no-op since chimera workers are never launched
   391  func (cw *chimeraWorker) markPieceForDownload(pieceIndex uint64) {
   392  	// this is a no-op
   393  }
   394  
   395  // pieces returns the piece indices this worker can download, chimera workers
   396  // return all pieces as we don't know yet what pieces they can resolve, note
   397  // that all possible piece indices are defined on the pdc to avoid unnecessary
   398  // slice allocations for every chimera
   399  func (cw *chimeraWorker) pieces(pdc *projectDownloadChunk) []uint64 {
   400  	return pdc.staticPieceIndices
   401  }
   402  
   403  // worker returns the worker, for chimeras this is always nil since it's a
   404  // combination of multiple workers
   405  func (cw *chimeraWorker) worker() *worker {
   406  	return nil
   407  }
   408  
   409  // cost returns the cost for this worker, depending on whether it is launched or
   410  // not it will return either 0, or the static cost variable.
   411  func (iw *individualWorker) cost() float64 {
   412  	if iw.isLaunched() {
   413  		return 0
   414  	}
   415  	return iw.staticCost
   416  }
   417  
   418  // recalculateDistributionChances gets called when the download algorithm
   419  // decides it has to recalculate the chances that are based on the worker's
   420  // distributions. This function will apply the necessary shifts and recalculate
   421  // the cached fields.
   422  func (iw *individualWorker) recalculateDistributionChances() {
   423  	// if the read dt chances are not initialized, initialize them first
   424  	if !iw.cachedReadDTChancesInitialized {
   425  		iw.cachedReadDTChances = iw.staticReadDistribution.ChancesAfter()
   426  		iw.cachedReadDTChancesInitialized = true
   427  	}
   428  
   429  	// if the worker is launched, we want to shift the read dt
   430  	if iw.isLaunched() {
   431  		readDT := iw.staticReadDistribution.Clone()
   432  		readDT.Shift(time.Since(iw.currentPieceLaunchedAt))
   433  		iw.cachedReadDTChances = readDT.ChancesAfter()
   434  	}
   435  
   436  	// if the worker is not resolved yet, we want to always shift the lookup dt
   437  	// and use that to recalculate the expected duration index
   438  	if !iw.isResolved() {
   439  		shift := time.Since(iw.staticDownloadLaunchTime)
   440  		ed := iw.staticLookupDistribution.ExpectedDurationWithShift(shift)
   441  		iw.cachedLookupIndex = skymodules.DistributionBucketIndexForDuration(ed)
   442  	}
   443  }
   444  
   445  // recalculateCompleteChance calculates the chance this worker completes at
   446  // given index. This chance is a combination of the chance it resolves and the
   447  // chance it completes the read by the given index. The resolve (or lookup)
   448  // chance only plays a part for workers that have not resolved yet.
   449  //
   450  // This function calculates the complete chance by approximation, meaning if we
   451  // request the complete chance at 200ms, for an unresolved worker, we will
   452  // offset the read chance with the expected duration of the lookup DT. E.g. if
   453  // the lookup DT's expected duration is 40ms, we return the complete chance at
   454  // 160ms. Instead of durations though, we use the indices that correspond to the
   455  // durations.
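        //
        // In terms of the cached fields this means: if cachedLookupIndex is, say, 5
        // and the chance is recalculated at index 20, the cached complete chance
        // becomes cachedReadDTChances[20-5]; at any index below 5 it is simply 0.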
   456  func (iw *individualWorker) recalculateCompleteChance(index int) {
   457  	// if the worker is resolved, simply return the read chance at given index
   458  	if iw.isResolved() {
   459  		iw.cachedCompleteChance = iw.cachedReadDTChances[index]
   460  		return
   461  	}
   462  
   463  	// if it's not resolved, and the index is smaller than our cached lookup
   464  	// index, we return a complete chance of zero because it has no chance of
   465  	// completing since it's not expected to have been resolved yet
   466  	if index < iw.cachedLookupIndex {
   467  		iw.cachedCompleteChance = 0
   468  		return
   469  	}
   470  
   471  	// otherwise return the read chance offset by the lookup index
   472  	iw.cachedCompleteChance = iw.cachedReadDTChances[index-iw.cachedLookupIndex]
   473  }
   474  
   475  // completeChanceCached returns the chance this worker will complete
   476  func (iw *individualWorker) completeChanceCached() float64 {
   477  	return iw.cachedCompleteChance
   478  }
   479  
   480  // getPieceForDownload returns the piece to download next
   481  func (iw *individualWorker) getPieceForDownload() uint64 {
   482  	return iw.currentPiece
   483  }
   484  
   485  // identifier returns a unique identifier for this worker, for an individual
   486  // worker this is the identifier it was assigned when the workers were built.
   487  func (iw *individualWorker) identifier() uint32 {
   488  	return iw.staticIdentifier
   489  }
   490  
   491  // isLaunched returns true when this worker has been launched.
   492  func (iw *individualWorker) isLaunched() bool {
   493  	return !iw.currentPieceLaunchedAt.IsZero()
   494  }
   495  
   496  // isOnCooldown returns whether this individual worker is on cooldown.
   497  func (iw *individualWorker) isOnCooldown() bool {
   498  	return iw.onCoolDown
   499  }
   500  
   501  // isResolved returns whether this individual worker has resolved.
   502  func (iw *individualWorker) isResolved() bool {
   503  	return iw.resolved
   504  }
   505  
   506  // markPieceForDownload takes a piece index and marks it as the piece to
   507  // download next for this worker.
   508  func (iw *individualWorker) markPieceForDownload(pieceIndex uint64) {
   509  	// sanity check the given piece is a piece present in the worker's pieces
   510  	if build.Release == "testing" {
   511  		var found bool
   512  		for _, availPieceIndex := range iw.pieceIndices {
   513  			if pieceIndex == availPieceIndex {
   514  				found = true
   515  				break
   516  			}
   517  		}
   518  		if !found {
   519  			build.Critical(fmt.Sprintf("markPieceForDownload is marking a piece that is not present in the worker's piece indices, %v does not include %v", iw.pieceIndices, pieceIndex))
   520  		}
   521  	}
   522  	iw.currentPiece = pieceIndex
   523  }
   524  
   525  // pieces returns the piece indices this worker can download.
   526  func (iw *individualWorker) pieces(_ *projectDownloadChunk) []uint64 {
   527  	return iw.pieceIndices
   528  }
   529  
   530  // worker returns the worker.
   531  func (iw *individualWorker) worker() *worker {
   532  	return iw.staticWorker
   533  }
   534  
   535  // clone returns a shallow copy of the worker set.
   536  func (ws *workerSet) clone() *workerSet {
   537  	return &workerSet{
   538  		workers: append([]downloadWorker{}, ws.workers...),
   539  
   540  		staticBucketDuration: ws.staticBucketDuration,
   541  		staticBucketIndex:    ws.staticBucketIndex,
   542  		staticMinPieces:      ws.staticMinPieces,
   543  		staticNumOverdrive:   ws.staticNumOverdrive,
   544  
   545  		staticPDC: ws.staticPDC,
   546  	}
   547  }
   548  
   549  // cheaperSetFromCandidate returns a new worker set if the given candidate
   550  // worker can improve the cost of the worker set. The worker that is swapped
   551  // out for the candidate is the most expensive worker possible, which is not
   552  // necessarily the most expensive worker in the set because we have to take into
   553  // account the pieces the worker can download.
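        //
        // As a purely illustrative example: a set holds worker A (cost 10, marked
        // for piece 1) and worker B (cost 4, marked for piece 2), and the candidate
        // C (cost 6) can download pieces 1 and 3. Sorted by cost A comes first; C is
        // cheaper than A, A has not launched yet, and C can serve A's piece 1, so A
        // is swapped out for C and the search stops.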
   554  func (ws *workerSet) cheaperSetFromCandidate(candidate downloadWorker) *workerSet {
   555  	// convenience variables
   556  	pdc := ws.staticPDC
   557  
   558  	// build two maps for fast lookups
   559  	originalIndexMap := make(map[uint32]int)
   560  	piecesToIndexMap := make(map[uint64]int)
   561  	for i, w := range ws.workers {
   562  		originalIndexMap[w.identifier()] = i
   563  		if _, ok := w.(*individualWorker); ok {
   564  			piecesToIndexMap[w.getPieceForDownload()] = i
   565  		}
   566  	}
   567  
   568  	// sort the workers by cost, most expensive to cheapest
   569  	byCostDesc := append([]downloadWorker{}, ws.workers...)
   570  	sort.Slice(byCostDesc, func(i, j int) bool {
   571  		wCostI := byCostDesc[i].cost()
   572  		wCostJ := byCostDesc[j].cost()
   573  		return wCostI > wCostJ
   574  	})
   575  
   576  	// range over the workers
   577  	swapIndex := -1
   578  LOOP:
   579  	for _, w := range byCostDesc {
   580  		// if the candidate is not cheaper than this worker we can stop looking
   581  		// to build a cheaper set since the workers are sorted by cost
   582  		if candidate.cost() >= w.cost() {
   583  			break
   584  		}
   585  
   586  		// if the current worker is launched, don't swap it out
   587  		expensiveWorkerPiece, launched, _ := pdc.workerProgress(w)
   588  		if launched {
   589  			continue
   590  		}
   591  
   592  		// if the current worker is a chimera worker, and we're cheaper, swap
   593  		expensiveWorkerIndex := originalIndexMap[w.identifier()]
   594  		if _, ok := w.(*chimeraWorker); ok {
   595  			swapIndex = expensiveWorkerIndex
   596  			break LOOP
   597  		}
   598  
   599  		// range over the candidate's pieces and see whether we can swap
   600  		for _, piece := range candidate.pieces(pdc) {
   601  			// if the candidate can download the same piece as the expensive
   602  			// worker, swap it out because it's cheaper
   603  			if piece == expensiveWorkerPiece {
   604  				swapIndex = expensiveWorkerIndex
   605  				break LOOP
   606  			}
   607  
   608  			// if the candidate can download a piece that is currently not being
   609  			// downloaded by anyone else, swap it as well
   610  			_, workerForPiece := piecesToIndexMap[piece]
   611  			if !workerForPiece {
   612  				swapIndex = expensiveWorkerIndex
   613  				break LOOP
   614  			}
   615  		}
   616  	}
   617  
   618  	if swapIndex > -1 {
   619  		cheaperSet := ws.clone()
   620  		cheaperSet.workers[swapIndex] = candidate
   621  		return cheaperSet
   622  	}
   623  	return nil
   624  }
   625  
   626  // adjustedDuration returns the cost adjusted expected duration of the worker
   627  // set using the given price per ms.
   628  func (ws *workerSet) adjustedDuration(ppms types.Currency) time.Duration {
   629  	// calculate the total cost of the worker set
   630  	var totalCost float64
   631  	for _, w := range ws.workers {
   632  		totalCost += w.cost()
   633  	}
   634  
   635  	// calculate the cost penalty using the given price per ms and apply it to
   636  	// the worker set's expected duration.
   637  	totalCostCurr := types.NewCurrency64(uint64(totalCost))
   638  	return addCostPenalty(ws.staticBucketDuration, totalCostCurr, ppms)
   639  }
   640  
   641  // chancesAfter is a small helper function that returns a list of every worker's
   642  // cached chance it's completed within the worker set's bucket duration.
   643  func (ws *workerSet) chancesAfter() coinflips {
   644  	chances := make(coinflips, len(ws.workers))
   645  	for i, w := range ws.workers {
   646  		chances[i] = w.completeChanceCached()
   647  	}
   648  	return chances
   649  }
   650  
   651  // chanceGreaterThanHalf returns whether the total chance this worker set
   652  // completes the download within the set's bucket duration is more than 50%.
   653  //
   654  // NOTE: this function abstracts the chance a worker completes within that
   655  // duration as a coinflip to make it easier to reason about the problem given
   656  // that the worker set may contain one or more overdrive workers.
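        //
        // As a purely illustrative example: with one overdrive worker and two
        // coinflips of 0.6 and 0.55, the chance at least one of them comes up heads
        // is 0.33 + 0.22 + 0.27 = 0.82, which is greater than 0.5, so such a set
        // would be accepted.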
   657  func (ws *workerSet) chanceGreaterThanHalf() bool {
   658  	// convert every worker into a coinflip
   659  	coinflips := ws.chancesAfter()
   660  
   661  	var chance float64
   662  	switch ws.staticNumOverdrive {
   663  	case 0:
   664  		// if we don't have to consider any overdrive workers, the chance it's
   665  		// all heads is the chance that needs to be greater than half
   666  		chance = coinflips.chanceAllHeads()
   667  	case 1:
   668  		// if there is 1 overdrive worker, we can essentially have one of the
   669  		// coinflips come up as tails, as long as all the others are heads
   670  		chance = coinflips.chanceHeadsAllowOneTails()
   671  	case 2:
   672  		// if there are 2 overdrive workers, we can have two of them come up as
   673  		// tails, as long as all the others are heads
   674  		chance = coinflips.chanceHeadsAllowTwoTails()
   675  	default:
   676  		// if there are a lot of overdrive workers, we use an approximation by
   677  		// summing all coinflips to see whether we are expected to be able to
   678  		// download min pieces within the given duration
   679  		return coinflips.chanceSum() > float64(ws.staticMinPieces)
   680  	}
   681  
   682  	return chance > 0.5
   683  }
   684  
   685  // chanceAllHeads returns the chance all coins show heads.
   686  func (cf coinflips) chanceAllHeads() float64 {
   687  	if len(cf) == 0 {
   688  		return 0
   689  	}
   690  
   691  	chanceAllHeads := float64(1)
   692  	for _, chanceHead := range cf {
   693  		chanceAllHeads *= chanceHead
   694  	}
   695  	return chanceAllHeads
   696  }
   697  
   698  // chanceHeadsAllowOneTails returns the chance at least n-1 coins show heads
   699  // where n is the number of coins.
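        //
        // As a worked example: for chances [0.9, 0.8, 0.7] the chance all three show
        // heads is 0.9*0.8*0.7 = 0.504, and the chance at least two show heads is
        // 0.504 + 0.056 + 0.126 + 0.216 = 0.902, where each added term is the chance
        // that exactly one specific coin shows tails (e.g. 0.504/0.9*0.1 = 0.056).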
   700  func (cf coinflips) chanceHeadsAllowOneTails() float64 {
   701  	chanceAllHeads := cf.chanceAllHeads()
   702  
   703  	totalChance := chanceAllHeads
   704  	for _, chanceHead := range cf {
   705  		chanceTails := 1 - chanceHead
   706  		totalChance += (chanceAllHeads / chanceHead * chanceTails)
   707  	}
   708  	return totalChance
   709  }
   710  
   711  // chanceHeadsAllowTwoTails returns the chance at least n-2 coins show heads
   712  // where n is the number of coins.
   713  func (cf coinflips) chanceHeadsAllowTwoTails() float64 {
   714  	chanceAllHeads := cf.chanceAllHeads()
   715  	totalChance := cf.chanceHeadsAllowOneTails()
   716  
   717  	for i := 0; i < len(cf)-1; i++ {
   718  		chanceIHeads := cf[i]
   719  		chanceITails := 1 - chanceIHeads
   720  		chanceOnlyITails := chanceAllHeads / chanceIHeads * chanceITails
   721  		for jj := i + 1; jj < len(cf); jj++ {
   722  			chanceJHeads := cf[jj]
   723  			chanceJTails := 1 - chanceJHeads
   724  			chanceOnlyIAndJJTails := chanceOnlyITails / chanceJHeads * chanceJTails
   725  			totalChance += chanceOnlyIAndJJTails
   726  		}
   727  	}
   728  	return totalChance
   729  }
   730  
   731  // chanceSum returns the sum of all chances
   732  func (cf coinflips) chanceSum() float64 {
   733  	var sum float64
   734  	for _, flip := range cf {
   735  		sum += flip
   736  	}
   737  	return sum
   738  }
   739  
   740  // updateWorkers will update the given set of workers in-place, we update the
   741  // workers instead of recreating them because we found that the process of
   742  // creating an individualWorker involves some cpu intensive steps, like gouging.
   743  // By updating them, rather than recreating them, we avoid doing these
   744  // computations in every iteration of the download algorithm.
   745  func (pdc *projectDownloadChunk) updateWorkers(workers []*individualWorker) []*individualWorker {
   746  	ws := pdc.workerState
   747  	ws.mu.Lock()
   748  	defer ws.mu.Unlock()
   749  
   750  	// make a map of all resolved workers to their piece indices
   751  	resolved := make(map[string][]uint64, len(workers))
   752  	for _, rw := range ws.resolvedWorkers {
   753  		resolved[rw.worker.staticHostPubKeyStr] = rw.pieceIndices
   754  	}
   755  
   756  	// loop over all workers and update the resolved status and piece indices
   757  	for i := 0; i < len(workers); i++ {
   758  		w := workers[i]
   759  
   760  		pieceIndices, resolved := resolved[w.staticWorker.staticHostPubKeyStr]
   761  		if !w.isResolved() && resolved {
   762  			w.resolved = true
   763  			w.pieceIndices = pieceIndices
   764  			if len(w.pieceIndices) == 0 {
   765  				// if the worker resolved and doesn't have any
   766  				// pieces, remove it from the workers by
   767  				// swapping it to the end and shrinking the
   768  				// slice by 1.
   769  				workers[i], workers[len(workers)-1] = workers[len(workers)-1], workers[i]
   770  				workers = workers[:len(workers)-1]
   771  				i--
   772  				continue
   773  			}
   774  		}
   775  
   776  		// check whether the worker is on cooldown
   777  		hsq := w.staticWorker.staticJobHasSectorQueue
   778  		rjq := w.staticWorker.staticJobReadQueue
   779  		w.onCoolDown = hsq.callOnCooldown() || rjq.callOnCooldown()
   780  
   781  		// recalculate the distributions
   782  		w.recalculateDistributionChances()
   783  	}
   784  	return workers
   785  }
   786  
   787  // workers returns both resolved and unresolved workers as a single slice of
   788  // individual workers
   789  func (pdc *projectDownloadChunk) workers() []*individualWorker {
   790  	ws := pdc.workerState
   791  	ws.mu.Lock()
   792  	defer ws.mu.Unlock()
   793  
   794  	workers := make([]*individualWorker, 0, len(ws.resolvedWorkers)+len(ws.unresolvedWorkers))
   795  
   796  	// convenience variables
   797  	ec := pdc.workerSet.staticErasureCoder
   798  	length := pdc.pieceLength
   799  	numPieces := ec.NumPieces()
   800  
   801  	iws := make([]individualWorker, cap(workers))
   802  
   803  	// add all resolved workers that are deemed good for downloading
   804  	var ldt *skymodules.DistributionTracker
   805  	var rdt *skymodules.DistributionTracker
   806  	var jrq *jobReadQueue
   807  	var hsq *jobHasSectorQueue
   808  	var iw *individualWorker
   809  	var cost float64
   810  	for _, rw := range ws.resolvedWorkers {
   811  		if !isGoodForDownload(rw.worker, rw.pieceIndices) {
   812  			continue
   813  		}
   814  
   815  		jrq = rw.worker.staticJobReadQueue
   816  		rdt = jrq.staticStats.distributionTrackerForLength(length)
   817  		cost, _ = jrq.callExpectedJobCost(length).Float64()
   818  		hsq = rw.worker.staticJobHasSectorQueue
   819  		ldt = hsq.staticDT
   820  
   821  		iw = &iws[len(workers)] //staticPoolIndividualWorkers.Get()
   822  		iw.resolved = true
   823  		iw.pieceIndices = rw.pieceIndices
   824  		iw.onCoolDown = jrq.callOnCooldown() || hsq.callOnCooldown()
   825  		iw.staticAvailabilityRate = hsq.callAvailabilityRate(numPieces)
   826  		iw.staticCost = cost
   827  		iw.staticDownloadLaunchTime = time.Now()
   828  		iw.staticIdentifier = uint32(len(workers))
   829  		iw.staticLookupDistribution = ldt.Distribution(0)
   830  		iw.staticReadDistribution = rdt.Distribution(0)
   831  		iw.staticWorker = rw.worker
   832  		workers = append(workers, iw)
   833  	}
   834  
   835  	// add all unresolved workers that are deemed good for downloading
   836  	for _, uw := range ws.unresolvedWorkers {
   837  		// exclude workers that are not useful
   838  		w := uw.staticWorker
   839  		if !isGoodForDownload(w, pdc.staticPieceIndices) {
   840  			continue
   841  		}
   842  
   843  		jrq = w.staticJobReadQueue
   844  		rdt = jrq.staticStats.distributionTrackerForLength(length)
   845  		hsq = w.staticJobHasSectorQueue
   846  		ldt = hsq.staticDT
   847  
   848  		iw = &iws[len(workers)] //staticPoolIndividualWorkers.Get()
   849  		cost, _ = jrq.callExpectedJobCost(length).Float64()
   850  		iw.resolved = false
   851  		iw.pieceIndices = pdc.staticPieceIndices
   852  		iw.onCoolDown = jrq.callOnCooldown() || hsq.callOnCooldown()
   853  
   854  		iw.staticAvailabilityRate = hsq.callAvailabilityRate(numPieces)
   855  		iw.staticCost = cost
   856  		iw.staticDownloadLaunchTime = time.Now()
   857  		iw.staticIdentifier = uint32(len(workers))
   858  		iw.staticLookupDistribution = ldt.Distribution(0)
   859  		iw.staticReadDistribution = rdt.Distribution(0)
   860  		iw.staticWorker = w
   861  		workers = append(workers, iw)
   862  	}
   863  
   864  	return workers
   865  }
   866  
   867  // workerProgress returns the piece that was marked on the worker to download
   868  // next, alongside two booleans that indicate whether it was launched and
   869  // whether it completed.
   870  func (pdc *projectDownloadChunk) workerProgress(w downloadWorker) (uint64, bool, bool) {
   871  	// return defaults if the worker is a chimera worker, those are not
   872  	// downloading by definition
   873  	iw, ok := w.(*individualWorker)
   874  	if !ok {
   875  		return 0, false, false
   876  	}
   877  
   878  	// get the marked piece for this worker
   879  	currentPiece := w.getPieceForDownload()
   880  
   881  	// fetch the worker's download progress, if that does not exist, it's
   882  	// neither launched nor completed.
   883  	workerProgress, exists := pdc.workerProgressMap[iw.identifier()]
   884  	if !exists {
   885  		return currentPiece, false, false
   886  	}
   887  
   888  	_, launched := workerProgress.launchedPieces[currentPiece]
   889  	_, completed := workerProgress.completedPieces[currentPiece]
   890  	return currentPiece, launched, completed
   891  }
   892  
   893  // launchWorkerSet will range over the workers in the given worker set and will
   894  // try to launch every worker that has not yet been launched and is ready to
   895  // launch.
   896  func (pdc *projectDownloadChunk) launchWorkerSet(ws *workerSet) {
   897  	// convenience variables
   898  	minPieces := pdc.workerSet.staticErasureCoder.MinPieces()
   899  
   900  	// range over all workers in the set and launch if possible
   901  	for _, w := range ws.workers {
   902  		// continue if the worker is a chimera worker
   903  		iw, ok := w.(*individualWorker)
   904  		if !ok {
   905  			continue
   906  		}
   907  
   908  		// continue if the worker is already launched
   909  		piece, isLaunched, _ := pdc.workerProgress(w)
   910  		if isLaunched {
   911  			continue
   912  		}
   913  
   914  		// launch the worker
   915  		isOverdrive := len(pdc.launchedWorkers) >= minPieces
   916  		_, gotLaunched := pdc.launchWorker(iw, piece, isOverdrive)
   917  
   918  		// log the event in case we launched a worker
   919  		if gotLaunched {
   920  			if span := opentracing.SpanFromContext(pdc.ctx); span != nil {
   921  				span.LogKV(
   922  					"aWorkerLaunched", w.identifier(),
   923  					"piece", piece,
   924  					"overdriveWorker", isOverdrive,
   925  					"wsDuration", ws.staticBucketDuration,
   926  					"wsIndex", ws.staticBucketIndex,
   927  				)
   928  			}
   929  		}
   930  	}
   931  	return
   932  }
   933  
   934  // threadedLaunchProjectDownload performs the main download loop, every
   935  // iteration we update the pdc's available pieces, construct a new worker set
   936  // and launch every worker that can be launched from that set. Every iteration
   937  // we check whether the download was finished.
   938  func (pdc *projectDownloadChunk) threadedLaunchProjectDownload() {
   939  	// grab some variables
   940  	ws := pdc.workerState
   941  	ec := pdc.workerSet.staticErasureCoder
   942  
   943  	// grab the workers from the pdc, every iteration we will update this set of
   944  	// workers to avoid needlessly performing gouging checks on every iteration
   945  	workers := pdc.workers()
   946  
   947  	// verify we have enough workers to complete the download
   948  	if len(workers) < ec.MinPieces() {
   949  		pdc.fail(errors.Compose(ErrRootNotFound, errors.AddContext(errNotEnoughWorkers, fmt.Sprintf("%v < %v", len(workers), ec.MinPieces()))))
   950  		return
   951  	}
   952  
   953  	// Allocate some memory outside of the loop to reduce the number of
   954  	// allocations within.
   955  	ds := &bufferedDownloadState{
   956  		downloadWorkers:       make([]downloadWorker, 0, len(workers)),
   957  		mostLikely:            make([]downloadWorker, 0, maxOverdriveWorkers+pdc.workerSet.staticErasureCoder.MinPieces()),
   958  		lessLikely:            make([]downloadWorker, 0, len(workers)),
   959  		pieces:                make(map[uint64]struct{}, pdc.workerSet.staticErasureCoder.NumPieces()),
   960  		added:                 make(map[uint32]struct{}, len(workers)),
   961  		sortedDownloadWorkers: make([]sortedDownloadWorker, 0, len(workers)),
   962  	}
   963  
   964  	// register for a worker update chan
   965  	workerUpdateChan := ws.managedRegisterForWorkerUpdate()
   966  	prevWorkerUpdate := time.Now()
   967  
   968  	var maxTimer *time.Timer
   969  	defer func() {
   970  		if maxTimer != nil {
   971  			if !maxTimer.Stop() {
   972  				select {
   973  				case <-maxTimer.C:
   974  				default:
   975  				}
   976  			}
   977  		}
   978  	}()
   979  
   980  	for {
   981  		// update the pieces
   982  		updated := pdc.updatePieces()
   983  
   984  		// update the workers
   985  		if updated || time.Since(prevWorkerUpdate) > maxWaitUpdateWorkers {
   986  			workers = pdc.updateWorkers(workers)
   987  			prevWorkerUpdate = time.Now()
   988  		}
   989  
   990  		// create a worker set and launch it
   991  		workerSet, err := pdc.createWorkerSet(workers, ds)
   992  		if err != nil {
   993  			pdc.fail(err)
   994  			return
   995  		}
   996  		if workerSet != nil {
   997  			pdc.launchWorkerSet(workerSet)
   998  		}
   999  
  1000  		// Drain and reset timer if necessary.
  1001  		if maxTimer != nil {
  1002  			if !maxTimer.Stop() {
  1003  				select {
  1004  				case <-maxTimer.C:
  1005  				default:
  1006  				}
  1007  			}
  1008  			maxTimer.Reset(maxWaitUnresolvedWorkerUpdate)
  1009  		} else {
  1010  			maxTimer = time.NewTimer(maxWaitUnresolvedWorkerUpdate)
  1011  		}
  1012  
  1013  		// iterate
  1014  		select {
  1015  		case <-maxTimer.C:
  1016  			// recreate the workerset after maxwait
  1017  		case <-workerUpdateChan:
  1018  			// replace the worker update channel
  1019  			workerUpdateChan = ws.managedRegisterForWorkerUpdate()
  1020  		case jrr := <-pdc.workerResponseChan:
  1021  			pdc.handleJobReadResponse(jrr)
  1022  		case <-pdc.ctx.Done():
  1023  			pdc.fail(ErrProjectTimedOut)
  1024  			return
  1025  		}
  1026  
  1027  		// check whether the download is completed
  1028  		completed, err := pdc.finished()
  1029  		if completed {
  1030  			pdc.finalize()
  1031  			return
  1032  		}
  1033  		if err != nil {
  1034  			pdc.fail(err)
  1035  			return
  1036  		}
  1037  	}
  1038  }
  1039  
  1040  // createWorkerSet tries to create a worker set from the pdc's resolved and
  1041  // unresolved workers, the maximum number of overdrive workers in the set is
  1042  // defined by 'maxOverdriveWorkers'.
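        //
        // As a purely illustrative example of the two-phase scan: with
        // bucketIndexScanStep = 10, the first pass evaluates bucket indices 0, 10,
        // 20, ... for every overdrive count; if the best set so far was found around
        // index 40, the second pass then scans the surrounding indices, roughly 30
        // through 50, to refine the choice.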
  1043  func (pdc *projectDownloadChunk) createWorkerSet(workers []*individualWorker, ds *bufferedDownloadState) (*workerSet, error) {
  1044  	// can't create a workerset without download workers
  1045  	if len(workers) == 0 {
  1046  		return nil, nil
  1047  	}
  1048  
  1049  	// convenience variables
  1050  	ppms := pdc.pricePerMS
  1051  	minPieces := pdc.workerSet.staticErasureCoder.MinPieces()
  1052  
  1053  	// loop state
  1054  	var bestSet *workerSet
  1055  	var numOverdrive int
  1056  	var bI int
  1057  
  1058  	// start numOverdrive at 1 if the dependency is set
  1059  	if pdc.workerState.staticDeps.Disrupt("OverdriveDownload") {
  1060  		numOverdrive = 1
  1061  	}
  1062  
  1063  	// approximate the bucket index by iterating over all bucket indices using a
  1064  	// step size greater than 1, once we've found the best set, we range over
  1065  // bI-stepSize|bI+stepSize to find the best bucket index
  1066  OUTER:
  1067  	for ; numOverdrive <= maxOverdriveWorkers; numOverdrive++ {
  1068  		for bI = 0; bI <= skymodules.DistributionTrackerTotalBuckets; bI += bucketIndexScanStep {
  1069  			if bI == skymodules.DistributionTrackerTotalBuckets {
  1070  				bI--
  1071  			}
  1072  			// create the worker set
  1073  			bDur := skymodules.DistributionDurationForBucketIndex(bI)
  1074  			mostLikelySet, escape := pdc.createWorkerSetInner(workers, minPieces, numOverdrive, bI, bDur, ds)
  1075  			if escape {
  1076  				break OUTER
  1077  			}
  1078  			if mostLikelySet == nil {
  1079  				continue
  1080  			}
  1081  
  1082  			// perform price per ms comparison
  1083  			if bestSet == nil {
  1084  				bestSet = mostLikelySet
  1085  			} else if mostLikelySet.adjustedDuration(ppms) < bestSet.adjustedDuration(ppms) {
  1086  				bestSet = mostLikelySet
  1087  			}
  1088  
  1089  			// exit early if ppms in combination with the bucket duration
  1090  			// already exceeds the adjusted cost of the current best set,
  1091  			// workers would be too slow by definition
  1092  			if bestSet != nil && bDur > bestSet.adjustedDuration(ppms) {
  1093  				break OUTER
  1094  			}
  1095  		}
  1096  	}
  1097  
  1098  	// if we haven't found a set, no need to try and find the optimal index
  1099  	if bestSet == nil {
  1100  		return nil, nil
  1101  	}
  1102  
  1103  	// after we've found one, range over bI-10 -> bI+10 to find the optimal
  1104  	// bucket index
  1105  	bIMin, bIMax := bucketIndexRange(bI)
  1106  	for bI = bIMin; bI < bIMax; bI++ {
  1107  		// create the worker set
  1108  		bDur := skymodules.DistributionDurationForBucketIndex(bI)
  1109  		mostLikelySet, escape := pdc.createWorkerSetInner(workers, minPieces, numOverdrive, bI, bDur, ds)
  1110  		if escape {
  1111  			break
  1112  		}
  1113  		if mostLikelySet == nil {
  1114  			continue
  1115  		}
  1116  
  1117  		// perform price per ms comparison
  1118  		if bestSet == nil {
  1119  			bestSet = mostLikelySet
  1120  		} else if mostLikelySet.adjustedDuration(ppms) < bestSet.adjustedDuration(ppms) {
  1121  			bestSet = mostLikelySet
  1122  		}
  1123  
  1124  		// exit early if ppms in combination with the bucket duration
  1125  		// already exceeds the adjusted cost of the current best set,
  1126  		// workers would be too slow by definition
  1127  		if bestSet != nil && bDur > bestSet.adjustedDuration(ppms) {
  1128  			break
  1129  		}
  1130  	}
  1131  
  1132  	return bestSet, nil
  1133  }
  1134  
  1135  // createWorkerSetInner is the inner loop that is called by createWorkerSet, it
  1136  // tries to create a worker set from the given list of workers, taking into
  1137  // account the given number of min pieces and overdrive workers, and the given
  1138  // bucket duration. It returns a workerset, and a boolean that indicates whether
  1139  // we want to break out of the (outer) loop that surrounds this function call.
  1140  func (pdc *projectDownloadChunk) createWorkerSetInner(workers []*individualWorker, minPieces, numOverdrive, bI int, bDur time.Duration, ds *bufferedDownloadState) (*workerSet, bool) {
  1141  	// reset the buffered state
  1142  	ds.Reset()
  1143  
  1144  	workersNeeded := minPieces + numOverdrive
  1145  
  1146  	// recalculate the complete chance at given index
  1147  	for _, w := range workers {
  1148  		w.recalculateCompleteChance(bI)
  1149  	}
  1150  
  1151  	// build the download workers
  1152  	downloadWorkers := pdc.buildDownloadWorkers(workers, ds)
  1153  
  1154  	// divide the workers in most likely and less likely
  1155  	mostLikely, lessLikely := pdc.splitMostlikelyLessLikely(downloadWorkers, workersNeeded, ds)
  1156  
  1157  	// if there are no most likely workers, escape early
  1158  	if len(mostLikely) == 0 {
  1159  		return nil, true
  1160  	}
  1161  
  1162  	// build the most likely set
  1163  	mostLikelySet := &workerSet{
  1164  		workers: mostLikely,
  1165  
  1166  		staticBucketDuration: bDur,
  1167  		staticBucketIndex:    bI,
  1168  		staticNumOverdrive:   numOverdrive,
  1169  		staticMinPieces:      minPieces,
  1170  
  1171  		staticPDC: pdc,
  1172  	}
  1173  
  1174  	// if the chance of the most likely set does not exceed 50%, it is
  1175  	// not high enough to continue, no need to continue this iteration,
  1176  	// we need to try a slower and thus more likely bucket
  1177  	//
  1178  	// NOTE: this 50% value is arbitrary, it actually even means that in 50% of
  1179  	// all cases we fall at the other side of the fence... tweaking this value
  1180  	// and calculating how often we run a bad worker set is part of the download
  1181  	// improvements listed at the top of this file.
  1182  	if !mostLikelySet.chanceGreaterThanHalf() {
  1183  		return nil, false
  1184  	}
  1185  
  1186  	// now loop the less likely workers and try and swap them with the
  1187  	// most expensive workers in the most likely set
  1188  	for _, w := range lessLikely {
  1189  		cheaperSet := mostLikelySet.cheaperSetFromCandidate(w)
  1190  		if cheaperSet == nil {
  1191  			continue
  1192  		}
  1193  
  1194  		// if the cheaper set's chance of completing before the given
  1195  		// duration is not greater than half we can break because the
  1196  		// `lessLikely` workers were sorted by chance
  1197  		if !cheaperSet.chanceGreaterThanHalf() {
  1198  			break
  1199  		}
  1200  
  1201  		mostLikelySet = cheaperSet
  1202  	}
  1203  
  1204  	return mostLikelySet, false
  1205  }
  1206  
  1207  // addCostPenalty takes a certain job time and adds a penalty to it depending on
  1208  // the jobcost and the pdc's price per MS.
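        //
        // As a purely illustrative example: with a jobTime of 100ms, a jobCost of
        // 500H and a pricePerMS of 100H, the penalty is 500/100 = 5, so the adjusted
        // duration becomes 100ms + 5ms = 105ms. If jobCost were less than or equal
        // to pricePerMS, no penalty would be added at all.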
  1209  func addCostPenalty(jobTime time.Duration, jobCost, pricePerMS types.Currency) time.Duration {
  1210  	// If the pricePerMS is higher or equal than the cost of the job, simply
  1211  	// return without penalty.
  1212  	if pricePerMS.Cmp(jobCost) >= 0 {
  1213  		return jobTime
  1214  	}
  1215  
  1216  	// Otherwise, add a penalty
  1217  	var adjusted time.Duration
  1218  	penalty, err := jobCost.Div(pricePerMS).Uint64()
  1219  
  1220  	// because we multiply the penalty with milliseconds and add the jobtime we
  1221  	// have to check for overflows quite extensively, define a max penalty which
  1222  	// we'll then compare with the job time to see whether we can safely
  1223  	// calculate the adjusted duration
  1224  	penaltyMaxCheck := math.MaxInt64 / int64(time.Millisecond)
  1225  	if err != nil || penalty > math.MaxInt64 {
  1226  		adjusted = time.Duration(math.MaxInt64)
  1227  	} else if reduced := penaltyMaxCheck - int64(penalty); int64(jobTime) > reduced {
  1228  		adjusted = time.Duration(math.MaxInt64)
  1229  	} else {
  1230  		adjusted = jobTime + (time.Duration(penalty) * time.Millisecond)
  1231  	}
  1232  	return adjusted
  1233  }
  1234  
  1235  // buildChimeraWorkers turns a list of individual workers into chimera workers.
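        //
        // As a purely illustrative example: unresolved workers which, after being
        // sorted by complete chance, have availability rates 0.9, 0.7, 0.5, 0.6, 0.8
        // and 0.4 are grouped left to right until the cumulative rate reaches the
        // threshold of 2: the first three (0.9+0.7+0.5 = 2.1) form one chimera, while
        // the remaining three (0.6+0.8+0.4 = 1.8) never reach the threshold and do
        // not form a chimera in this iteration.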
  1236  func (pdc *projectDownloadChunk) buildChimeraWorkers(unresolvedWorkers []*individualWorker, lowestChimeraIdentifier uint32) []downloadWorker {
  1237  	// sort workers by chance they complete
  1238  	sort.Sort(sortedIndividualWorkers(unresolvedWorkers))
  1239  
  1240  	// create an array that will hold all chimera workers
  1241  	chimeras := make([]downloadWorker, 0, len(unresolvedWorkers))
  1242  
  1243  	// create some loop state
  1244  	currAvail := float64(0)
  1245  	start := 0
  1246  
  1247  	// loop over the unresolved workers
  1248  	for curr := 0; curr < len(unresolvedWorkers); curr++ {
  1249  		currAvail += unresolvedWorkers[curr].staticAvailabilityRate
  1250  		if currAvail >= chimeraAvailabilityRateThreshold {
  1251  			end := curr + 1
  1252  			chimera := NewChimeraWorker(unresolvedWorkers[start:end], lowestChimeraIdentifier)
  1253  			lowestChimeraIdentifier++
  1254  			chimeras = append(chimeras, chimera)
  1255  
  1256  			// reset loop state
  1257  			start = end
  1258  			currAvail = 0
  1259  		}
  1260  	}
  1261  	return chimeras
  1262  }
  1263  
  1264  // buildDownloadWorkers is a helper function that takes a list of individual
  1265  // workers and turns them into download workers.
  1266  func (pdc *projectDownloadChunk) buildDownloadWorkers(workers []*individualWorker, ds *bufferedDownloadState) []downloadWorker {
  1267  	// create an array of download workers
  1268  	downloadWorkers := ds.downloadWorkers
  1269  
  1270  	// split the workers into resolved and unresolved workers, the resolved
  1271  	// workers can be added directly to the array of download workers
  1272  	resolvedWorkers, unresolvedWorkers := splitResolvedUnresolved(workers)
  1273  	for _, rw := range resolvedWorkers {
  1274  		downloadWorkers = append(downloadWorkers, rw)
  1275  	}
  1276  
  1277  	// the unresolved workers are used to build chimeras with
  1278  	chimeraWorkers := pdc.buildChimeraWorkers(unresolvedWorkers, uint32(len(workers)))
  1279  	return append(downloadWorkers, chimeraWorkers...)
  1280  }
  1281  
  1282  // splitMostlikelyLessLikely takes a list of download workers alongside the
  1283  // number of workers that are needed for the most likely set of
  1284  // workers to complete a download (this is not necessarily equal to 'minPieces'
  1285  // workers but also takes into account a number of overdrive workers). This
  1286  // method will split the given workers array into a list of most likely workers
  1287  // and a list of less likely workers.
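        //
        // As a purely illustrative example with workersNeeded = 3: workers A, B, C
        // and D are sorted from most to least likely, where A can serve piece 1, B
        // pieces 1 and 2, C piece 2 and D piece 3. The first pass marks A for piece
        // 1, B for piece 2 and D for piece 3 (C is skipped because piece 2 is
        // already covered), filling the most likely set. The second pass then adds C
        // to the less likely set as a potential overdrive worker on piece 2.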
  1288  func (pdc *projectDownloadChunk) splitMostlikelyLessLikely(workers []downloadWorker, workersNeeded int, ds *bufferedDownloadState) ([]downloadWorker, []downloadWorker) {
  1289  	// prepare two slices that hold the workers which are most likely and the
  1290  	// ones that are less likely
  1291  	mostLikely := ds.mostLikely
  1292  	lessLikely := ds.lessLikely
  1293  
  1294  	// define some state variables to ensure we select workers in a way that the
  1295  	// pieces are unique and we are not using a worker twice
  1296  	pieces := ds.pieces
  1297  	added := ds.added
  1298  
  1299  	// addWorker is a helper function that adds a worker to either the most
  1300  	// likely or less likely worker array and updates our state variables
  1301  	addWorker := func(w downloadWorker, pieceIndex uint64) {
  1302  		if len(mostLikely) < workersNeeded {
  1303  			mostLikely = append(mostLikely, w)
  1304  		} else {
  1305  			lessLikely = append(lessLikely, w)
  1306  		}
  1307  
  1308  		added[w.identifier()] = struct{}{}
  1309  		pieces[pieceIndex] = struct{}{}
  1310  		w.markPieceForDownload(pieceIndex)
  1311  	}
  1312  
  1313  	// sort the workers by percentage chance they complete after the current
  1314  	// bucket duration, essentially sorting them from most to least likely
  1315  	sdw := ds.sortedDownloadWorkers
  1316  	for i := range workers {
  1317  		sdw = append(sdw, sortedDownloadWorker{
  1318  			originalIndex:  i,
  1319  			completeChance: workers[i].completeChanceCached(),
  1320  		})
  1321  	}
  1322  	sort.Sort(&sdw)
  1323  
  1324  	// loop over the workers and try to add them
  1325  	for _, sw := range sdw {
  1326  		w := workers[sw.originalIndex]
  1327  		// workers that have in-progress downloads are re-added as long as we
  1328  		// don't already have a worker for the piece they are downloading
  1329  		currPiece, launched, completed := pdc.workerProgress(w)
  1330  		if launched && !completed {
  1331  			_, exists := pieces[currPiece]
  1332  			if !exists {
  1333  				addWorker(w, currPiece)
  1334  				continue
  1335  			}
  1336  		}
  1337  
  1338  		// loop over the worker's pieces to see whether it can download a piece
  1339  		// for which we don't have a worker yet or which we haven't downloaded yet
  1340  		for _, pieceIndex := range w.pieces(pdc) {
  1341  			if pdc.piecesInfo[pieceIndex].downloaded {
  1342  				continue
  1343  			}
  1344  
  1345  			_, exists := pieces[pieceIndex]
  1346  			if exists {
  1347  				continue
  1348  			}
  1349  
  1350  			addWorker(w, pieceIndex)
  1351  			break // only use a worker once
  1352  		}
  1353  	}
  1354  
  1355  	// loop over the workers again to fill both the most likely and less likely
  1356  	// array with the remainder of the workers, still ensuring a worker is only
  1357  	// used once; this time we don't assert that the piece indices are unique,
  1358  	// which makes it possible to overdrive on the same piece
  1359  	for _, sw := range sdw {
  1360  		w := workers[sw.originalIndex]
  1361  		_, added := added[w.identifier()]
  1362  		if added {
  1363  			continue
  1364  		}
  1365  
  1366  		for _, pieceIndex := range w.pieces(pdc) {
  1367  			if pdc.piecesInfo[pieceIndex].downloaded {
  1368  				continue
  1369  			}
  1370  
  1371  			addWorker(w, pieceIndex)
  1372  			break // only use a worker once
  1373  		}
  1374  	}
  1375  
  1376  	return mostLikely, lessLikely
  1377  }
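
// Illustrative sketch of the two-pass selection above; the workers, pieces
// and ordering in this example are hypothetical.
//
// Say workersNeeded is 3 and, sorted by complete chance, the workers can
// serve the following pieces:
//
//	w1 -> pieces {0}
//	w2 -> pieces {0, 1}
//	w3 -> pieces {1}
//	w4 -> pieces {2}
//
// The first pass only allows unique pieces: w1 takes piece 0, w2 takes piece
// 1, w3 is skipped because piece 1 is already covered, and w4 takes piece 2,
// yielding a most likely set of [w1, w2, w4]. The second pass re-adds the
// leftover workers without the uniqueness requirement, so w3 is appended to
// the less likely set on piece 1, where it can be used to overdrive.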
  1378  
  1379  // bucketIndexRange is a small helper function that returns the bucket index
  1380  // range we want to loop over after finding the first bucket index approximation.
  1381  func bucketIndexRange(bI int) (int, int) {
  1382  	var bIMin int
  1383  	if bI-bucketIndexScanStep >= 0 {
  1384  		bIMin = bI - bucketIndexScanStep
  1385  	}
  1386  
  1387  	bIMax := skymodules.DistributionTrackerTotalBuckets - 1
  1388  	if bI+bucketIndexScanStep < skymodules.DistributionTrackerTotalBuckets {
  1389  		bIMax = bI + bucketIndexScanStep
  1390  	}
  1391  
  1392  	return bIMin, bIMax
  1393  }
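
// Illustrative sketch of bucketIndexRange; the values of bucketIndexScanStep
// and skymodules.DistributionTrackerTotalBuckets are defined elsewhere, a
// step of 4 and 400 total buckets are assumed here purely for the example.
//
//	bucketIndexRange(10)  -> (6, 14)    full window around the approximation
//	bucketIndexRange(2)   -> (0, 6)     lower bound clamped at index 0
//	bucketIndexRange(398) -> (394, 399) upper bound clamped at the last bucket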
  1394  
  1395  // checkGougingAndUpdateCache runs the PCWS gouging check for a worker and
  1396  // updates the cache with the result.
  1397  func (c *pcwsGougingCache) checkGougingAndUpdateCache(hpks string, pt modules.RPCPriceTable, allowance skymodules.Allowance, numWorkers, numRoots int) error {
  1398  	err := gouging.CheckPCWS(allowance, pt, numWorkers, numRoots)
  1399  
  1400  	results, exist := c.staticCache[hpks]
  1401  	if !exist {
  1402  		results = make(map[int]pcwsGougingResult)
  1403  		c.staticCache[hpks] = results
  1404  	}
  1405  	results[numRoots] = pcwsGougingResult{
  1406  		staticAllowance:  allowance,
  1407  		staticNumWorkers: numWorkers,
  1408  		staticPTID:       pt.UID,
  1409  		staticIsGouging:  err,
  1410  	}
  1411  	return err
  1412  }
  1413  
  1414  // IsGouging performs the checkPCWSGouging check but will return a cached result
  1415  // if possible.
  1416  func (c *pcwsGougingCache) IsGouging(hpks string, pt modules.RPCPriceTable, allowance skymodules.Allowance, numWorkers, numRoots int) error {
  1417  	c.mu.Lock()
  1418  	defer c.mu.Unlock()
  1419  
  1420  	results, exist := c.staticCache[hpks]
  1421  	if !exist {
  1422  		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
  1423  	}
  1424  	result, exist := results[numRoots]
  1425  	if !exist {
  1426  		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
  1427  	}
  1428  	if pt.UID != result.staticPTID {
  1429  		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
  1430  	}
  1431  	if numWorkers != result.staticNumWorkers {
  1432  		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
  1433  	}
  1434  	if !reflect.DeepEqual(allowance, result.staticAllowance) {
  1435  		return c.checkGougingAndUpdateCache(hpks, pt, allowance, numWorkers, numRoots)
  1436  	}
  1437  	return result.staticIsGouging
  1438  }
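
// The cached PCWS gouging results are keyed on the host's public key string
// and, within that, on the number of roots. A cached entry is only reused
// when the price table UID, the number of workers and the allowance all match
// the values the entry was computed with; otherwise the check is re-run and
// the cache entry is overwritten.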
  1439  
  1440  // IsGouging performs the checkProjectDownloadGouging check but will return a
  1441  // cached result if possible.
  1442  func (c *pdcGougingCache) IsGouging(hpks string, pt modules.RPCPriceTable, allowance skymodules.Allowance) error {
  1443  	c.mu.Lock()
  1444  	defer c.mu.Unlock()
  1445  
  1446  	cachedResult := c.staticCache[hpks]
  1447  	if cachedResult.staticPTID == pt.UID && reflect.DeepEqual(cachedResult.staticAllowance, allowance) {
  1448  		return cachedResult.staticIsGouging
  1449  	}
  1450  
  1451  	err := gouging.CheckProjectDownload(allowance, pt)
  1452  
  1453  	c.staticCache[hpks] = pdcGougingResult{
  1454  		staticAllowance: allowance,
  1455  		staticPTID:      pt.UID,
  1456  		staticIsGouging: err,
  1457  	}
  1458  	return err
  1459  }
  1460  
  1461  // PruneWorker removes the cached results for a given worker.
  1462  func (c *pcwsGougingCache) PruneWorker(hpks string) {
  1463  	c.mu.Lock()
  1464  	delete(c.staticCache, hpks)
  1465  	c.mu.Unlock()
  1466  }
  1467  
  1468  // PruneWorker removes the cached results for a given worker.
  1469  func (c *pdcGougingCache) PruneWorker(hpks string) {
  1470  	c.mu.Lock()
  1471  	delete(c.staticCache, hpks)
  1472  	c.mu.Unlock()
  1473  }
  1474  
  1475  var staticDownloadGougingCache = &pdcGougingCache{
  1476  	staticCache: make(map[string]pdcGougingResult),
  1477  }
  1478  
  1479  var staticPCWSGougingCache = &pcwsGougingCache{
  1480  	staticCache: make(map[string]map[int]pcwsGougingResult),
  1481  }
  1482  
  1483  // isGoodForDownload is a helper function that returns true if and only if the
  1484  // worker meets a certain set of criteria that make it useful for downloads.
  1485  // It's only useful if it is not on any type of cooldown, if it's async ready
  1486  // and if it's not price gouging.
  1487  func isGoodForDownload(w *worker, pieces []uint64) bool {
  1488  	// workers that can't download any pieces are ignored
  1489  	if len(pieces) == 0 {
  1490  		return false
  1491  	}
  1492  
  1493  	// workers that are on cooldown or not async ready are not useful
  1494  	if w.managedOnMaintenanceCooldown() || !w.managedAsyncReady() {
  1495  		return false
  1496  	}
  1497  
  1498  	// workers that are price gouging are not useful
  1499  	pt := w.staticPriceTable().staticPriceTable
  1500  	allowance := w.staticCache().staticRenterAllowance
  1501  
  1502  	// Check cache.
  1503  	err := staticDownloadGougingCache.IsGouging(w.staticHostPubKeyStr, pt, allowance)
  1504  	return err == nil
  1505  }
  1506  
  1507  // partitionWorkers partitions a slice of workers in-place.
  1508  func partitionWorkers(iws []*individualWorker, isLeft func(i int) bool) (left, right []*individualWorker) {
  1509  	i := 0
  1510  	j := len(iws) - 1
  1511  
  1512  	for i <= j {
  1513  		if !isLeft(i) {
  1514  			// swap and re-examine the element that lands on index i
  1515  			iws[i], iws[j] = iws[j], iws[i]
  1516  			j--
  1517  			continue
  1518  		}
  1519  		i++
  1520  	}
  1521  	return iws[:i], iws[i:]
  1522  }
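
// Illustrative sketch of partitionWorkers; the workers and predicate in this
// example are hypothetical.
//
// Given workers [a, b, c, d] where only a and d satisfy isLeft, the in-place
// partition rearranges the slice to [a, d, c, b] and returns the sub-slices
// [a, d] and [c, b]. Because elements are swapped into place the partition is
// not stable: the relative order of the workers within each half is not
// necessarily preserved.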
  1523  
  1524  // splitResolvedUnresolved is a helper function that splits the given workers
  1525  // into resolved and unresolved worker arrays. Note that if the worker is on a
  1526  // cooldown we exclude it from the returned workers list.
  1527  func splitResolvedUnresolved(workers []*individualWorker) ([]*individualWorker, []*individualWorker) {
  1528  	// filter out the workers on cooldown first.
  1529  	notOnCooldown, _ := partitionWorkers(workers, func(i int) bool {
  1530  		return !workers[i].isOnCooldown()
  1531  	})
  1532  	resolvedWorkers, unresolvedWorkers := partitionWorkers(notOnCooldown, func(i int) bool {
  1533  		return notOnCooldown[i].isResolved()
  1534  	})
  1535  	return resolvedWorkers, unresolvedWorkers
  1536  }
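
// Illustrative sketch of splitResolvedUnresolved with four hypothetical
// workers: w1 (resolved), w2 (unresolved), w3 (resolved but on cooldown) and
// w4 (unresolved).
//
// The first partition drops w3 because it is on a cooldown; the second
// partition splits the remainder by resolved state, so the function returns
// ([w1], [w2, w4]). As noted above for partitionWorkers, the relative order
// within each returned slice is not guaranteed.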