gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/registry.go

     1  package renter
     2  
     3  import (
     4  	"context"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"sort"
     8  	"time"
     9  
    10  	"github.com/opentracing/opentracing-go"
    11  	"gitlab.com/NebulousLabs/errors"
    12  	"gitlab.com/SkynetLabs/skyd/build"
    13  	"gitlab.com/SkynetLabs/skyd/skymodules"
    14  	"gitlab.com/SkynetLabs/skyd/skymodules/gouging"
    15  	"go.sia.tech/siad/crypto"
    16  	"go.sia.tech/siad/modules"
    17  	"go.sia.tech/siad/types"
    18  )
    19  
    20  var (
    21  	// MaxRegistryReadTimeout is the default timeout used when reading from
    22  	// the registry.
    23  	MaxRegistryReadTimeout = build.Select(build.Var{
    24  		Dev:      30 * time.Second,
    25  		Standard: 5 * time.Minute,
    26  		Testing:  30 * time.Second,
    27  	}).(time.Duration)
    28  
    29  	// DefaultRegistryHealthTimeout is the default timeout used when
    30  	// requesting a registry entry's health.
    31  	DefaultRegistryHealthTimeout = build.Select(build.Var{
    32  		Dev:      30 * time.Second,
    33  		Standard: 30 * time.Second,
    34  		Testing:  10 * time.Second,
    35  	}).(time.Duration)
    36  
    37  	// DefaultRegistryUpdateTimeout is the default timeout used when updating
    38  	// the registry.
    39  	DefaultRegistryUpdateTimeout = build.Select(build.Var{
    40  		Dev:      30 * time.Second,
    41  		Standard: 5 * time.Minute,
    42  		Testing:  3 * time.Second,
    43  	}).(time.Duration)
    44  
    45  	// ErrRegistryEntryNotFound is returned if all workers were unable to fetch
    46  	// the entry.
    47  	ErrRegistryEntryNotFound = errors.New("registry entry not found")
    48  
    49  	// ErrRegistryLookupTimeout is similar to ErrRegistryEntryNotFound but it is
    50  	// returned instead if the lookup timed out before all workers returned.
    51  	ErrRegistryLookupTimeout = errors.New("registry entry not found within given time")
    52  
    53  	// ErrRegistryUpdateInsufficientRedundancy is returned if updating the
    54  	// registry failed due to running out of workers before reaching
     55  	// MinUpdateRegistrySuccesses successful updates.
     56  	ErrRegistryUpdateInsufficientRedundancy = errors.New("registry update failed to reach sufficient redundancy")
    57  
    58  	// ErrRegistryUpdateNoSuccessfulUpdates is returned if not a single update
    59  	// was successful.
    60  	ErrRegistryUpdateNoSuccessfulUpdates = errors.New("all registry updates failed")
    61  
    62  	// ErrRegistryUpdateTimeout is returned when updating the registry was
    63  	// aborted before reaching MinUpdateRegistrySuccesses.
    64  	ErrRegistryUpdateTimeout = errors.New("registry update timed out before reaching the minimum amount of updated hosts")
    65  
     66  	// MinUpdateRegistrySuccesses is the minimum number of successful responses
     67  	// we require from UpdateRegistry for the update to be considered valid.
    68  	MinUpdateRegistrySuccesses = build.Select(build.Var{
    69  		Dev:      3,
    70  		Standard: 5,
    71  		Testing:  3,
    72  	}).(int)
    73  
     74  	// RegistryEntryRepairThreshold is the minimum number of successful
     75  	// responses we require from a registry repair.
    76  	RegistryEntryRepairThreshold = build.Select(build.Var{
    77  		Dev:      10,
    78  		Standard: 20,
    79  		Testing:  4,
    80  	}).(int)
    81  
    82  	// ReadRegistryBackgroundTimeout is the amount of time a read registry job
    83  	// can stay active in the background before being cancelled.
    84  	ReadRegistryBackgroundTimeout = build.Select(build.Var{
    85  		Dev:      time.Minute,
    86  		Standard: 2 * time.Minute,
    87  		Testing:  5 * time.Second,
    88  	}).(time.Duration)
    89  
     90  	// updateRegistryMemory is the amount of memory that UpdateRegistry will
     91  	// request from the memory manager.
     92  	updateRegistryMemory = uint64(20 * (1 << 10)) // 20 KiB
    93  
     94  	// readRegistryMemory is the amount of memory that ReadRegistry will
     95  	// request from the memory manager.
     96  	readRegistryMemory = uint64(20 * (1 << 10)) // 20 KiB
    97  
    98  	// updateRegistryBackgroundTimeout is the time an update registry job on a
    99  	// worker stays active in the background after managedUpdateRegistry returns
   100  	// successfully.
   101  	updateRegistryBackgroundTimeout = time.Minute
   102  
    103  	// readRegistryStatsSeed is the first duration added to the registry stats
    104  	// after creating them.
    105  	// NOTE: This needs to be <= ReadRegistryBackgroundTimeout
   106  	readRegistryStatsSeed = build.Select(build.Var{
   107  		Dev:      30 * time.Second,
   108  		Standard: 2 * time.Second,
   109  		Testing:  5 * time.Second,
   110  	}).(time.Duration)
   111  
    112  	// minAwaitedCutoffWorkersPercentage is the percentage of cutoff workers
    113  	// we wait for before cutting off a registry entry lookup.
   114  	minAwaitedCutoffWorkersPercentage = 0.8 // 80%
   115  
    116  	// minCutoffWorkers is the minimum number of workers we wait for when
    117  	// looking up a registry entry.
   118  	minCutoffWorkers = 10
   119  )
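// A minimal sketch of how the two cutoff values above interact, assuming a
// lookup that launched 30 registry-capable workers; "launched" and the numbers
// are only placeholders for illustration. regReadCutoffWorkers keeps the
// fastest half (but at least minCutoffWorkers), and the lookup then waits for
// 80% of those before returning early:
//
//	workersToWaitFor := regReadCutoffWorkers(launched, minCutoffWorkers) // 30 -> 15 workers
//	cutoff := int(float64(len(workersToWaitFor)) * minAwaitedCutoffWorkersPercentage)
//	// cutoff == 12; if the product rounds down to 0, all of workersToWaitFor are awaited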
   120  
   121  // readResponseSet is a helper type which allows for returning a set of ongoing
   122  // ReadRegistry responses.
   123  type readResponseSet struct {
   124  	c    <-chan *jobReadRegistryResponse
   125  	left int
   126  
   127  	readResps []*jobReadRegistryResponse
   128  }
   129  
   130  // newReadResponseSet creates a new set from a response chan and number of
   131  // workers which are expected to write to that chan.
   132  func newReadResponseSet(responseChan <-chan *jobReadRegistryResponse, numWorkers int) *readResponseSet {
   133  	return &readResponseSet{
   134  		c:         responseChan,
   135  		left:      numWorkers,
   136  		readResps: make([]*jobReadRegistryResponse, 0, numWorkers),
   137  	}
   138  }
   139  
   140  // collect will collect all responses. It will block until it has received all
   141  // of them or until the provided context is closed.
   142  func (rrs *readResponseSet) collect(ctx context.Context) []*jobReadRegistryResponse {
   143  	for rrs.responsesLeft() > 0 {
   144  		resp := rrs.next(ctx)
   145  		if resp == nil {
   146  			break
   147  		}
   148  	}
   149  	return rrs.readResps
   150  }
   151  
   152  // next returns the next available response. It will block until the response is
   153  // received or the provided context is closed.
   154  func (rrs *readResponseSet) next(ctx context.Context) *jobReadRegistryResponse {
   155  	select {
   156  	case <-ctx.Done():
   157  		return nil
   158  	case resp := <-rrs.c:
   159  		rrs.readResps = append(rrs.readResps, resp)
   160  		rrs.left--
   161  		return resp
   162  	}
   163  }
   164  
    165  // responsesLeft returns the number of responses that can still be fetched with
    166  // next.
   167  func (rrs *readResponseSet) responsesLeft() int {
   168  	return rrs.left
   169  }
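// A minimal usage sketch for readResponseSet, mirroring how the lookup code
// below consumes it; numWorkers and the response handling are placeholders.
// The channel is buffered with one slot per worker so that workers never block
// when reporting their result:
//
//	responseChan := make(chan *jobReadRegistryResponse, numWorkers)
//	rrs := newReadResponseSet(responseChan, numWorkers)
//	ctx, cancel := context.WithTimeout(context.Background(), MaxRegistryReadTimeout)
//	defer cancel()
//	for rrs.responsesLeft() > 0 {
//		resp := rrs.next(ctx) // blocks for the next response or ctx
//		if resp == nil {
//			break // ctx closed before all workers responded
//		}
//		// inspect resp.staticErr and resp.staticSignedRegistryValue
//	}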
   170  
   171  // RegistryEntryHealth returns the health of a registry entry specified by the
   172  // spk and tweak.
   173  func (r *Renter) RegistryEntryHealth(ctx context.Context, spk types.SiaPublicKey, tweak crypto.Hash) (skymodules.RegistryEntryHealth, error) {
   174  	if err := r.tg.Add(); err != nil {
   175  		return skymodules.RegistryEntryHealth{}, err
   176  	}
   177  	defer r.tg.Done()
   178  	return r.managedRegistryEntryHealth(ctx, modules.DeriveRegistryEntryID(spk, tweak), &spk, &tweak)
   179  }
   180  
   181  // RegistryEntryHealthRID returns the health of a registry entry specified by
   182  // the RID.
   183  func (r *Renter) RegistryEntryHealthRID(ctx context.Context, rid modules.RegistryEntryID) (skymodules.RegistryEntryHealth, error) {
   184  	if err := r.tg.Add(); err != nil {
   185  		return skymodules.RegistryEntryHealth{}, err
   186  	}
   187  	defer r.tg.Done()
   188  	return r.managedRegistryEntryHealth(ctx, rid, nil, nil)
   189  }
   190  
    191  // ReadRegistry starts a registry lookup on all available workers. The jobs have
    192  // until ctx is closed to return a response. The response with the highest
    193  // revision number received in time is used.
   194  func (r *Renter) ReadRegistry(ctx context.Context, spk types.SiaPublicKey, tweak crypto.Hash) (skymodules.RegistryEntry, error) {
   195  	start := time.Now()
   196  	srv, err := r.managedReadRegistry(ctx, modules.DeriveRegistryEntryID(spk, tweak), &spk, &tweak, false)
   197  	if errors.Contains(err, ErrRegistryLookupTimeout) {
   198  		err = errors.AddContext(err, fmt.Sprintf("timed out after %vs", time.Since(start).Seconds()))
   199  	}
   200  	return srv, err
   201  }
   202  
    203  // ReadRegistryRID starts a registry lookup on all available workers. The jobs
    204  // have until ctx is closed to return a response. The response with the highest
    205  // revision number received in time is used.
   206  func (r *Renter) ReadRegistryRID(ctx context.Context, rid modules.RegistryEntryID) (skymodules.RegistryEntry, error) {
   207  	start := time.Now()
   208  	srv, err := r.managedReadRegistry(ctx, rid, nil, nil, false)
   209  	if errors.Contains(err, ErrRegistryLookupTimeout) {
   210  		err = errors.AddContext(err, fmt.Sprintf("timed out after %vs", time.Since(start).Seconds()))
   211  	}
   212  	return srv, err
   213  }
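// A minimal caller-side sketch for the lookup methods above; r, spk and tweak
// are placeholders and the timeout is only an example value:
//
//	ctx, cancel := context.WithTimeout(context.Background(), MaxRegistryReadTimeout)
//	defer cancel()
//	entry, err := r.ReadRegistry(ctx, spk, tweak)
//	if errors.Contains(err, ErrRegistryEntryNotFound) {
//		// no host returned the entry
//	} else if errors.Contains(err, ErrRegistryLookupTimeout) {
//		// ctx expired before enough hosts responded
//	}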
   214  
   215  // UpdateRegistry updates the registries on all workers with the given
   216  // registry value.
   217  func (r *Renter) UpdateRegistry(ctx context.Context, spk types.SiaPublicKey, srv modules.SignedRegistryValue) error {
   218  	// Block until there is memory available, and then ensure the memory gets
   219  	// returned.
   220  	// Since registry entries are very small we use a fairly generous multiple.
   221  	if !r.staticRegistryMemoryManager.Request(ctx, updateRegistryMemory, memoryPriorityHigh) {
   222  		return errors.New("timeout while waiting in job queue - server is busy")
   223  	}
   224  	defer r.staticRegistryMemoryManager.Return(updateRegistryMemory)
   225  
   226  	// Start the UpdateRegistry jobs.
   227  	return r.managedUpdateRegistry(ctx, spk, srv)
   228  }
   229  
   230  // UpdateRegistryMulti updates the registries on the given workers with the
   231  // corresponding registry values.
   232  func (r *Renter) UpdateRegistryMulti(ctx context.Context, srvs map[string]skymodules.RegistryEntry) error {
   233  	// Block until there is memory available, and then ensure the memory gets
   234  	// returned.
   235  	// Since registry entries are very small we use a fairly generous multiple.
   236  	if !r.staticRegistryMemoryManager.Request(ctx, updateRegistryMemory, memoryPriorityHigh) {
   237  		return errors.New("timeout while waiting in job queue - server is busy")
   238  	}
   239  	defer r.staticRegistryMemoryManager.Return(updateRegistryMemory)
   240  
   241  	// Start the UpdateRegistry jobs.
   242  	workers := r.staticWorkerPool.callWorkers()
   243  	return r.managedUpdateRegistryMulti(ctx, workers, srvs, MinUpdateRegistrySuccesses)
   244  }
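// A minimal sketch of preparing a value for UpdateRegistry, assuming the usual
// siad helpers (crypto.GenerateKeyPair, types.Ed25519PublicKey,
// modules.NewRegistryValue, RegistryValue.Sign); the data, revision and tweak
// are placeholders:
//
//	sk, pk := crypto.GenerateKeyPair()
//	spk := types.Ed25519PublicKey(pk)
//	tweak := crypto.HashBytes([]byte("example key"))
//	rv := modules.NewRegistryValue(tweak, []byte("example data"), 0, modules.RegistryTypeWithoutPubkey)
//	srv := rv.Sign(sk)
//	err := r.UpdateRegistry(ctx, spk, srv)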
   245  
    246  // managedRegistryEntryHealth reads an entry from all hosts on the network until
    247  // ctx is closed. It then determines the best entry and counts how many times
    248  // that entry was found on the network.
   249  func (r *Renter) managedRegistryEntryHealth(ctx context.Context, rid modules.RegistryEntryID, spk *types.SiaPublicKey, tweak *crypto.Hash) (skymodules.RegistryEntryHealth, error) {
   250  	// Start tracing.
   251  	tracer := opentracing.GlobalTracer()
   252  	span := tracer.StartSpan("managedRegistryEntryHealth")
   253  	defer span.Finish()
   254  
   255  	// Log some info about this trace.
   256  	span.LogKV("RID", hex.EncodeToString(rid[:]))
   257  	if spk != nil && tweak != nil {
   258  		span.LogKV("SPK", spk.String())
   259  		span.LogKV("Tweak", tweak.String())
   260  	}
   261  
   262  	// Block until there is memory available, and then ensure the memory gets
   263  	// returned.
   264  	// Since registry entries are very small we use a fairly generous multiple.
   265  	if !r.staticRegistryMemoryManager.Request(ctx, readRegistryMemory, memoryPriorityHigh) {
   266  		return skymodules.RegistryEntryHealth{}, errors.New("timeout while waiting in job queue - server is busy")
   267  	}
   268  	defer r.staticRegistryMemoryManager.Return(readRegistryMemory)
   269  
    270  	// Specify a context for the background jobs. It is cancelled as soon as
    271  	// this method returns.
   272  	backgroundCtx, backgroundCancel := context.WithCancel(r.tg.StopCtx())
   273  	defer backgroundCancel()
   274  	responseSet, launchedWorkers := r.managedLaunchReadRegistryWorkers(backgroundCtx, span, rid, spk, tweak)
   275  
   276  	// If there are no workers remaining, fail early.
   277  	if responseSet.left == 0 {
   278  		return skymodules.RegistryEntryHealth{}, errors.AddContext(skymodules.ErrNotEnoughWorkersInWorkerPool, "cannot perform ReadRegistry")
   279  	}
   280  
   281  	// Collect as many responses as possible before the ctx is closed.
   282  	var best *jobReadRegistryResponse
   283  	resps := responseSet.collect(ctx)
   284  	for _, resp := range resps {
   285  		if resp.staticErr != nil {
   286  			continue
   287  		}
   288  		if isBetter, _ := isBetterReadRegistryResponse(best, resp); isBetter {
   289  			best = resp
   290  		}
   291  	}
   292  
   293  	// If no entry was found return all 0s.
   294  	if best == nil || best.staticSignedRegistryValue == nil {
   295  		return skymodules.RegistryEntryHealth{}, nil
   296  	}
   297  	bestSRV := best.staticSignedRegistryValue
   298  
   299  	// Get the cutoff workers and wait for 80% of them to finish.
   300  	workersToWaitFor := regReadCutoffWorkers(launchedWorkers, minCutoffWorkers)
   301  	awaitedWorkers := 0
   302  	cutoff := int(float64(len(workersToWaitFor)) * minAwaitedCutoffWorkersPercentage)
   303  	if cutoff == 0 {
   304  		cutoff = len(workersToWaitFor)
   305  	}
   306  	if r.staticDeps.Disrupt("DelayRegistryHealthResponses") {
    307  		cutoff = 0 // all workers will be considered to come after the cutoff
   308  	}
   309  
   310  	// Count the number of responses that match the best one. We do so by
   311  	// asking for the reason why the individual entries can't update the
   312  	// best one. If ErrSameRevNum is returned, the entries are equal.
   313  	var nTotal, nBestTotal, nBestTotalBeforeCutoff, nPrimary uint64
   314  	for _, resp := range resps {
   315  		// Check if response arrived before cutoff.
   316  		beforeCutoff := awaitedWorkers < cutoff
   317  		// Check if the response comes from one of the workers we wait
   318  		// for.
   319  		_, exists := workersToWaitFor[resp.staticWorker.staticHostPubKeyStr]
   320  		if exists {
   321  			awaitedWorkers++
   322  		}
   323  		if resp.staticSignedRegistryValue == nil {
   324  			// Ignore responses without value.
   325  			continue
   326  		}
   327  		nTotal++
   328  		// We call ShouldUpdateWith without pubkey here because we don't
   329  		// want to prefer primary entries here. We will explicitly check
   330  		// for them afterwards.
   331  		update, reason := bestSRV.ShouldUpdateWith(&resp.staticSignedRegistryValue.RegistryValue, types.SiaPublicKey{})
   335  		if update || errors.Contains(reason, modules.ErrSameRevNum) {
   336  			nBestTotal++
   337  			// Check if it is a primary entry.
   338  			if resp.staticSignedRegistryValue.IsPrimaryEntry(resp.staticWorker.staticHostPubKey) {
   339  				nPrimary++
   340  			}
   341  			// Check if we have waited for enough workers.
   342  			if beforeCutoff {
   343  				nBestTotalBeforeCutoff++
   344  			}
   345  		}
   346  	}
   347  	return skymodules.RegistryEntryHealth{
   348  		RevisionNumber:             bestSRV.Revision,
   349  		NumEntries:                 nTotal,
   350  		NumBestEntries:             nBestTotal,
   351  		NumBestEntriesBeforeCutoff: nBestTotalBeforeCutoff,
   352  		NumBestPrimaryEntries:      nPrimary,
   353  	}, nil
   354  }
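// A minimal sketch of interpreting the health report returned above; the 0.75
// threshold is an arbitrary example, not a value used by the renter:
//
//	health, err := r.RegistryEntryHealth(ctx, spk, tweak)
//	if err == nil && health.NumEntries > 0 {
//		ratio := float64(health.NumBestEntries) / float64(health.NumEntries)
//		wellSeeded := ratio >= 0.75 // most hosts hold the best revision
//		_ = wellSeeded
//	}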
   355  
    356  // managedReadRegistry starts a registry lookup on all available workers. The
    357  // jobs have until ctx is closed to return a response; the response with the
    358  // highest revision number received in time is used. If ignoreCutoff is set, the
    359  // read is not aborted early once enough hosts responded but only when ctx closes.
   360  func (r *Renter) managedReadRegistry(ctx context.Context, rid modules.RegistryEntryID, spk *types.SiaPublicKey, tweak *crypto.Hash, ignoreCutoff bool) (skymodules.RegistryEntry, error) {
   361  	// Start tracing.
   362  	tracer := opentracing.GlobalTracer()
   363  	span := tracer.StartSpan("managedReadRegistry")
   364  	defer span.Finish()
   365  
   366  	// Check if we are subscribed to the entry first.
   367  	subscribedRV, ok := r.staticSubscriptionManager.Get(rid)
   368  	span.SetTag("cached", ok)
   369  	if ok && subscribedRV != nil {
   370  		if subscribedRV.Type == modules.RegistryTypeInvalid {
   371  			return skymodules.RegistryEntry{}, ErrRegistryEntryNotFound
   372  		}
   373  		return *subscribedRV, nil
   374  	}
   375  	if r.staticDeps.Disrupt("ReadRegistryCacheOnly") {
   376  		return skymodules.RegistryEntry{}, errors.New("ReadRegistryCacheOnly")
   377  	}
   378  
   379  	// Measure the time it takes to fetch the entry.
   380  	startTime := time.Now()
   381  	defer func() {
   382  		r.staticRegistryReadStats.AddDataPoint(time.Since(startTime))
   383  	}()
   384  
   385  	// Log some info about this trace.
   386  	span.LogKV("RID", hex.EncodeToString(rid[:]))
   387  	if spk != nil && tweak != nil {
   388  		span.LogKV("SPK", spk.String())
   389  		span.LogKV("Tweak", tweak.String())
   390  	}
   391  
   392  	// Block until there is memory available, and then ensure the memory gets
   393  	// returned.
   394  	// Since registry entries are very small we use a fairly generous multiple.
   395  	if !r.staticRegistryMemoryManager.Request(ctx, readRegistryMemory, memoryPriorityHigh) {
   396  		return skymodules.RegistryEntry{}, errors.New("timeout while waiting in job queue - server is busy")
   397  	}
   398  	defer r.staticRegistryMemoryManager.Return(readRegistryMemory)
   399  
   400  	// Specify a context for the background jobs. It will be closed as soon as
   401  	// threadedHandleRegistryRepairs is done.
   402  	backgroundCtx, backgroundCancel := context.WithCancel(r.tg.StopCtx())
   403  
   404  	responseSet, launchedWorkers := r.managedLaunchReadRegistryWorkers(backgroundCtx, span, rid, spk, tweak)
   405  	numWorkers := len(launchedWorkers)
   406  
   407  	// If there are no workers remaining, fail early.
   408  	if numWorkers == 0 {
   409  		backgroundCancel()
   410  		return skymodules.RegistryEntry{}, errors.AddContext(skymodules.ErrNotEnoughWorkersInWorkerPool, "cannot perform ReadRegistry")
   411  	}
   412  
   413  	defer func() {
   414  		_ = r.tg.Launch(func() {
   415  			defer backgroundCancel()
   416  
   417  			// Handle registry repairs.
   418  			r.threadedHandleRegistryRepairs(r.tg.StopCtx(), span, responseSet)
   419  		})
   420  	}()
   421  
   422  	// Get the cutoff workers and wait for 80% of them to finish.
   423  	workersToWaitFor := regReadCutoffWorkers(launchedWorkers, minCutoffWorkers)
   424  	awaitedWorkers := 0
   425  	cutoff := int(float64(len(workersToWaitFor)) * minAwaitedCutoffWorkersPercentage)
   426  	if cutoff == 0 {
   427  		cutoff = len(workersToWaitFor)
   428  	}
   429  
   430  	// Prevent reaching the cutoff point when ReadRegistryBlocking is
   431  	// injected as a dependency.
   432  	if r.staticDeps.Disrupt("ReadRegistryBlocking") {
   433  		awaitedWorkers = -1
   434  	}
   435  
   436  	var best *jobReadRegistryResponse
   437  	responses := 0
   438  	// Wait for responses until either there are no responses left or until
   439  	// we have waited for enough of our workersToWaitFor.
   440  	for responseSet.responsesLeft() > 0 {
   441  		// Check cancel condition and block for more responses.
   442  		resp := responseSet.next(ctx)
   443  		if resp == nil {
   444  			break // context triggered
   445  		}
   446  
   447  		// Check if we have waited for enough workers.
   448  		if !ignoreCutoff && awaitedWorkers >= cutoff {
   449  			break // done
   450  		}
   451  
   452  		// Check if the response comes from one of the workers we wait
   453  		// for.
   454  		_, exists := workersToWaitFor[resp.staticWorker.staticHostPubKeyStr]
   455  		if exists {
   456  			awaitedWorkers++
   457  		}
   458  
   459  		// Increment responses.
   460  		responses++
   461  
   462  		// Ignore error responses and responses that returned no entry.
   463  		if resp.staticErr != nil || resp.staticSignedRegistryValue == nil {
   464  			continue
   465  		}
   466  
   467  		// Remember the best response.
   468  		if isBetter, _ := isBetterReadRegistryResponse(best, resp); isBetter {
   469  			best = resp
   470  		}
   471  	}
   472  
    473  	// If we don't have a successful response and also didn't receive a response
    474  	// from every worker, we timed out.
   475  	noResponse := best == nil || best.staticSignedRegistryValue == nil
   476  	if noResponse && responses < numWorkers {
   477  		return skymodules.RegistryEntry{}, ErrRegistryLookupTimeout
   478  	}
   479  
   480  	// If we don't have a successful response but received a response from every
   481  	// worker, we were unable to look up the entry.
   482  	if noResponse {
   483  		return skymodules.RegistryEntry{}, ErrRegistryEntryNotFound
   484  	}
   485  	return *best.staticSignedRegistryValue, nil
   486  }
   487  
   488  // managedLaunchReadRegistryWorkers launches read registry jobs on all available
   489  // workers and returns a read response set which can be used to wait for the
   490  // workers' responses.
   491  func (r *Renter) managedLaunchReadRegistryWorkers(ctx context.Context, span opentracing.Span, rid modules.RegistryEntryID, spk *types.SiaPublicKey, tweak *crypto.Hash) (*readResponseSet, []*worker) {
   492  	// Get the full list of workers and create a channel to receive all of the
   493  	// results from the workers. The channel is buffered with one slot per
   494  	// worker, so that the workers do not have to block when returning the
   495  	// result of the job, even if this thread is not listening.
   496  	workers := r.staticWorkerPool.callWorkers()
   497  	staticResponseChan := make(chan *jobReadRegistryResponse, len(workers))
   498  
   499  	// Filter out hosts that don't support the registry.
   500  	numRegistryWorkers := 0
   501  	for _, worker := range workers {
   502  		cache := worker.staticCache()
   503  		if build.VersionCmp(cache.staticHostVersion, minRegistryVersion) < 0 {
   504  			continue
   505  		}
   506  
   507  		// check for price gouging
   508  		//
   509  		// TODO: use 'checkProjectDownloadGouging' gouging for some basic
   510  		// protection. Should be replaced as part of the gouging overhaul.
   511  		pt := worker.staticPriceTable().staticPriceTable
   512  		err := gouging.CheckProjectDownload(cache.staticRenterAllowance, pt)
   513  		if err != nil {
   514  			r.staticLog.Debugf("price gouging detected in worker %v, err: %v\n", worker.staticHostPubKeyStr, err)
   515  			continue
   516  		}
   517  
   518  		jrr := worker.newJobReadRegistryEID(ctx, span, staticResponseChan, rid, spk, tweak)
   519  		if !worker.staticJobReadRegistryQueue.callAdd(jrr) {
   520  			// This will filter out any workers that are on cooldown or
   521  			// otherwise can't participate in the project.
   522  			continue
   523  		}
   524  		workers[numRegistryWorkers] = worker
   525  		numRegistryWorkers++
   526  	}
   527  	workers = workers[:numRegistryWorkers]
   528  
    529  	// If the dependency is injected, increment numRegistryWorkers. This causes
    530  	// the loop to never exit unless the context is closed, since the response
    531  	// set will never be able to read the final response.
   532  	if r.staticDeps.Disrupt("ReadRegistryBlocking") {
   533  		numRegistryWorkers++
   534  	}
   535  
   536  	return newReadResponseSet(staticResponseChan, numRegistryWorkers), workers
   537  }
   538  
   539  // managedUpdateRegistry updates the registries on all workers with the given
   540  // registry value.
   541  // NOTE: the input ctx only unblocks the call if it fails to hit the threshold
   542  // before the timeout. It doesn't stop the update jobs. That's because we want
   543  // to always make sure we update as many hosts as possible.
   544  func (r *Renter) managedUpdateRegistry(ctx context.Context, spk types.SiaPublicKey, srv modules.SignedRegistryValue) (err error) {
   545  	workers := r.staticWorkerPool.callWorkers()
   546  	srvs := make(map[string]skymodules.RegistryEntry, len(workers))
   547  	for _, w := range workers {
   548  		srvs[w.staticHostPubKeyStr] = skymodules.NewRegistryEntry(spk, srv)
   549  	}
   550  	return r.managedUpdateRegistryMulti(ctx, workers, srvs, MinUpdateRegistrySuccesses)
   551  }
   552  
    553  // managedUpdateRegistryMulti updates the registries on the given workers with
    554  // the corresponding registry values.
    555  // NOTE: the input ctx only unblocks the call if it fails to hit the threshold
    556  // before the timeout. It doesn't stop the update jobs. That's because we want
    557  // to always make sure we update as many hosts as possible.
   558  func (r *Renter) managedUpdateRegistryMulti(ctx context.Context, workers []*worker, srvs map[string]skymodules.RegistryEntry, minUpdates int) (err error) {
   559  	// Start tracing.
   560  	start := time.Now()
   561  	tracer := opentracing.GlobalTracer()
   562  	span := tracer.StartSpan("managedUpdateRegistryMulti")
   563  	defer span.Finish()
   564  
   565  	// Check how many updates we expect at the very least.
   566  	if minUpdates > len(srvs) {
   567  		minUpdates = len(srvs)
   568  	}
   569  
   570  	// Verify the signatures before updating the hosts.
   571  	for _, srv := range srvs {
   572  		if err := srv.Verify(); err != nil {
   573  			return errors.AddContext(err, "managedUpdateRegistry: failed to verify signature of entry")
   574  		}
   575  	}
    576  	// Create a channel to receive all of the results from the workers. The
    577  	// channel is buffered with one slot per worker, so that the workers do not
    578  	// have to block when returning the result of the job, even if this thread
    579  	// is not listening.
   580  	staticResponseChan := make(chan *jobUpdateRegistryResponse, len(workers))
   581  	span.LogKV("workers", len(workers))
   582  
   583  	// Create a context to continue updating registry values in the background.
   584  	updateTimeoutCtx, updateTimeoutCancel := context.WithTimeout(r.tg.StopCtx(), updateRegistryBackgroundTimeout)
   585  	defer func() {
   586  		if err != nil {
   587  			// If managedUpdateRegistry fails the caller is going to assume that
   588  			// updating the value failed. Don't let any jobs linger in that
   589  			// case.
   590  			updateTimeoutCancel()
   591  		}
   592  	}()
   593  
   594  	// Filter out hosts that don't support the registry.
   595  	numRegistryWorkers := 0
   596  	for _, worker := range workers {
   597  		// Filter out workers that we don't have an srv for.
   598  		srv, exists := srvs[worker.staticHostPubKeyStr]
   599  		if !exists {
   600  			continue
   601  		}
   602  		// Check if worker is good for updating the registry.
   603  		if !isWorkerGoodForRegistryUpdate(worker) {
   604  			continue
   605  		}
   606  
   607  		// Create the job.
   608  		jrr := worker.newJobUpdateRegistry(updateTimeoutCtx, span, staticResponseChan, srv.PubKey, srv.SignedRegistryValue)
   609  		if !worker.staticJobUpdateRegistryQueue.callAdd(jrr) {
   610  			// This will filter out any workers that are on cooldown or
   611  			// otherwise can't participate in the project.
   612  			continue
   613  		}
   614  		workers[numRegistryWorkers] = worker
   615  		numRegistryWorkers++
   616  	}
   617  	workers = workers[:numRegistryWorkers]
   618  	// If there are no workers remaining, fail early.
   619  	if len(workers) < minUpdates {
   620  		return errors.AddContext(skymodules.ErrNotEnoughWorkersInWorkerPool, "cannot perform UpdateRegistry")
   621  	}
   622  
   623  	workersLeft := len(workers)
   624  	responses := 0
   625  	successfulResponses := 0
   626  
   627  	var respErrs error
   628  	for successfulResponses < minUpdates && workersLeft+successfulResponses >= minUpdates {
   629  		// Check deadline.
   630  		var resp *jobUpdateRegistryResponse
   631  		select {
   632  		case <-ctx.Done():
   633  			// Timeout reached.
   634  			return ErrRegistryUpdateTimeout
   635  		case resp = <-staticResponseChan:
   636  		}
   637  
   638  		// Decrement the number of workers.
   639  		workersLeft--
   640  
   641  		// Increment number of responses.
   642  		responses++
   643  
   644  		// Ignore error responses except for invalid revision errors.
   645  		if resp.staticErr != nil {
   646  			// If we receive an error indicating that a better entry exists on
   647  			// the network we immediately return an error. That's because our
   648  			// update won't be able to change the consensus of the network on
   649  			// the latest entry.
   650  			if modules.IsRegistryEntryExistErr(resp.staticErr) {
   651  				return resp.staticErr
   652  			}
   653  			respErrs = errors.Compose(respErrs, resp.staticErr)
   654  			continue
   655  		}
   656  
   657  		// Increment successful responses.
   658  		successfulResponses++
   659  	}
   660  
   661  	// Check if we ran out of workers.
   662  	if successfulResponses == 0 {
   663  		r.staticLog.Print("RegistryUpdate failed with 0 successful responses: ", respErrs)
   664  		return errors.Compose(err, ErrRegistryUpdateNoSuccessfulUpdates)
   665  	}
   666  	if successfulResponses < minUpdates {
   667  		r.staticLog.Printf("RegistryUpdate failed with %v < %v successful responses: %v", successfulResponses, minUpdates, respErrs)
   668  		return errors.Compose(err, ErrRegistryUpdateInsufficientRedundancy)
   669  	}
   670  	r.staticRegWriteStats.AddDataPoint(time.Since(start))
   671  	return nil
   672  }
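// A worked example of the stopping condition in the loop above, with
// illustrative numbers: given 20 launched workers and minUpdates == 5, two
// successes followed by sixteen failures leave workersLeft == 2. Since
// 2+2 < 5, the loop exits without waiting for the remaining two responses and
// ErrRegistryUpdateInsufficientRedundancy is returned, because even two more
// successes could no longer reach the threshold.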
   673  
    674  // isBetterReadRegistryResponse returns true if resp2 is a better response than
    675  // resp1 and false otherwise; better means a higher revision number, more work or
    676  // a faster completion time. The second return value reports whether they are equal.
   677  func isBetterReadRegistryResponse(resp1, resp2 *jobReadRegistryResponse) (bool, bool) {
   678  	// Check for nil response.
   679  	if resp2 == nil {
   680  		// A nil entry never replaces an existing entry.
   681  		return false, resp1 == resp2
   682  	} else if resp1 == nil {
   683  		// A non-nil entry always replaces a nil entry.
   684  		return true, resp1 == resp2
   685  	}
   686  	// Same but with the entries.
   687  	srv1 := resp1.staticSignedRegistryValue
   688  	srv2 := resp2.staticSignedRegistryValue
   689  	if srv2 == nil {
   690  		return false, srv1 == srv2
   691  	} else if srv1 == nil {
   692  		return true, srv1 == srv2
   693  	}
   694  	// Compare entries. We pass the empty key here since we don't care about
   695  	// whether the entry is a primary or secondary one.
   696  	shouldUpdate, updateErr := srv1.ShouldUpdateWith(&srv2.RegistryValue, types.SiaPublicKey{})
   697  
   698  	// If the entry is not capable of updating the existing one and both entries
   699  	// have the same revision number, use the time.
   700  	if !shouldUpdate && errors.Contains(updateErr, modules.ErrSameRevNum) {
   701  		return resp2.staticCompleteTime.Before(resp1.staticCompleteTime), true
   702  	}
   703  
    704  	// Otherwise we return the result of ShouldUpdateWith.
   705  	return shouldUpdate, false
   706  }
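// A small worked example of the pair returned above; resp is a placeholder
// response that carries a value:
//
//	better, equal := isBetterReadRegistryResponse(nil, resp) // true, false: anything beats nil
//	better, equal = isBetterReadRegistryResponse(resp, resp) // false, true: same revision, tie
//
// When the second value is true both responses carry an equally good entry and
// only the faster completion time decided the first value.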
   707  
    708  // threadedHandleRegistryRepairs waits for all provided read registry jobs to
    709  // finish and updates all hosts whose responses either didn't provide the
    710  // highest revision number or didn't have the entry at all.
   711  func (r *Renter) threadedHandleRegistryRepairs(ctx context.Context, parentSpan opentracing.Span, responseSet *readResponseSet) {
   712  	if err := r.tg.Add(); err != nil {
   713  		return
   714  	}
   715  	defer r.tg.Done()
   716  
   717  	span := opentracing.StartSpan("threadedHandleRegistryRepairs", opentracing.ChildOf(parentSpan.Context()))
   718  	defer span.Finish()
   719  
   720  	// Collect all responses.
   721  	ctx, cancel := context.WithTimeout(ctx, ReadRegistryBackgroundTimeout)
   722  	defer cancel()
   723  	resps := responseSet.collect(ctx)
   724  	if resps == nil {
   725  		return // nothing to do
   726  	}
   727  
   728  	// Find the best response.
   729  	var best *jobReadRegistryResponse
   730  	for _, resp := range resps {
   731  		if better, _ := isBetterReadRegistryResponse(best, resp); better {
   732  			best = resp
   733  		}
   734  	}
   735  
   736  	// If no entry was found we can't do anything.
   737  	if best == nil || best.staticSignedRegistryValue == nil {
   738  		return
   739  	}
   740  	bestSRV := best.staticSignedRegistryValue
   741  
   742  	// Register the update to make sure we don't try again if a value is rapidly
   743  	// polled before this update is done.
   744  	rid := modules.DeriveRegistryEntryID(bestSRV.PubKey, bestSRV.Tweak)
   745  	r.ongoingRegistryRepairsMu.Lock()
   746  	_, exists := r.ongoingRegistryRepairs[rid]
   747  	if !exists {
   748  		r.ongoingRegistryRepairs[rid] = struct{}{}
   749  	}
   750  	r.ongoingRegistryRepairsMu.Unlock()
   751  	if exists {
   752  		return // ongoing update found
   753  	}
   754  
   755  	// Unregister the update once done.
   756  	defer func() {
   757  		r.ongoingRegistryRepairsMu.Lock()
   758  		delete(r.ongoingRegistryRepairs, rid)
   759  		r.ongoingRegistryRepairsMu.Unlock()
   760  	}()
   761  
   762  	// Figure out how many entries with the highest revision are out there.
   763  	upToDateHosts := make(map[string]struct{})
   764  	for _, resp := range resps {
   765  		if resp == nil || resp.staticSignedRegistryValue == nil || resp.staticErr != nil {
   766  			continue
   767  		}
   768  		if resp.staticSignedRegistryValue.Revision != best.staticSignedRegistryValue.Revision {
   769  			continue
   770  		}
   771  		upToDateHosts[resp.staticWorker.staticHostPubKeyStr] = struct{}{}
   772  	}
   773  
   774  	// Check if the entry requires repairing.
   775  	if len(upToDateHosts) >= RegistryEntryRepairThreshold {
   776  		return
   777  	}
   778  
   779  	// Prepare the updates.
   780  	workers := r.staticWorkerPool.callWorkers()
   781  	srvs := make(map[string]skymodules.RegistryEntry, len(workers))
   782  	for _, w := range workers {
   783  		if _, upToDate := upToDateHosts[w.staticHostPubKeyStr]; upToDate {
   784  			continue
   785  		}
   786  		srvs[w.staticHostPubKeyStr] = *best.staticSignedRegistryValue
   787  	}
   788  
   789  	// Update the registry.
   790  	err := r.managedUpdateRegistryMulti(ctx, workers, srvs, RegistryEntryRepairThreshold-len(upToDateHosts))
   791  	if err != nil {
   792  		r.staticLog.Debugln("threadedHandleRegistryRepairs: failed to update registry", err)
   793  	}
   794  }
   795  
   796  // isWorkerGoodForRegistryUpdate is a helper function which returns 'true' if a
   797  // worker can be used for updating the registry.
   798  func isWorkerGoodForRegistryUpdate(worker *worker) bool {
   799  	cache := worker.staticCache()
   800  	if build.VersionCmp(cache.staticHostVersion, minRegistryVersion) < 0 {
   801  		return false
   802  	}
   803  	// Skip !goodForUpload workers.
   804  	if !cache.staticContractUtility.GoodForUpload {
   805  		return false
   806  	}
   807  
   808  	// check for price gouging
   809  	pt := worker.staticPriceTable().staticPriceTable
   810  	err := gouging.CheckUpload(cache.staticRenterAllowance, pt)
   811  	if err != nil {
   812  		return false
   813  	}
   814  	return true
   815  }
   816  
    817  // regReadCutoffWorkers returns, amongst the provided launched workers, the
    818  // workers to wait for before considering a lookup's result good enough.
   819  func regReadCutoffWorkers(workers []*worker, minWorkers int) map[string]*worker {
   820  	// Filter malicious hosts.
   821  	i := 0
   822  	for _, w := range workers {
   823  		if w.staticCache().staticMaliciousHost {
   824  			continue
   825  		}
   826  		workers[i] = w
   827  		i++
   828  	}
   829  	workers = workers[:i]
   830  	// Sort workers by their estimate.
   831  	sort.Slice(workers, func(i, j int) bool {
   832  		return workers[i].ReadRegCutoffEstimate() < workers[j].ReadRegCutoffEstimate()
   833  	})
   834  	// Drop slowest 50% but don't go below the min.
   835  	newLen := len(workers) / 2
   836  	if newLen < minWorkers && minWorkers <= len(workers) {
   837  		newLen = minWorkers
   838  	} else if newLen < minWorkers && minWorkers > len(workers) {
   839  		newLen = len(workers)
   840  	}
   841  	workers = workers[:newLen]
   842  
   843  	// Put remaining ones in map.
   844  	remaining := make(map[string]*worker, len(workers))
   845  	for _, w := range workers {
   846  		remaining[w.staticHostPubKeyStr] = w
   847  	}
   848  	return remaining
   849  }
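// regReadCutoffWorkers in numbers, as a sketch of the selection above
// ("launched" is a placeholder slice of workers): with 25 launched,
// non-malicious workers and minWorkers == 10, the slowest half is dropped and
// the 12 fastest remain (25/2, still above the minimum); with only 8 launched
// workers all 8 are kept because minWorkers exceeds the available count.
//
//	fastest := regReadCutoffWorkers(launched, minCutoffWorkers)
//	// len(fastest) == 12 for 25 launched workers, 8 for 8 launched workers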