gitlab.com/SkynetLabs/skyd@v1.6.9/skymodules/renter/registry.go

package renter

import (
	"context"
	"encoding/hex"
	"fmt"
	"sort"
	"time"

	"github.com/opentracing/opentracing-go"
	"gitlab.com/NebulousLabs/errors"
	"gitlab.com/SkynetLabs/skyd/build"
	"gitlab.com/SkynetLabs/skyd/skymodules"
	"gitlab.com/SkynetLabs/skyd/skymodules/gouging"
	"go.sia.tech/siad/crypto"
	"go.sia.tech/siad/modules"
	"go.sia.tech/siad/types"
)

var (
	// MaxRegistryReadTimeout is the default timeout used when reading from
	// the registry.
	MaxRegistryReadTimeout = build.Select(build.Var{
		Dev:      30 * time.Second,
		Standard: 5 * time.Minute,
		Testing:  30 * time.Second,
	}).(time.Duration)

	// DefaultRegistryHealthTimeout is the default timeout used when
	// requesting a registry entry's health.
	DefaultRegistryHealthTimeout = build.Select(build.Var{
		Dev:      30 * time.Second,
		Standard: 30 * time.Second,
		Testing:  10 * time.Second,
	}).(time.Duration)

	// DefaultRegistryUpdateTimeout is the default timeout used when updating
	// the registry.
	DefaultRegistryUpdateTimeout = build.Select(build.Var{
		Dev:      30 * time.Second,
		Standard: 5 * time.Minute,
		Testing:  3 * time.Second,
	}).(time.Duration)

	// ErrRegistryEntryNotFound is returned if all workers were unable to fetch
	// the entry.
	ErrRegistryEntryNotFound = errors.New("registry entry not found")

	// ErrRegistryLookupTimeout is similar to ErrRegistryEntryNotFound but it is
	// returned instead if the lookup timed out before all workers returned.
	ErrRegistryLookupTimeout = errors.New("registry entry not found within given time")

	// ErrRegistryUpdateInsufficientRedundancy is returned if updating the
	// registry failed due to running out of workers before reaching
	// MinUpdateRegistrySuccesses successful updates.
	ErrRegistryUpdateInsufficientRedundancy = errors.New("registry update failed to reach sufficient redundancy")

	// ErrRegistryUpdateNoSuccessfulUpdates is returned if not a single update
	// was successful.
	ErrRegistryUpdateNoSuccessfulUpdates = errors.New("all registry updates failed")

	// ErrRegistryUpdateTimeout is returned when updating the registry was
	// aborted before reaching MinUpdateRegistrySuccesses.
	ErrRegistryUpdateTimeout = errors.New("registry update timed out before reaching the minimum amount of updated hosts")

	// MinUpdateRegistrySuccesses is the minimum number of successful responses
	// we require from UpdateRegistry for the update to be considered valid.
	MinUpdateRegistrySuccesses = build.Select(build.Var{
		Dev:      3,
		Standard: 5,
		Testing:  3,
	}).(int)

	// RegistryEntryRepairThreshold is the minimum number of successful
	// responses we require from a registry repair.
	RegistryEntryRepairThreshold = build.Select(build.Var{
		Dev:      10,
		Standard: 20,
		Testing:  4,
	}).(int)

	// ReadRegistryBackgroundTimeout is the amount of time a read registry job
	// can stay active in the background before being cancelled.
	ReadRegistryBackgroundTimeout = build.Select(build.Var{
		Dev:      time.Minute,
		Standard: 2 * time.Minute,
		Testing:  5 * time.Second,
	}).(time.Duration)

	// updateRegistryMemory is the amount of memory that UpdateRegistry will
	// request from the memory manager.
	updateRegistryMemory = uint64(20 * (1 << 10)) // 20kib

	// readRegistryMemory is the amount of memory that ReadRegistry will
	// request from the memory manager.
	readRegistryMemory = uint64(20 * (1 << 10)) // 20kib

	// updateRegistryBackgroundTimeout is the time an update registry job on a
	// worker stays active in the background after managedUpdateRegistry returns
	// successfully.
	updateRegistryBackgroundTimeout = time.Minute

	// readRegistryStatsSeed is the first duration added to the registry stats
	// after creating it.
	// NOTE: This needs to be <= ReadRegistryBackgroundTimeout
	readRegistryStatsSeed = build.Select(build.Var{
		Dev:      30 * time.Second,
		Standard: 2 * time.Second,
		Testing:  5 * time.Second,
	}).(time.Duration)

	// minAwaitedCutoffWorkersPercentage is the percentage of cutoff workers
	// we wait for before cutting off a registry entry lookup.
	minAwaitedCutoffWorkersPercentage = 0.8 // 80%

	// minCutoffWorkers is the lower limit of workers we wait for when
	// looking up a registry entry.
	minCutoffWorkers = 10
)
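
// The cutoff parameters above combine as follows (illustrative sketch, not
// part of the build; the numbers are assumptions, not measurements): if 40
// workers are launched and none is flagged malicious, regReadCutoffWorkers
// keeps the fastest half (20, which is above minCutoffWorkers), and a lookup
// may return once int(20 * minAwaitedCutoffWorkersPercentage) = 16 of those
// workers have responded.
//
//	launched := 40
//	kept := launched / 2                                             // 20
//	cutoff := int(float64(kept) * minAwaitedCutoffWorkersPercentage) // 16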

// readResponseSet is a helper type which allows for returning a set of ongoing
// ReadRegistry responses.
type readResponseSet struct {
	c    <-chan *jobReadRegistryResponse
	left int

	readResps []*jobReadRegistryResponse
}

// newReadResponseSet creates a new set from a response chan and number of
// workers which are expected to write to that chan.
func newReadResponseSet(responseChan <-chan *jobReadRegistryResponse, numWorkers int) *readResponseSet {
	return &readResponseSet{
		c:         responseChan,
		left:      numWorkers,
		readResps: make([]*jobReadRegistryResponse, 0, numWorkers),
	}
}

// collect will collect all responses. It will block until it has received all
// of them or until the provided context is closed.
func (rrs *readResponseSet) collect(ctx context.Context) []*jobReadRegistryResponse {
	for rrs.responsesLeft() > 0 {
		resp := rrs.next(ctx)
		if resp == nil {
			break
		}
	}
	return rrs.readResps
}

// next returns the next available response. It will block until the response
// is received or the provided context is closed.
func (rrs *readResponseSet) next(ctx context.Context) *jobReadRegistryResponse {
	select {
	case <-ctx.Done():
		return nil
	case resp := <-rrs.c:
		rrs.readResps = append(rrs.readResps, resp)
		rrs.left--
		return resp
	}
}

// responsesLeft returns the number of responses that can still be fetched with
// next.
func (rrs *readResponseSet) responsesLeft() int {
	return rrs.left
}
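
// A minimal usage sketch of readResponseSet (illustrative only; responseChan
// and numWorkers are assumed to come from a caller such as
// managedLaunchReadRegistryWorkers):
//
//	set := newReadResponseSet(responseChan, numWorkers)
//	ctx, cancel := context.WithTimeout(context.Background(), MaxRegistryReadTimeout)
//	defer cancel()
//	for _, resp := range set.collect(ctx) {
//		if resp.staticErr != nil {
//			continue // skip failed lookups
//		}
//		// inspect resp.staticSignedRegistryValue
//	}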

// RegistryEntryHealth returns the health of a registry entry specified by the
// spk and tweak.
func (r *Renter) RegistryEntryHealth(ctx context.Context, spk types.SiaPublicKey, tweak crypto.Hash) (skymodules.RegistryEntryHealth, error) {
	if err := r.tg.Add(); err != nil {
		return skymodules.RegistryEntryHealth{}, err
	}
	defer r.tg.Done()
	return r.managedRegistryEntryHealth(ctx, modules.DeriveRegistryEntryID(spk, tweak), &spk, &tweak)
}

// RegistryEntryHealthRID returns the health of a registry entry specified by
// the RID.
func (r *Renter) RegistryEntryHealthRID(ctx context.Context, rid modules.RegistryEntryID) (skymodules.RegistryEntryHealth, error) {
	if err := r.tg.Add(); err != nil {
		return skymodules.RegistryEntryHealth{}, err
	}
	defer r.tg.Done()
	return r.managedRegistryEntryHealth(ctx, rid, nil, nil)
}

// ReadRegistry starts a registry lookup on all available workers. The jobs
// have until ctx is closed to return a response. Otherwise the response with
// the highest revision number will be used.
func (r *Renter) ReadRegistry(ctx context.Context, spk types.SiaPublicKey, tweak crypto.Hash) (skymodules.RegistryEntry, error) {
	start := time.Now()
	srv, err := r.managedReadRegistry(ctx, modules.DeriveRegistryEntryID(spk, tweak), &spk, &tweak, false)
	if errors.Contains(err, ErrRegistryLookupTimeout) {
		err = errors.AddContext(err, fmt.Sprintf("timed out after %vs", time.Since(start).Seconds()))
	}
	return srv, err
}

// ReadRegistryRID starts a registry lookup on all available workers. The jobs
// have until ctx is closed to return a response. Otherwise the response with
// the highest revision number will be used.
func (r *Renter) ReadRegistryRID(ctx context.Context, rid modules.RegistryEntryID) (skymodules.RegistryEntry, error) {
	start := time.Now()
	srv, err := r.managedReadRegistry(ctx, rid, nil, nil, false)
	if errors.Contains(err, ErrRegistryLookupTimeout) {
		err = errors.AddContext(err, fmt.Sprintf("timed out after %vs", time.Since(start).Seconds()))
	}
	return srv, err
}

// UpdateRegistry updates the registries on all workers with the given
// registry value.
func (r *Renter) UpdateRegistry(ctx context.Context, spk types.SiaPublicKey, srv modules.SignedRegistryValue) error {
	// Block until there is memory available, and then ensure the memory gets
	// returned.
	// Since registry entries are very small we use a fairly generous multiple.
	if !r.staticRegistryMemoryManager.Request(ctx, updateRegistryMemory, memoryPriorityHigh) {
		return errors.New("timeout while waiting in job queue - server is busy")
	}
	defer r.staticRegistryMemoryManager.Return(updateRegistryMemory)

	// Start the UpdateRegistry jobs.
	return r.managedUpdateRegistry(ctx, spk, srv)
}

// UpdateRegistryMulti updates the registries on the given workers with the
// corresponding registry values.
func (r *Renter) UpdateRegistryMulti(ctx context.Context, srvs map[string]skymodules.RegistryEntry) error {
	// Block until there is memory available, and then ensure the memory gets
	// returned.
	// Since registry entries are very small we use a fairly generous multiple.
	if !r.staticRegistryMemoryManager.Request(ctx, updateRegistryMemory, memoryPriorityHigh) {
		return errors.New("timeout while waiting in job queue - server is busy")
	}
	defer r.staticRegistryMemoryManager.Return(updateRegistryMemory)

	// Start the UpdateRegistry jobs.
	workers := r.staticWorkerPool.callWorkers()
	return r.managedUpdateRegistryMulti(ctx, workers, srvs, MinUpdateRegistrySuccesses)
}
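
// Caller-side sketch of the exported API (illustrative only; r, spk, tweak and
// srv are assumed to exist in the caller's scope):
//
//	ctx, cancel := context.WithTimeout(context.Background(), DefaultRegistryUpdateTimeout)
//	defer cancel()
//	if err := r.UpdateRegistry(ctx, spk, srv); err != nil {
//		// e.g. ErrRegistryUpdateTimeout or ErrRegistryUpdateInsufficientRedundancy
//	}
//	entry, err := r.ReadRegistry(ctx, spk, tweak)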

// managedRegistryEntryHealth reads an entry from all hosts on the network until
// ctx is closed. It will then find out the best entry and count how many times
// that entry was found on the network.
func (r *Renter) managedRegistryEntryHealth(ctx context.Context, rid modules.RegistryEntryID, spk *types.SiaPublicKey, tweak *crypto.Hash) (skymodules.RegistryEntryHealth, error) {
	// Start tracing.
	tracer := opentracing.GlobalTracer()
	span := tracer.StartSpan("managedRegistryEntryHealth")
	defer span.Finish()

	// Log some info about this trace.
	span.LogKV("RID", hex.EncodeToString(rid[:]))
	if spk != nil && tweak != nil {
		span.LogKV("SPK", spk.String())
		span.LogKV("Tweak", tweak.String())
	}

	// Block until there is memory available, and then ensure the memory gets
	// returned.
	// Since registry entries are very small we use a fairly generous multiple.
	if !r.staticRegistryMemoryManager.Request(ctx, readRegistryMemory, memoryPriorityHigh) {
		return skymodules.RegistryEntryHealth{}, errors.New("timeout while waiting in job queue - server is busy")
	}
	defer r.staticRegistryMemoryManager.Return(readRegistryMemory)

	// Specify a context for the background jobs. It will be closed as soon as
	// this method is done.
	backgroundCtx, backgroundCancel := context.WithCancel(r.tg.StopCtx())
	defer backgroundCancel()
	responseSet, launchedWorkers := r.managedLaunchReadRegistryWorkers(backgroundCtx, span, rid, spk, tweak)

	// If there are no workers remaining, fail early.
	if responseSet.left == 0 {
		return skymodules.RegistryEntryHealth{}, errors.AddContext(skymodules.ErrNotEnoughWorkersInWorkerPool, "cannot perform ReadRegistry")
	}

	// Collect as many responses as possible before the ctx is closed.
	var best *jobReadRegistryResponse
	resps := responseSet.collect(ctx)
	for _, resp := range resps {
		if resp.staticErr != nil {
			continue
		}
		if isBetter, _ := isBetterReadRegistryResponse(best, resp); isBetter {
			best = resp
		}
	}

	// If no entry was found return all 0s.
	if best == nil || best.staticSignedRegistryValue == nil {
		return skymodules.RegistryEntryHealth{}, nil
	}
	bestSRV := best.staticSignedRegistryValue

	// Get the cutoff workers and wait for 80% of them to finish.
	workersToWaitFor := regReadCutoffWorkers(launchedWorkers, minCutoffWorkers)
	awaitedWorkers := 0
	cutoff := int(float64(len(workersToWaitFor)) * minAwaitedCutoffWorkersPercentage)
	if cutoff == 0 {
		cutoff = len(workersToWaitFor)
	}
	if r.staticDeps.Disrupt("DelayRegistryHealthResponses") {
		cutoff = 0 // all workers will be considered to come after the cutoff
	}

	// Count the number of responses that match the best one. We do so by
	// asking for the reason why the individual entries can't update the
	// best one. If ErrSameRevNum is returned, the entries are equal.
	var nTotal, nBestTotal, nBestTotalBeforeCutoff, nPrimary uint64
	for _, resp := range resps {
		// Check if response arrived before cutoff.
		beforeCutoff := awaitedWorkers < cutoff
		// Check if the response comes from one of the workers we wait
		// for.
		_, exists := workersToWaitFor[resp.staticWorker.staticHostPubKeyStr]
		if exists {
			awaitedWorkers++
		}
		if resp.staticSignedRegistryValue == nil {
			// Ignore responses without value.
			continue
		}
		nTotal++
		// We call ShouldUpdateWith without a pubkey because we don't
		// want to prefer primary entries at this point. We explicitly
		// check for them afterwards.
		update, reason := bestSRV.ShouldUpdateWith(&resp.staticSignedRegistryValue.RegistryValue, types.SiaPublicKey{})
		if update || errors.Contains(reason, modules.ErrSameRevNum) {
			nBestTotal++
			// Check if it is a primary entry.
			if resp.staticSignedRegistryValue.IsPrimaryEntry(resp.staticWorker.staticHostPubKey) {
				nPrimary++
			}
			// Check if we have waited for enough workers.
			if beforeCutoff {
				nBestTotalBeforeCutoff++
			}
		}
	}
	return skymodules.RegistryEntryHealth{
		RevisionNumber:             bestSRV.Revision,
		NumEntries:                 nTotal,
		NumBestEntries:             nBestTotal,
		NumBestEntriesBeforeCutoff: nBestTotalBeforeCutoff,
		NumBestPrimaryEntries:      nPrimary,
	}, nil
}
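
// Sketch of interpreting the returned health (illustrative; the 0.75 ratio is
// an arbitrary assumption, not a value used by skyd):
//
//	health, err := r.RegistryEntryHealth(ctx, spk, tweak)
//	if err == nil && health.NumEntries > 0 {
//		ratio := float64(health.NumBestEntries) / float64(health.NumEntries)
//		healthy := ratio >= 0.75
//		_ = healthy
//	}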

// managedReadRegistry starts a registry lookup on all available workers. The
// jobs have until ctx is closed to finish and return a response. Otherwise the
// response with the highest revision number will be used. If ignoreCutoff is
// specified, the read won't be aborted early after a certain number of hosts
// returned but only after the timeout is reached.
func (r *Renter) managedReadRegistry(ctx context.Context, rid modules.RegistryEntryID, spk *types.SiaPublicKey, tweak *crypto.Hash, ignoreCutoff bool) (skymodules.RegistryEntry, error) {
	// Start tracing.
	tracer := opentracing.GlobalTracer()
	span := tracer.StartSpan("managedReadRegistry")
	defer span.Finish()

	// Check if we are subscribed to the entry first.
	subscribedRV, ok := r.staticSubscriptionManager.Get(rid)
	span.SetTag("cached", ok)
	if ok && subscribedRV != nil {
		if subscribedRV.Type == modules.RegistryTypeInvalid {
			return skymodules.RegistryEntry{}, ErrRegistryEntryNotFound
		}
		return *subscribedRV, nil
	}
	if r.staticDeps.Disrupt("ReadRegistryCacheOnly") {
		return skymodules.RegistryEntry{}, errors.New("ReadRegistryCacheOnly")
	}

	// Measure the time it takes to fetch the entry.
	startTime := time.Now()
	defer func() {
		r.staticRegistryReadStats.AddDataPoint(time.Since(startTime))
	}()

	// Log some info about this trace.
	span.LogKV("RID", hex.EncodeToString(rid[:]))
	if spk != nil && tweak != nil {
		span.LogKV("SPK", spk.String())
		span.LogKV("Tweak", tweak.String())
	}

	// Block until there is memory available, and then ensure the memory gets
	// returned.
	// Since registry entries are very small we use a fairly generous multiple.
	if !r.staticRegistryMemoryManager.Request(ctx, readRegistryMemory, memoryPriorityHigh) {
		return skymodules.RegistryEntry{}, errors.New("timeout while waiting in job queue - server is busy")
	}
	defer r.staticRegistryMemoryManager.Return(readRegistryMemory)

	// Specify a context for the background jobs. It will be closed as soon as
	// threadedHandleRegistryRepairs is done.
	backgroundCtx, backgroundCancel := context.WithCancel(r.tg.StopCtx())

	responseSet, launchedWorkers := r.managedLaunchReadRegistryWorkers(backgroundCtx, span, rid, spk, tweak)
	numWorkers := len(launchedWorkers)

	// If there are no workers remaining, fail early.
	if numWorkers == 0 {
		backgroundCancel()
		return skymodules.RegistryEntry{}, errors.AddContext(skymodules.ErrNotEnoughWorkersInWorkerPool, "cannot perform ReadRegistry")
	}

	defer func() {
		_ = r.tg.Launch(func() {
			defer backgroundCancel()

			// Handle registry repairs.
			r.threadedHandleRegistryRepairs(r.tg.StopCtx(), span, responseSet)
		})
	}()

	// Get the cutoff workers and wait for 80% of them to finish.
	workersToWaitFor := regReadCutoffWorkers(launchedWorkers, minCutoffWorkers)
	awaitedWorkers := 0
	cutoff := int(float64(len(workersToWaitFor)) * minAwaitedCutoffWorkersPercentage)
	if cutoff == 0 {
		cutoff = len(workersToWaitFor)
	}

	// Prevent reaching the cutoff point when ReadRegistryBlocking is
	// injected as a dependency.
	if r.staticDeps.Disrupt("ReadRegistryBlocking") {
		awaitedWorkers = -1
	}

	var best *jobReadRegistryResponse
	responses := 0
	// Wait for responses until either there are no responses left or until
	// we have waited for enough of our workersToWaitFor.
	for responseSet.responsesLeft() > 0 {
		// Check cancel condition and block for more responses.
		resp := responseSet.next(ctx)
		if resp == nil {
			break // context triggered
		}

		// Check if we have waited for enough workers.
		if !ignoreCutoff && awaitedWorkers >= cutoff {
			break // done
		}

		// Check if the response comes from one of the workers we wait
		// for.
		_, exists := workersToWaitFor[resp.staticWorker.staticHostPubKeyStr]
		if exists {
			awaitedWorkers++
		}

		// Increment responses.
		responses++

		// Ignore error responses and responses that returned no entry.
		if resp.staticErr != nil || resp.staticSignedRegistryValue == nil {
			continue
		}

		// Remember the best response.
		if isBetter, _ := isBetterReadRegistryResponse(best, resp); isBetter {
			best = resp
		}
	}

	// If we don't have a successful response and also not a response for every
	// worker, we timed out.
	noResponse := best == nil || best.staticSignedRegistryValue == nil
	if noResponse && responses < numWorkers {
		return skymodules.RegistryEntry{}, ErrRegistryLookupTimeout
	}

	// If we don't have a successful response but received a response from every
	// worker, we were unable to look up the entry.
	if noResponse {
		return skymodules.RegistryEntry{}, ErrRegistryEntryNotFound
	}
	return *best.staticSignedRegistryValue, nil
}
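
// The two failure modes of a lookup are distinguishable (illustrative sketch;
// errors.Contains is the NebulousLabs errors helper used throughout this file):
//
//	entry, err := r.ReadRegistryRID(ctx, rid)
//	switch {
//	case errors.Contains(err, ErrRegistryLookupTimeout):
//		// some workers never answered before ctx was closed
//	case errors.Contains(err, ErrRegistryEntryNotFound):
//		// every worker answered, but none of them had the entry
//	case err == nil:
//		_ = entry // entry with the highest revision seen
//	}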

// managedLaunchReadRegistryWorkers launches read registry jobs on all available
// workers and returns a read response set which can be used to wait for the
// workers' responses.
func (r *Renter) managedLaunchReadRegistryWorkers(ctx context.Context, span opentracing.Span, rid modules.RegistryEntryID, spk *types.SiaPublicKey, tweak *crypto.Hash) (*readResponseSet, []*worker) {
	// Get the full list of workers and create a channel to receive all of the
	// results from the workers. The channel is buffered with one slot per
	// worker, so that the workers do not have to block when returning the
	// result of the job, even if this thread is not listening.
	workers := r.staticWorkerPool.callWorkers()
	staticResponseChan := make(chan *jobReadRegistryResponse, len(workers))

	// Filter out hosts that don't support the registry.
	numRegistryWorkers := 0
	for _, worker := range workers {
		cache := worker.staticCache()
		if build.VersionCmp(cache.staticHostVersion, minRegistryVersion) < 0 {
			continue
		}

		// check for price gouging
		//
		// TODO: use 'checkProjectDownloadGouging' gouging for some basic
		// protection. Should be replaced as part of the gouging overhaul.
		pt := worker.staticPriceTable().staticPriceTable
		err := gouging.CheckProjectDownload(cache.staticRenterAllowance, pt)
		if err != nil {
			r.staticLog.Debugf("price gouging detected in worker %v, err: %v\n", worker.staticHostPubKeyStr, err)
			continue
		}

		jrr := worker.newJobReadRegistryEID(ctx, span, staticResponseChan, rid, spk, tweak)
		if !worker.staticJobReadRegistryQueue.callAdd(jrr) {
			// This will filter out any workers that are on cooldown or
			// otherwise can't participate in the project.
			continue
		}
		workers[numRegistryWorkers] = worker
		numRegistryWorkers++
	}
	workers = workers[:numRegistryWorkers]

	// If the dependency is specified, increment numRegistryWorkers. This will
	// cause the read loop to never exit before the context is closed, since
	// the response set expects one more response than will ever arrive.
	if r.staticDeps.Disrupt("ReadRegistryBlocking") {
		numRegistryWorkers++
	}

	return newReadResponseSet(staticResponseChan, numRegistryWorkers), workers
}
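
// The loop above reuses the workers slice to filter in place. A minimal,
// self-contained version of the same pattern (sketch only; keep is a
// hypothetical predicate):
//
//	n := 0
//	for _, w := range workers {
//		if !keep(w) {
//			continue
//		}
//		workers[n] = w
//		n++
//	}
//	workers = workers[:n]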

// managedUpdateRegistry updates the registries on all workers with the given
// registry value.
// NOTE: the input ctx only unblocks the call if it fails to hit the threshold
// before the timeout. It doesn't stop the update jobs. That's because we want
// to always make sure we update as many hosts as possible.
func (r *Renter) managedUpdateRegistry(ctx context.Context, spk types.SiaPublicKey, srv modules.SignedRegistryValue) (err error) {
	workers := r.staticWorkerPool.callWorkers()
	srvs := make(map[string]skymodules.RegistryEntry, len(workers))
	for _, w := range workers {
		srvs[w.staticHostPubKeyStr] = skymodules.NewRegistryEntry(spk, srv)
	}
	return r.managedUpdateRegistryMulti(ctx, workers, srvs, MinUpdateRegistrySuccesses)
}
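
// UpdateRegistryMulti expects one entry per host public key string. A sketch
// of building such a map for a subset of hosts (illustrative; hosts and entry
// are assumed inputs, and it is assumed that staticHostPubKeyStr is the
// String() form of the host's SiaPublicKey):
//
//	srvs := make(map[string]skymodules.RegistryEntry, len(hosts))
//	for _, hpk := range hosts {
//		srvs[hpk.String()] = entry
//	}
//	err := r.UpdateRegistryMulti(ctx, srvs)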

// managedUpdateRegistryMulti updates the registries on the given workers with
// the corresponding registry values.
// NOTE: the input ctx only unblocks the call if it fails to hit the threshold
// before the timeout. It doesn't stop the update jobs. That's because we want
// to always make sure we update as many hosts as possible.
func (r *Renter) managedUpdateRegistryMulti(ctx context.Context, workers []*worker, srvs map[string]skymodules.RegistryEntry, minUpdates int) (err error) {
	// Start tracing.
	start := time.Now()
	tracer := opentracing.GlobalTracer()
	span := tracer.StartSpan("managedUpdateRegistryMulti")
	defer span.Finish()

	// Check how many updates we expect at the very least.
	if minUpdates > len(srvs) {
		minUpdates = len(srvs)
	}

	// Verify the signatures before updating the hosts.
	for _, srv := range srvs {
		if err := srv.Verify(); err != nil {
			return errors.AddContext(err, "managedUpdateRegistry: failed to verify signature of entry")
		}
	}
	// Create a channel to receive all of the results from the workers. The
	// channel is buffered with one slot per worker, so that the workers do
	// not have to block when returning the result of the job, even if this
	// thread is not listening.
	staticResponseChan := make(chan *jobUpdateRegistryResponse, len(workers))
	span.LogKV("workers", len(workers))

	// Create a context to continue updating registry values in the background.
	updateTimeoutCtx, updateTimeoutCancel := context.WithTimeout(r.tg.StopCtx(), updateRegistryBackgroundTimeout)
	defer func() {
		if err != nil {
			// If managedUpdateRegistry fails the caller is going to assume that
			// updating the value failed. Don't let any jobs linger in that
			// case.
			updateTimeoutCancel()
		}
	}()

	// Filter out hosts that don't support the registry.
	numRegistryWorkers := 0
	for _, worker := range workers {
		// Filter out workers that we don't have an srv for.
		srv, exists := srvs[worker.staticHostPubKeyStr]
		if !exists {
			continue
		}
		// Check if worker is good for updating the registry.
		if !isWorkerGoodForRegistryUpdate(worker) {
			continue
		}

		// Create the job.
		jrr := worker.newJobUpdateRegistry(updateTimeoutCtx, span, staticResponseChan, srv.PubKey, srv.SignedRegistryValue)
		if !worker.staticJobUpdateRegistryQueue.callAdd(jrr) {
			// This will filter out any workers that are on cooldown or
			// otherwise can't participate in the project.
			continue
		}
		workers[numRegistryWorkers] = worker
		numRegistryWorkers++
	}
	workers = workers[:numRegistryWorkers]
	// If there are not enough workers remaining, fail early.
	if len(workers) < minUpdates {
		return errors.AddContext(skymodules.ErrNotEnoughWorkersInWorkerPool, "cannot perform UpdateRegistry")
	}

	workersLeft := len(workers)
	responses := 0
	successfulResponses := 0

	var respErrs error
	for successfulResponses < minUpdates && workersLeft+successfulResponses >= minUpdates {
		// Check deadline.
		var resp *jobUpdateRegistryResponse
		select {
		case <-ctx.Done():
			// Timeout reached.
			return ErrRegistryUpdateTimeout
		case resp = <-staticResponseChan:
		}

		// Decrement the number of workers.
		workersLeft--

		// Increment number of responses.
		responses++

		// Ignore error responses except for invalid revision errors.
		if resp.staticErr != nil {
			// If we receive an error indicating that a better entry exists on
			// the network we immediately return an error. That's because our
			// update won't be able to change the consensus of the network on
			// the latest entry.
			if modules.IsRegistryEntryExistErr(resp.staticErr) {
				return resp.staticErr
			}
			respErrs = errors.Compose(respErrs, resp.staticErr)
			continue
		}

		// Increment successful responses.
		successfulResponses++
	}

	// Check if we ran out of workers.
	if successfulResponses == 0 {
		r.staticLog.Print("RegistryUpdate failed with 0 successful responses: ", respErrs)
		return errors.Compose(err, ErrRegistryUpdateNoSuccessfulUpdates)
	}
	if successfulResponses < minUpdates {
		r.staticLog.Printf("RegistryUpdate failed with %v < %v successful responses: %v", successfulResponses, minUpdates, respErrs)
		return errors.Compose(err, ErrRegistryUpdateInsufficientRedundancy)
	}
	r.staticRegWriteStats.AddDataPoint(time.Since(start))
	return nil
}
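
// The update loop above keeps waiting only while success is still possible,
// i.e. while workersLeft+successfulResponses >= minUpdates. A small
// illustration of that invariant (the numbers are assumptions):
//
//	minUpdates := 5
//	workersLeft, successful := 3, 1
//	canStillSucceed := workersLeft+successful >= minUpdates // false -> give up early
//	_ = canStillSucceed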

// isBetterReadRegistryResponse returns true if resp2 is a better response than
// resp1 and false otherwise. Better means that the response either has a higher
// revision number, more work or was faster.
func isBetterReadRegistryResponse(resp1, resp2 *jobReadRegistryResponse) (bool, bool) {
	// Check for nil response.
	if resp2 == nil {
		// A nil entry never replaces an existing entry.
		return false, resp1 == resp2
	} else if resp1 == nil {
		// A non-nil entry always replaces a nil entry.
		return true, resp1 == resp2
	}
	// Same but with the entries.
	srv1 := resp1.staticSignedRegistryValue
	srv2 := resp2.staticSignedRegistryValue
	if srv2 == nil {
		return false, srv1 == srv2
	} else if srv1 == nil {
		return true, srv1 == srv2
	}
	// Compare entries. We pass the empty key here since we don't care about
	// whether the entry is a primary or secondary one.
	shouldUpdate, updateErr := srv1.ShouldUpdateWith(&srv2.RegistryValue, types.SiaPublicKey{})

	// If the entry is not capable of updating the existing one and both entries
	// have the same revision number, use the time.
	if !shouldUpdate && errors.Contains(updateErr, modules.ErrSameRevNum) {
		return resp2.staticCompleteTime.Before(resp1.staticCompleteTime), true
	}

	// Otherwise we return the result.
	return shouldUpdate, false
}

// threadedHandleRegistryRepairs waits for all provided read registry programs
// to finish and updates all workers from responses which either didn't provide
// the highest revision number, or didn't have the entry at all.
func (r *Renter) threadedHandleRegistryRepairs(ctx context.Context, parentSpan opentracing.Span, responseSet *readResponseSet) {
	if err := r.tg.Add(); err != nil {
		return
	}
	defer r.tg.Done()

	span := opentracing.StartSpan("threadedHandleRegistryRepairs", opentracing.ChildOf(parentSpan.Context()))
	defer span.Finish()

	// Collect all responses.
	ctx, cancel := context.WithTimeout(ctx, ReadRegistryBackgroundTimeout)
	defer cancel()
	resps := responseSet.collect(ctx)
	if resps == nil {
		return // nothing to do
	}

	// Find the best response.
	var best *jobReadRegistryResponse
	for _, resp := range resps {
		if better, _ := isBetterReadRegistryResponse(best, resp); better {
			best = resp
		}
	}

	// If no entry was found we can't do anything.
	if best == nil || best.staticSignedRegistryValue == nil {
		return
	}
	bestSRV := best.staticSignedRegistryValue

	// Register the update to make sure we don't try again if a value is rapidly
	// polled before this update is done.
	rid := modules.DeriveRegistryEntryID(bestSRV.PubKey, bestSRV.Tweak)
	r.ongoingRegistryRepairsMu.Lock()
	_, exists := r.ongoingRegistryRepairs[rid]
	if !exists {
		r.ongoingRegistryRepairs[rid] = struct{}{}
	}
	r.ongoingRegistryRepairsMu.Unlock()
	if exists {
		return // ongoing update found
	}

	// Unregister the update once done.
	defer func() {
		r.ongoingRegistryRepairsMu.Lock()
		delete(r.ongoingRegistryRepairs, rid)
		r.ongoingRegistryRepairsMu.Unlock()
	}()

	// Figure out how many entries with the highest revision are out there.
	upToDateHosts := make(map[string]struct{})
	for _, resp := range resps {
		if resp == nil || resp.staticSignedRegistryValue == nil || resp.staticErr != nil {
			continue
		}
		if resp.staticSignedRegistryValue.Revision != best.staticSignedRegistryValue.Revision {
			continue
		}
		upToDateHosts[resp.staticWorker.staticHostPubKeyStr] = struct{}{}
	}

	// Check if the entry requires repairing.
	if len(upToDateHosts) >= RegistryEntryRepairThreshold {
		return
	}

	// Prepare the updates.
	workers := r.staticWorkerPool.callWorkers()
	srvs := make(map[string]skymodules.RegistryEntry, len(workers))
	for _, w := range workers {
		if _, upToDate := upToDateHosts[w.staticHostPubKeyStr]; upToDate {
			continue
		}
		srvs[w.staticHostPubKeyStr] = *best.staticSignedRegistryValue
	}

	// Update the registry.
	err := r.managedUpdateRegistryMulti(ctx, workers, srvs, RegistryEntryRepairThreshold-len(upToDateHosts))
	if err != nil {
		r.staticLog.Debugln("threadedHandleRegistryRepairs: failed to update registry", err)
	}
}
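
// Repair math used above (illustrative; the counts are assumptions): with
// RegistryEntryRepairThreshold = 20 and 12 hosts already holding the best
// revision, the repair only needs 20 - 12 = 8 additional successful updates,
// which is the minUpdates value passed to managedUpdateRegistryMulti.
//
//	needed := RegistryEntryRepairThreshold - len(upToDateHosts) // 8 in this example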

// isWorkerGoodForRegistryUpdate is a helper function which returns 'true' if a
// worker can be used for updating the registry.
func isWorkerGoodForRegistryUpdate(worker *worker) bool {
	cache := worker.staticCache()
	if build.VersionCmp(cache.staticHostVersion, minRegistryVersion) < 0 {
		return false
	}
	// Skip !goodForUpload workers.
	if !cache.staticContractUtility.GoodForUpload {
		return false
	}

	// check for price gouging
	pt := worker.staticPriceTable().staticPriceTable
	err := gouging.CheckUpload(cache.staticRenterAllowance, pt)
	if err != nil {
		return false
	}
	return true
}

// regReadCutoffWorkers returns the workers to wait for before considering the
// result good enough amongst the provided launched workers.
func regReadCutoffWorkers(workers []*worker, minWorkers int) map[string]*worker {
	// Filter malicious hosts.
	i := 0
	for _, w := range workers {
		if w.staticCache().staticMaliciousHost {
			continue
		}
		workers[i] = w
		i++
	}
	workers = workers[:i]
	// Sort workers by their estimate.
	sort.Slice(workers, func(i, j int) bool {
		return workers[i].ReadRegCutoffEstimate() < workers[j].ReadRegCutoffEstimate()
	})
	// Drop slowest 50% but don't go below the min.
	newLen := len(workers) / 2
	if newLen < minWorkers && minWorkers <= len(workers) {
		newLen = minWorkers
	} else if newLen < minWorkers && minWorkers > len(workers) {
		newLen = len(workers)
	}
	workers = workers[:newLen]

	// Put remaining ones in map.
	remaining := make(map[string]*worker, len(workers))
	for _, w := range workers {
		remaining[w.staticHostPubKeyStr] = w
	}
	return remaining
}
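
// Worked example for regReadCutoffWorkers (illustrative; the numbers are
// assumptions): with 7 launched workers, one of which is flagged malicious,
// 6 candidates remain. Half of 6 is 3, which is below minCutoffWorkers (10),
// and since 10 > 6 the function falls back to keeping all 6 remaining workers,
// returned as a map keyed by host public key string.
//
//	m := regReadCutoffWorkers(launched, minCutoffWorkers) // len(m) == 6 in this example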