github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/compute/bidstrategy/distance_delay_strategy.go (about) 1 package bidstrategy 2 3 import ( 4 "context" 5 "hash/fnv" 6 "math" 7 "time" 8 9 "github.com/filecoin-project/bacalhau/pkg/model" 10 "github.com/rs/zerolog/log" 11 ) 12 13 // Decide whether we should even consider bidding on the job, early exit if 14 // we're not in the active set for this job, given the hash distances. 15 // (This is an optimization to avoid all nodes bidding on a job in large networks). 16 17 type DistanceDelayStrategyParams struct { 18 NetworkSize int 19 } 20 21 type DistanceDelayStrategy struct { 22 networkSize int 23 } 24 25 func NewDistanceDelayStrategy(params DistanceDelayStrategyParams) *DistanceDelayStrategy { 26 return &DistanceDelayStrategy{networkSize: params.NetworkSize} 27 } 28 29 func (s DistanceDelayStrategy) ShouldBid(ctx context.Context, request BidStrategyRequest) (BidStrategyResponse, error) { 30 jobNodeDistanceDelayMs, shouldRunJob := s.calculateJobNodeDistanceDelay(ctx, request) 31 if !shouldRunJob { 32 return BidStrategyResponse{ 33 ShouldBid: false, 34 Reason: "Job to node hash distance too high", 35 }, nil 36 } 37 38 if jobNodeDistanceDelayMs > 0 { 39 log.Ctx(ctx).Debug().Msgf("Waiting %d ms before selecting job %s", jobNodeDistanceDelayMs, request.Job.Metadata.ID) 40 time.Sleep(time.Millisecond * time.Duration(jobNodeDistanceDelayMs)) //nolint:gosec 41 } 42 43 return newShouldBidResponse(), nil 44 } 45 46 func (s DistanceDelayStrategy) ShouldBidBasedOnUsage( 47 _ context.Context, _ BidStrategyRequest, _ model.ResourceUsageData) (BidStrategyResponse, error) { 48 return newShouldBidResponse(), nil 49 } 50 51 func (s DistanceDelayStrategy) calculateJobNodeDistanceDelay(ctx context.Context, request BidStrategyRequest) (int, bool) { 52 // Calculate how long to wait to bid on the job by using a circular hashing 53 // style approach: Invent a metric for distance between node ID and job ID. 54 // If the node and job ID happen to be close to eachother, such that we'd 55 // expect that we are one of the N nodes "closest" to the job, bid 56 // instantly. Beyond that, back off an amount "stepped" proportional to how 57 // far we are from the job. This should evenly spread the work across the 58 // network, and have the property of on average only concurrency many nodes 59 // bidding on the job, and other nodes not bothering to bid because they 60 // will already have seen bid/bidaccepted messages from the close nodes. 61 // This will decrease overall network traffic, improving CPU and memory 62 // usage in large clusters. 63 nodeHash := hash(request.NodeID) 64 jobHash := hash(request.Job.Metadata.ID) 65 // Range: 0 through 4,294,967,295. (4 billion) 66 distance := diff(nodeHash, jobHash) 67 // scale distance per chunk by concurrency (so that many nodes bid on a job 68 // with high concurrency). IOW, divide the space up into this many pieces. 69 // If concurrency=3 and network size=3, there'll only be one piece and 70 // everyone will bid. If concurrency=1 and network size=1 million, there 71 // will be a million slices of the hash space. 72 concurrency := max(1, request.Job.Spec.Deal.Concurrency, request.Job.Spec.Deal.MinBids) 73 chunk := int((float32(concurrency) / float32(s.networkSize)) * 4294967295) //nolint:gomnd 74 // wait 1 second per chunk distance. So, if we land in exactly the same 75 // chunk, bid immediately. If we're one chunk away, wait a bit before 76 // bidding. If we're very far away, wait a very long time. 77 delay := (distance / chunk) * 1000 //nolint:gomnd 78 log.Ctx(ctx).Trace().Msgf( 79 "node/job %s/%s, %d/%d, dist=%d, chunk=%d, delay=%d", 80 request.NodeID, request.Job.Metadata.ID, nodeHash, jobHash, distance, chunk, delay, 81 ) 82 shouldRun := true 83 // if delay is too high, just exit immediately. 84 if delay > 1000 { //nolint:gomnd 85 // drop the job on the floor, :-O 86 shouldRun = false 87 log.Ctx(ctx).Warn().Msgf( 88 "dropped job: node/job %s/%s, %d/%d, dist=%d, chunk=%d, delay=%d", 89 request.NodeID, request.Job.Metadata.ID, nodeHash, jobHash, distance, chunk, delay, 90 ) 91 } 92 return delay, shouldRun 93 } 94 95 func hash(s string) int { 96 h := fnv.New32a() 97 h.Write([]byte(s)) 98 return int(h.Sum32()) 99 } 100 101 func diff(a, b int) int { 102 if a < b { 103 return b - a 104 } 105 return a - b 106 } 107 108 func max(vars ...int) int { 109 res := math.MinInt 110 111 for _, i := range vars { 112 if res < i { 113 res = i 114 } 115 } 116 return res 117 } 118 119 // Compile-time check of interface implementation 120 var _ BidStrategy = (*DistanceDelayStrategy)(nil)