github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/compute/bidstrategy/distance_delay_strategy.go (about)

     1  package bidstrategy
     2  
     3  import (
     4  	"context"
     5  	"hash/fnv"
     6  	"math"
     7  	"time"
     8  
     9  	"github.com/filecoin-project/bacalhau/pkg/model"
    10  	"github.com/rs/zerolog/log"
    11  )
    12  
    13  // Decide whether we should even consider bidding on the job, early exit if
    14  // we're not in the active set for this job, given the hash distances.
    15  // (This is an optimization to avoid all nodes bidding on a job in large networks).
    16  
    17  type DistanceDelayStrategyParams struct {
    18  	NetworkSize int
    19  }
    20  
    21  type DistanceDelayStrategy struct {
    22  	networkSize int
    23  }
    24  
    25  func NewDistanceDelayStrategy(params DistanceDelayStrategyParams) *DistanceDelayStrategy {
    26  	return &DistanceDelayStrategy{networkSize: params.NetworkSize}
    27  }
    28  
    29  func (s DistanceDelayStrategy) ShouldBid(ctx context.Context, request BidStrategyRequest) (BidStrategyResponse, error) {
    30  	jobNodeDistanceDelayMs, shouldRunJob := s.calculateJobNodeDistanceDelay(ctx, request)
    31  	if !shouldRunJob {
    32  		return BidStrategyResponse{
    33  			ShouldBid: false,
    34  			Reason:    "Job to node hash distance too high",
    35  		}, nil
    36  	}
    37  
    38  	if jobNodeDistanceDelayMs > 0 {
    39  		log.Ctx(ctx).Debug().Msgf("Waiting %d ms before selecting job %s", jobNodeDistanceDelayMs, request.Job.Metadata.ID)
    40  		time.Sleep(time.Millisecond * time.Duration(jobNodeDistanceDelayMs)) //nolint:gosec
    41  	}
    42  
    43  	return newShouldBidResponse(), nil
    44  }
    45  
    46  func (s DistanceDelayStrategy) ShouldBidBasedOnUsage(
    47  	_ context.Context, _ BidStrategyRequest, _ model.ResourceUsageData) (BidStrategyResponse, error) {
    48  	return newShouldBidResponse(), nil
    49  }
    50  
    51  func (s DistanceDelayStrategy) calculateJobNodeDistanceDelay(ctx context.Context, request BidStrategyRequest) (int, bool) {
    52  	// Calculate how long to wait to bid on the job by using a circular hashing
    53  	// style approach: Invent a metric for distance between node ID and job ID.
    54  	// If the node and job ID happen to be close to eachother, such that we'd
    55  	// expect that we are one of the N nodes "closest" to the job, bid
    56  	// instantly. Beyond that, back off an amount "stepped" proportional to how
    57  	// far we are from the job. This should evenly spread the work across the
    58  	// network, and have the property of on average only concurrency many nodes
    59  	// bidding on the job, and other nodes not bothering to bid because they
    60  	// will already have seen bid/bidaccepted messages from the close nodes.
    61  	// This will decrease overall network traffic, improving CPU and memory
    62  	// usage in large clusters.
    63  	nodeHash := hash(request.NodeID)
    64  	jobHash := hash(request.Job.Metadata.ID)
    65  	// Range: 0 through 4,294,967,295. (4 billion)
    66  	distance := diff(nodeHash, jobHash)
    67  	// scale distance per chunk by concurrency (so that many nodes bid on a job
    68  	// with high concurrency). IOW, divide the space up into this many pieces.
    69  	// If concurrency=3 and network size=3, there'll only be one piece and
    70  	// everyone will bid. If concurrency=1 and network size=1 million, there
    71  	// will be a million slices of the hash space.
    72  	concurrency := max(1, request.Job.Spec.Deal.Concurrency, request.Job.Spec.Deal.MinBids)
    73  	chunk := int((float32(concurrency) / float32(s.networkSize)) * 4294967295) //nolint:gomnd
    74  	// wait 1 second per chunk distance. So, if we land in exactly the same
    75  	// chunk, bid immediately. If we're one chunk away, wait a bit before
    76  	// bidding. If we're very far away, wait a very long time.
    77  	delay := (distance / chunk) * 1000 //nolint:gomnd
    78  	log.Ctx(ctx).Trace().Msgf(
    79  		"node/job %s/%s, %d/%d, dist=%d, chunk=%d, delay=%d",
    80  		request.NodeID, request.Job.Metadata.ID, nodeHash, jobHash, distance, chunk, delay,
    81  	)
    82  	shouldRun := true
    83  	// if delay is too high, just exit immediately.
    84  	if delay > 1000 { //nolint:gomnd
    85  		// drop the job on the floor, :-O
    86  		shouldRun = false
    87  		log.Ctx(ctx).Warn().Msgf(
    88  			"dropped job: node/job %s/%s, %d/%d, dist=%d, chunk=%d, delay=%d",
    89  			request.NodeID, request.Job.Metadata.ID, nodeHash, jobHash, distance, chunk, delay,
    90  		)
    91  	}
    92  	return delay, shouldRun
    93  }
    94  
    95  func hash(s string) int {
    96  	h := fnv.New32a()
    97  	h.Write([]byte(s))
    98  	return int(h.Sum32())
    99  }
   100  
   101  func diff(a, b int) int {
   102  	if a < b {
   103  		return b - a
   104  	}
   105  	return a - b
   106  }
   107  
   108  func max(vars ...int) int {
   109  	res := math.MinInt
   110  
   111  	for _, i := range vars {
   112  		if res < i {
   113  			res = i
   114  		}
   115  	}
   116  	return res
   117  }
   118  
   119  // Compile-time check that *DistanceDelayStrategy implements the BidStrategy interface.
   120  var _ BidStrategy = (*DistanceDelayStrategy)(nil)