github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/util/shard.go (about)

     1  package util
     2  
     3  import (
     4  	"crypto/md5"
     5  	"encoding/binary"
     6  	"math"
     7  )
     8  
const (
	// Sharding strategies.

	// ShardingStrategyDefault is the name of the "default" sharding strategy.
	ShardingStrategyDefault = "default"
	// ShardingStrategyShuffle is the name of the "shuffle-sharding" strategy.
	ShardingStrategyShuffle = "shuffle-sharding"
)
    14  
    15  var (
    16  	seedSeparator = []byte{0}
    17  )
    18  
    19  // ShuffleShardSeed returns seed for random number generator, computed from provided identifier.
    20  func ShuffleShardSeed(identifier, zone string) int64 {
    21  	// Use the identifier to compute an hash we'll use to seed the random.
    22  	hasher := md5.New()
    23  	hasher.Write(YoloBuf(identifier)) // nolint:errcheck
    24  	if zone != "" {
    25  		hasher.Write(seedSeparator) // nolint:errcheck
    26  		hasher.Write(YoloBuf(zone)) // nolint:errcheck
    27  	}
    28  	checksum := hasher.Sum(nil)
    29  
    30  	// Generate the seed based on the first 64 bits of the checksum.
    31  	return int64(binary.BigEndian.Uint64(checksum))
    32  }
    33  
    34  // ShuffleShardExpectedInstancesPerZone returns the number of instances that should be selected for each
    35  // zone when zone-aware replication is enabled. The algorithm expects the shard size to be divisible
    36  // by the number of zones, in order to have nodes balanced across zones. If it's not, we do round up.
    37  func ShuffleShardExpectedInstancesPerZone(shardSize, numZones int) int {
    38  	return int(math.Ceil(float64(shardSize) / float64(numZones)))
    39  }
    40  
    41  // ShuffleShardExpectedInstances returns the total number of instances that should be selected for a given
    42  // tenant. If zone-aware replication is disabled, the input numZones should be 1.
    43  func ShuffleShardExpectedInstances(shardSize, numZones int) int {
    44  	return ShuffleShardExpectedInstancesPerZone(shardSize, numZones) * numZones
    45  }