github.com/uber/kraken@v0.1.4/lib/hrw/rendezvous.go (about)

     1  // Copyright (c) 2016-2019 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  package hrw
    15  
    16  import (
    17  	"encoding/binary"
    18  	"encoding/hex"
    19  	"hash"
    20  	"math"
    21  	"math/big"
    22  	"sort"
    23  
    24  	"github.com/spaolacci/murmur3"
    25  )
    26  
    27  // min between two integers.
    28  func min(a, b int) int {
    29  	if a < b {
    30  		return a
    31  	}
    32  	return b
    33  }
    34  
// HashFactory is a constructor function that returns a hash.Hash each time it
// is called. NewRendezvousHash stores one in RendezvousHash.Hash, and
// RendezvousHashNode.Score calls it to obtain the hasher used for a score.
type HashFactory func() hash.Hash
    37  
    38  // Murmur3Hash is a murmur3 HashFactory.
    39  func Murmur3Hash() hash.Hash { return murmur3.New64() }
    40  
// UIntToFloat converts an unsigned integer, passed as raw bytes, into a
// float64. The integer may be very wide (for example 256 bits long), which is
// why it is passed as a byte slice rather than a machine integer.
//
// bytesUInt is the value to convert and maxValue is the largest value it can
// take; both implementations in this file read them as big-endian integers.
// hasher is an optional hash.Hash that an implementation may use to re-hash
// bytesUInt (UInt64ToFloat64 does so when the masked value is zero).
type UIntToFloat func(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64
    44  
// RendezvousHashNode represents a weighted node in a hashing schema.
// RendezvousHash.AddNode creates nodes and sets all three fields.
type RendezvousHashNode struct {
	RHash  *RendezvousHash // parent hash structure with all the configuration
	Label  string          // string identifying a unique node; appended to the key bytes when scoring
	Weight int             // node weight, usually denotes node's capacity; numerator of the score formula
}
    51  
// RendezvousHash represents a Rendezvous Hashing schema.
// It does not make any assumption about the concurrency model, so
// synchronizing access to it is the caller's responsibility.
type RendezvousHash struct {
	Hash         HashFactory           // hash function factory; called by Score to obtain a hasher
	ScoreFunc    UIntToFloat           // conversion function from generated hash to float64
	Nodes        []*RendezvousHashNode // all nodes, in the order they were added
	MaxHashValue []byte                // all-0xFF value as wide as the hash output (set by NewRendezvousHash); passed to ScoreFunc as maxValue
}
    61  
// RendezvousNodesByScore is a predicate that supports sorting by score(key).
// Together with its Len, Swap and Less methods it satisfies sort.Interface,
// ordering nodes by ascending Score for a fixed key.
type RendezvousNodesByScore struct {
	key   string                // key passed to every node's Score call
	nodes []*RendezvousHashNode // nodes being sorted; Swap reorders this slice in place
}
    67  
    68  // Len return length.
    69  func (a RendezvousNodesByScore) Len() int { return len(a.nodes) }
    70  
    71  // Swap swaps two elements.
    72  func (a RendezvousNodesByScore) Swap(i, j int) { a.nodes[i], a.nodes[j] = a.nodes[j], a.nodes[i] }
    73  
    74  // Less is a predicate '<' for a set.
    75  func (a RendezvousNodesByScore) Less(i, j int) bool {
    76  	return a.nodes[i].Score(a.key) < a.nodes[j].Score(a.key)
    77  }
    78  
    79  // NewRendezvousHash constructs and prepopulates a RendezvousHash object.
    80  func NewRendezvousHash(hashFactory HashFactory, scoreFunc UIntToFloat) *RendezvousHash {
    81  	rh := &RendezvousHash{
    82  		Hash:      hashFactory,
    83  		ScoreFunc: scoreFunc,
    84  	}
    85  	hashLen := len(hashFactory().Sum(nil))
    86  
    87  	rh.MaxHashValue = make([]byte, hashLen)
    88  	for i := 0; i < hashLen; i++ {
    89  		rh.MaxHashValue[i] = 0xFF
    90  	}
    91  	return rh
    92  }
    93  
    94  // UInt64ToFloat64 Converts a uniformly random 64-bit integer
    95  // to "uniformly" random floating point number on interval [0, 1)
    96  // The approach is heavily based on this material
    97  // https://crypto.stackexchange.com/questions/31657/uniformly-distributed-secure-floating-point-numbers-in-0-1
    98  // and this https://en.wikipedia.org/wiki/Rendezvous_hashing
    99  func UInt64ToFloat64(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 {
   100  	maxUInt := binary.BigEndian.Uint64(maxValue)
   101  	fiftyThreeOnes := uint64(maxUInt >> (64 - 53))
   102  	fiftyThreeZeros := float64(1 << 53)
   103  	u64val := binary.BigEndian.Uint64(bytesUInt)
   104  
   105  	// val & 0xFFF000000000000 == 0 need to be handled differently
   106  	// as it will result in zeros: something that score
   107  	// function cannot survive. So there are 2^11 keys like that
   108  	// need to be re-hashed one more time. That will introduce a tiny bias
   109  	// in hashing key space distribution that we can live with
   110  	val := u64val & fiftyThreeOnes
   111  	if val == 0 && hasher != nil {
   112  		hasher.Reset()
   113  		hasher.Write(bytesUInt)
   114  
   115  		val = binary.BigEndian.Uint64(hasher.Sum(nil)) & fiftyThreeOnes
   116  	}
   117  	return float64(val) / fiftyThreeZeros
   118  }
   119  
   120  // BigIntToFloat64 converts BigInt to float64.
   121  func BigIntToFloat64(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 {
   122  	maxHashFloat := new(big.Float)
   123  	maxHashFloat.SetInt(new(big.Int).SetBytes(maxValue))
   124  
   125  	hashInt := new(big.Int)
   126  	// checksumHash is being consumed as a big endian int.
   127  	hashInt.SetBytes(bytesUInt)
   128  
   129  	hashFloat := new(big.Float).SetInt(hashInt)
   130  
   131  	// float64's precision, we would not need more then that
   132  	// as we eventually cast everything to float64
   133  	// Big Float will use greater presicions in operations
   134  	hashFloat.SetPrec(53)
   135  
   136  	fl64value, _ := hashFloat.Quo(hashFloat, maxHashFloat).Float64()
   137  
   138  	// I don't expact that to happen, the accuracy of 256 bits division
   139  	// arithmetic is well within float'64 theoretical minimum for a single
   140  	// division and we always divide with a non zero constant.
   141  	if hashFloat.IsInf() {
   142  		panic("Float64.Quo operation has failed")
   143  	}
   144  
   145  	return fl64value
   146  }
   147  
   148  // Score computes score of a key for this node in accordance to Weighted
   149  // Rendezvous Hash. It's using big golang float key as hexidemical encoding of
   150  // a byte array.
   151  func (rhn *RendezvousHashNode) Score(key string) float64 {
   152  	hasher := rhn.RHash.Hash()
   153  
   154  	keyBytes, err := hex.DecodeString(key)
   155  	if err != nil {
   156  		return math.NaN()
   157  	}
   158  
   159  	hashBytes := make([]byte, len(keyBytes)+len(rhn.Label))
   160  	// Add node's seed to a key string
   161  	hashBytes = append(keyBytes, []byte(rhn.Label)...)
   162  
   163  	hasher.Write(hashBytes)
   164  	score := rhn.RHash.ScoreFunc(hasher.Sum(nil), rhn.RHash.MaxHashValue, hasher)
   165  
   166  	// for more information on this math please look at this paper:
   167  	// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.414.9353&rep=rep1&type=pdf
   168  	// and this presentation slides:
   169  	// http://www.snia.org/sites/default/files/SDC15_presentations/dist_sys/Jason_Resch_New_Consistent_Hashings_Rev.pdf
   170  
   171  	return -float64(rhn.Weight) / math.Log(score)
   172  }
   173  
   174  // AddNode adds a node to a hashing ring.
   175  func (rh *RendezvousHash) AddNode(seed string, weight int) {
   176  	node := &RendezvousHashNode{
   177  		RHash:  rh,
   178  		Label:  seed,
   179  		Weight: weight,
   180  	}
   181  	rh.Nodes = append(rh.Nodes, node)
   182  }
   183  
   184  // RemoveNode removes a node from a hashing ring.
   185  func (rh *RendezvousHash) RemoveNode(name string) {
   186  	for i, node := range rh.Nodes {
   187  		if node.Label == name {
   188  			rh.Nodes = append(rh.Nodes[:i], rh.Nodes[i+1:]...)
   189  			break
   190  		}
   191  	}
   192  }
   193  
   194  // GetNode gets a node from a hashing ring and its index in array.
   195  func (rh *RendezvousHash) GetNode(name string) (*RendezvousHashNode, int) {
   196  	for index, node := range rh.Nodes {
   197  		if node.Label == name {
   198  			return node, index
   199  		}
   200  	}
   201  	return nil, -1
   202  }
   203  
   204  // GetOrderedNodes gets an ordered set of N nodes for a key where
   205  // score(Node1) > score(N2) > ... score(NodeN).
   206  // Number of returned nodes = min(N, len(nodes)).
   207  func (rh *RendezvousHash) GetOrderedNodes(key string, n int) []*RendezvousHashNode {
   208  	nodes := make([]*RendezvousHashNode, len(rh.Nodes))
   209  	copy(nodes, rh.Nodes)
   210  
   211  	sort.Sort(sort.Reverse(&RendezvousNodesByScore{key: key, nodes: nodes}))
   212  
   213  	if n >= len(nodes) {
   214  		return nodes
   215  	}
   216  	return nodes[:n]
   217  }