github.com/uber/kraken@v0.1.4/lib/hrw/rendezvous.go (about) 1 // Copyright (c) 2016-2019 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 package hrw 15 16 import ( 17 "encoding/binary" 18 "encoding/hex" 19 "hash" 20 "math" 21 "math/big" 22 "sort" 23 24 "github.com/spaolacci/murmur3" 25 ) 26 27 // min between two integers. 28 func min(a, b int) int { 29 if a < b { 30 return a 31 } 32 return b 33 } 34 35 // HashFactory is a function object for Hash.New() constructor. 36 type HashFactory func() hash.Hash 37 38 // Murmur3Hash is a murmur3 HashFactory. 39 func Murmur3Hash() hash.Hash { return murmur3.New64() } 40 41 // UIntToFloat is a conversion function from uint64 to float64. 42 // Int could be potentially very big integer, like 256 bits long. 43 type UIntToFloat func(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 44 45 // RendezvousHashNode represents a weighted node in a hashing schema. 46 type RendezvousHashNode struct { 47 RHash *RendezvousHash // parent hash structure with all the configuration 48 Label string // some string ientifying a unique node label 49 Weight int // node weight, usually denotes node's capacity 50 } 51 52 // RendezvousHash represents a Rendezvous Hashing schema. 53 // It does not make any assumption about concurrency model so synchronizing 54 // access to it is a caller's responsibility. 55 type RendezvousHash struct { 56 Hash HashFactory // hash function 57 ScoreFunc UIntToFloat // conversion function from generated hash to float64 58 Nodes []*RendezvousHashNode // all nodes 59 MaxHashValue []byte 60 } 61 62 // RendezvousNodesByScore is a predicat that supports sorting by score(key). 63 type RendezvousNodesByScore struct { 64 key string 65 nodes []*RendezvousHashNode 66 } 67 68 // Len return length. 69 func (a RendezvousNodesByScore) Len() int { return len(a.nodes) } 70 71 // Swap swaps two elements. 72 func (a RendezvousNodesByScore) Swap(i, j int) { a.nodes[i], a.nodes[j] = a.nodes[j], a.nodes[i] } 73 74 // Less is a predicate '<' for a set. 75 func (a RendezvousNodesByScore) Less(i, j int) bool { 76 return a.nodes[i].Score(a.key) < a.nodes[j].Score(a.key) 77 } 78 79 // NewRendezvousHash constructs and prepopulates a RendezvousHash object. 80 func NewRendezvousHash(hashFactory HashFactory, scoreFunc UIntToFloat) *RendezvousHash { 81 rh := &RendezvousHash{ 82 Hash: hashFactory, 83 ScoreFunc: scoreFunc, 84 } 85 hashLen := len(hashFactory().Sum(nil)) 86 87 rh.MaxHashValue = make([]byte, hashLen) 88 for i := 0; i < hashLen; i++ { 89 rh.MaxHashValue[i] = 0xFF 90 } 91 return rh 92 } 93 94 // UInt64ToFloat64 Converts a uniformly random 64-bit integer 95 // to "uniformly" random floating point number on interval [0, 1) 96 // The approach is heavily based on this material 97 // https://crypto.stackexchange.com/questions/31657/uniformly-distributed-secure-floating-point-numbers-in-0-1 98 // and this https://en.wikipedia.org/wiki/Rendezvous_hashing 99 func UInt64ToFloat64(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 { 100 maxUInt := binary.BigEndian.Uint64(maxValue) 101 fiftyThreeOnes := uint64(maxUInt >> (64 - 53)) 102 fiftyThreeZeros := float64(1 << 53) 103 u64val := binary.BigEndian.Uint64(bytesUInt) 104 105 // val & 0xFFF000000000000 == 0 need to be handled differently 106 // as it will result in zeros: something that score 107 // function cannot survive. So there are 2^11 keys like that 108 // need to be re-hashed one more time. That will introduce a tiny bias 109 // in hashing key space distribution that we can live with 110 val := u64val & fiftyThreeOnes 111 if val == 0 && hasher != nil { 112 hasher.Reset() 113 hasher.Write(bytesUInt) 114 115 val = binary.BigEndian.Uint64(hasher.Sum(nil)) & fiftyThreeOnes 116 } 117 return float64(val) / fiftyThreeZeros 118 } 119 120 // BigIntToFloat64 converts BigInt to float64. 121 func BigIntToFloat64(bytesUInt []byte, maxValue []byte, hasher hash.Hash) float64 { 122 maxHashFloat := new(big.Float) 123 maxHashFloat.SetInt(new(big.Int).SetBytes(maxValue)) 124 125 hashInt := new(big.Int) 126 // checksumHash is being consumed as a big endian int. 127 hashInt.SetBytes(bytesUInt) 128 129 hashFloat := new(big.Float).SetInt(hashInt) 130 131 // float64's precision, we would not need more then that 132 // as we eventually cast everything to float64 133 // Big Float will use greater presicions in operations 134 hashFloat.SetPrec(53) 135 136 fl64value, _ := hashFloat.Quo(hashFloat, maxHashFloat).Float64() 137 138 // I don't expact that to happen, the accuracy of 256 bits division 139 // arithmetic is well within float'64 theoretical minimum for a single 140 // division and we always divide with a non zero constant. 141 if hashFloat.IsInf() { 142 panic("Float64.Quo operation has failed") 143 } 144 145 return fl64value 146 } 147 148 // Score computes score of a key for this node in accordance to Weighted 149 // Rendezvous Hash. It's using big golang float key as hexidemical encoding of 150 // a byte array. 151 func (rhn *RendezvousHashNode) Score(key string) float64 { 152 hasher := rhn.RHash.Hash() 153 154 keyBytes, err := hex.DecodeString(key) 155 if err != nil { 156 return math.NaN() 157 } 158 159 hashBytes := make([]byte, len(keyBytes)+len(rhn.Label)) 160 // Add node's seed to a key string 161 hashBytes = append(keyBytes, []byte(rhn.Label)...) 162 163 hasher.Write(hashBytes) 164 score := rhn.RHash.ScoreFunc(hasher.Sum(nil), rhn.RHash.MaxHashValue, hasher) 165 166 // for more information on this math please look at this paper: 167 // http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.414.9353&rep=rep1&type=pdf 168 // and this presentation slides: 169 // http://www.snia.org/sites/default/files/SDC15_presentations/dist_sys/Jason_Resch_New_Consistent_Hashings_Rev.pdf 170 171 return -float64(rhn.Weight) / math.Log(score) 172 } 173 174 // AddNode adds a node to a hashing ring. 175 func (rh *RendezvousHash) AddNode(seed string, weight int) { 176 node := &RendezvousHashNode{ 177 RHash: rh, 178 Label: seed, 179 Weight: weight, 180 } 181 rh.Nodes = append(rh.Nodes, node) 182 } 183 184 // RemoveNode removes a node from a hashing ring. 185 func (rh *RendezvousHash) RemoveNode(name string) { 186 for i, node := range rh.Nodes { 187 if node.Label == name { 188 rh.Nodes = append(rh.Nodes[:i], rh.Nodes[i+1:]...) 189 break 190 } 191 } 192 } 193 194 // GetNode gets a node from a hashing ring and its index in array. 195 func (rh *RendezvousHash) GetNode(name string) (*RendezvousHashNode, int) { 196 for index, node := range rh.Nodes { 197 if node.Label == name { 198 return node, index 199 } 200 } 201 return nil, -1 202 } 203 204 // GetOrderedNodes gets an ordered set of N nodes for a key where 205 // score(Node1) > score(N2) > ... score(NodeN). 206 // Number of returned nodes = min(N, len(nodes)). 207 func (rh *RendezvousHash) GetOrderedNodes(key string, n int) []*RendezvousHashNode { 208 nodes := make([]*RendezvousHashNode, len(rh.Nodes)) 209 copy(nodes, rh.Nodes) 210 211 sort.Sort(sort.Reverse(&RendezvousNodesByScore{key: key, nodes: nodes})) 212 213 if n >= len(nodes) { 214 return nodes 215 } 216 return nodes[:n] 217 }