github.com/blend/go-sdk@v1.20220411.3/consistenthash/consistent_hash.go (about) 1 /* 2 3 Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file. 5 6 */ 7 8 package consistenthash 9 10 import ( 11 "encoding/json" 12 "fmt" 13 "sort" 14 "strings" 15 "sync" 16 ) 17 18 const ( 19 // DefaultReplicas is the default number of bucket virtual replicas. 20 DefaultReplicas = 16 21 ) 22 23 var ( 24 _ json.Marshaler = (*ConsistentHash)(nil) 25 _ fmt.Stringer = (*ConsistentHash)(nil) 26 ) 27 28 // New returns a new consistent hash. 29 func New(opts ...Option) *ConsistentHash { 30 var ch ConsistentHash 31 for _, opt := range opts { 32 opt(&ch) 33 } 34 return &ch 35 } 36 37 // Option mutates a consistent hash. 38 type Option func(*ConsistentHash) 39 40 // OptBuckets adds buckets to the consistent hash. 41 // 42 // It is functionally equiavalent to looping over the buckets 43 // and calling `AddBuckets(bucketsj...)` for it. 44 func OptBuckets(buckets ...string) Option { 45 return func(ch *ConsistentHash) { 46 ch.AddBuckets(buckets...) 47 } 48 } 49 50 // OptReplicas sets the bucket virtual replica count. 51 // 52 // More virtual replicas can help with making item assignments 53 // more uniform, but the tradeoff is every operation takes a little 54 // longer as log2 of the number of buckets times the number of virtual replicas. 55 // 56 // If not provided, the default (16) is used. 57 func OptReplicas(replicas int) Option { 58 return func(ch *ConsistentHash) { ch.replicas = replicas } 59 } 60 61 // OptHashFunction sets the hash function. 62 // 63 // The default hash function is `consistenthash.StableHash` which uses 64 // a stable crc64 hash function to preserve ordering between process restarts. 65 func OptHashFunction(hashFunction HashFunction) Option { 66 return func(ch *ConsistentHash) { ch.hashFunction = hashFunction } 67 } 68 69 // ConsistentHash creates hashed assignments for each bucket. 70 type ConsistentHash struct { 71 mu sync.RWMutex 72 73 replicas int 74 buckets map[string]struct{} 75 hashFunction HashFunction 76 hashring []HashedBucket 77 } 78 79 // 80 // properties with defaults 81 // 82 83 // ReplicasOrDefault is the default number of bucket virtual replicas. 84 func (ch *ConsistentHash) ReplicasOrDefault() int { 85 if ch.replicas > 0 { 86 return ch.replicas 87 } 88 return DefaultReplicas 89 } 90 91 // HashFunctionOrDefault returns the provided hash function or a default. 92 func (ch *ConsistentHash) HashFunctionOrDefault() HashFunction { 93 if ch.hashFunction != nil { 94 return ch.hashFunction 95 } 96 return StableHash 97 } 98 99 // 100 // Write methods 101 // 102 103 // AddBuckets adds a list of buckets to the consistent hash, and returns 104 // a boolean indiciating if _any_ buckets were added. 105 // 106 // If any of the new buckets do not exist on the hash ring the 107 // new bucket will be inserted `ReplicasOrDefault` number 108 // of times into the internal hashring. 109 // 110 // If any of the new buckets already exist on the hash ring 111 // no action is taken for that bucket. 112 // 113 // Calling `AddBuckets` is safe to do concurrently 114 // and acquires a write lock on the consistent hash reference. 115 func (ch *ConsistentHash) AddBuckets(newBuckets ...string) (ok bool) { 116 ch.mu.Lock() 117 defer ch.mu.Unlock() 118 119 if ch.buckets == nil { 120 ch.buckets = make(map[string]struct{}) 121 } 122 for _, newBucket := range newBuckets { 123 if _, ok := ch.buckets[newBucket]; ok { 124 continue 125 } 126 ok = true 127 ch.buckets[newBucket] = struct{}{} 128 ch.insertUnsafe(newBucket) 129 } 130 return 131 } 132 133 // RemoveBucket removes a bucket from the consistent hash, and returns 134 // a boolean indicating if the provided bucket was found. 135 // 136 // If the bucket exists on the hash ring, the bucket and its replicas are removed. 137 // 138 // If the bucket does not exist on the ring, no action is taken. 139 // 140 // Calling `RemoveBucket` is safe to do concurrently 141 // and acquires a write lock on the consistent hash reference. 142 func (ch *ConsistentHash) RemoveBucket(toRemove string) (ok bool) { 143 ch.mu.Lock() 144 defer ch.mu.Unlock() 145 146 if ch.buckets == nil { 147 return 148 } 149 if _, ok = ch.buckets[toRemove]; !ok { 150 return 151 } 152 delete(ch.buckets, toRemove) 153 for x := 0; x < ch.ReplicasOrDefault(); x++ { 154 index := ch.search(ch.bucketHashKey(toRemove, x)) 155 ch.hashring = append(ch.hashring[:index], ch.hashring[index+1:]...) 156 } 157 return 158 } 159 160 // 161 // Read methods 162 // 163 164 // Buckets returns the buckets. 165 // 166 // Calling `Buckets` is safe to do concurrently and acquires 167 // a read lock on the consistent hash reference. 168 func (ch *ConsistentHash) Buckets() (buckets []string) { 169 ch.mu.RLock() 170 defer ch.mu.RUnlock() 171 172 for bucket := range ch.buckets { 173 buckets = append(buckets, bucket) 174 } 175 sort.Strings(buckets) 176 return 177 } 178 179 // Assignment returns the bucket assignment for a given item. 180 // 181 // Calling `Assignment` is safe to do concurrently and acquires 182 // a read lock on the consistent hash reference. 183 func (ch *ConsistentHash) Assignment(item string) (bucket string) { 184 ch.mu.RLock() 185 defer ch.mu.RUnlock() 186 187 bucket = ch.assignmentUnsafe(item) 188 return 189 } 190 191 // IsAssigned returns if a given bucket is assigned a given item. 192 // 193 // Calling `IsAssigned` is safe to do concurrently and acquires 194 // a read lock on the consistent hash reference. 195 func (ch *ConsistentHash) IsAssigned(bucket, item string) (ok bool) { 196 ch.mu.RLock() 197 defer ch.mu.RUnlock() 198 199 ok = bucket == ch.assignmentUnsafe(item) 200 return 201 } 202 203 // Assignments returns the assignments for a given list of items organized 204 // by the name of the bucket, and an array of the assigned items. 205 // 206 // Calling `Assignments` is safe to do concurrently and acquires 207 // a read lock on the consistent hash reference. 208 func (ch *ConsistentHash) Assignments(items ...string) map[string][]string { 209 ch.mu.RLock() 210 defer ch.mu.RUnlock() 211 212 output := make(map[string][]string) 213 for _, item := range items { 214 bucket := ch.assignmentUnsafe(item) 215 output[bucket] = append(output[bucket], item) 216 } 217 return output 218 } 219 220 // String returns a string form of the hash for debugging purposes. 221 // 222 // Calling `String` is safe to do concurrently and acquires 223 // a read lock on the consistent hash reference. 224 func (ch *ConsistentHash) String() string { 225 ch.mu.RLock() 226 defer ch.mu.RUnlock() 227 228 var output []string 229 for _, bucket := range ch.hashring { 230 output = append(output, fmt.Sprintf("%d:%s-%02d", bucket.Hashcode, bucket.Bucket, bucket.Replica)) 231 } 232 return strings.Join(output, ", ") 233 } 234 235 // MarshalJSON marshals the consistent hash as json. 236 // 237 // The form of the returned json is the underlying []HashedBucket 238 // and there is no corresponding `UnmarshalJSON` because 239 // it is uncertain on the other end what the hashfunction is 240 // because functions can't be json serialized. 241 // 242 // You should use MarshalJSON for communicating information 243 // for debugging purposes only. 244 // 245 // Calling `MarshalJSON` is safe to do concurrently and acquires 246 // a read lock on the consistent hash reference. 247 func (ch *ConsistentHash) MarshalJSON() ([]byte, error) { 248 ch.mu.RLock() 249 defer ch.mu.RUnlock() 250 251 return json.Marshal(ch.hashring) 252 } 253 254 // 255 // internal / unexported helpers 256 // 257 258 // assignmentUnsafe searches for the item's matching bucket based 259 // on a binary search, and if the index returned is outside the 260 // ring length, the first index (0) is returned to simulate wrapping around. 261 func (ch *ConsistentHash) assignmentUnsafe(item string) (bucket string) { 262 index := ch.search(item) 263 if index >= len(ch.hashring) { 264 index = 0 265 } 266 bucket = ch.hashring[index].Bucket 267 return 268 } 269 270 // insert inserts a hashring bucket. 271 // 272 // insert uses an insertion sort such that the 273 // resulting ring will remain sorted after insert. 274 // 275 // it will also insert `ReplicasOrDefault` copies of the bucket 276 // to help distribute items across buckets more evenly. 277 func (ch *ConsistentHash) insertUnsafe(bucket string) { 278 for x := 0; x < ch.ReplicasOrDefault(); x++ { 279 ch.hashring = InsertionSort(ch.hashring, HashedBucket{ 280 Bucket: bucket, 281 Replica: x, 282 Hashcode: ch.hashcode(ch.bucketHashKey(bucket, x)), 283 }) 284 } 285 } 286 287 // search does a binary search for the first hashring index whose 288 // node hashcode is >= the hashcode of a given item. 289 func (ch *ConsistentHash) search(item string) (index int) { 290 return sort.Search(len(ch.hashring), ch.searchFn(ch.hashcode(item))) 291 } 292 293 // searchFn returns a closure searching for a given hashcode. 294 func (ch *ConsistentHash) searchFn(hashcode uint64) func(index int) bool { 295 return func(index int) bool { 296 return ch.hashring[index].Hashcode >= hashcode 297 } 298 } 299 300 // bucketHashKey formats a hash key for a given bucket virtual replica. 301 func (ch *ConsistentHash) bucketHashKey(bucket string, index int) string { 302 return bucket + "|" + fmt.Sprintf("%02d", index) 303 } 304 305 // hashcode creates a hashcode for a given string 306 func (ch *ConsistentHash) hashcode(item string) uint64 { 307 return ch.HashFunctionOrDefault()([]byte(item)) 308 } 309 310 // HashedBucket is a bucket in the hashring 311 // that holds the hashcode, the bucket name (as Bucket) 312 // and the virtual replica index. 313 type HashedBucket struct { 314 Hashcode uint64 `json:"hashcode"` 315 Bucket string `json:"bucket"` 316 Replica int `json:"replica"` 317 }