go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/sdk/consistenthash/consistent_hash.go (about) 1 /* 2 3 Copyright (c) 2023 - Present. Will Charczuk. All rights reserved. 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository. 5 6 */ 7 8 package consistenthash 9 10 import ( 11 "encoding/json" 12 "fmt" 13 "sort" 14 "strings" 15 "sync" 16 ) 17 18 const ( 19 // DefaultReplicas is the default number of bucket virtual replicas. 20 DefaultReplicas = 16 21 ) 22 23 var ( 24 _ json.Marshaler = (*ConsistentHash)(nil) 25 _ fmt.Stringer = (*ConsistentHash)(nil) 26 ) 27 28 // Options are the options for the consistent hash type. 29 type Options struct { 30 Replicas int 31 HashFunction HashFunction 32 } 33 34 // Option mutates options. 35 type Option func(*Options) 36 37 // OptReplicas sets the replicas on options. 38 func OptReplicas(replicas int) Option { 39 return func(o *Options) { 40 o.Replicas = replicas 41 } 42 } 43 44 // OptReplicas sets the replicas on options. 45 func OptHashFunction(hashFunction HashFunction) Option { 46 return func(o *Options) { 47 o.HashFunction = hashFunction 48 } 49 } 50 51 // New creates a new consistent hash instance. 52 func New(opts ...Option) *ConsistentHash { 53 var options Options 54 for _, opt := range opts { 55 opt(&options) 56 } 57 return &ConsistentHash{ 58 replicas: options.Replicas, 59 hashFunction: options.HashFunction, 60 } 61 } 62 63 // ConsistentHash creates hashed assignments for each bucket. 64 // 65 // You _must_ use `New` to parameterize ConsistentHash beyond the 66 // defaults for `replicas` and `hashFunction`. 67 // 68 // This is done because these parameters if changed after data has been added 69 // will lead to inconsistent behavior. 70 type ConsistentHash struct { 71 replicas int 72 hashFunction HashFunction 73 mu sync.RWMutex 74 buckets map[string]struct{} 75 hashring []HashedBucket 76 } 77 78 // 79 // properties with defaults 80 // 81 82 // Replicas is the default number of bucket virtual replicas. 83 func (ch *ConsistentHash) Replicas() int { 84 if ch.replicas > 0 { 85 return ch.replicas 86 } 87 return DefaultReplicas 88 } 89 90 // HashFunction returns the provided hash function or a default. 91 func (ch *ConsistentHash) HashFunction() HashFunction { 92 if ch.hashFunction != nil { 93 return ch.hashFunction 94 } 95 return StableHash 96 } 97 98 // 99 // Write methods 100 // 101 102 // AddBuckets adds a list of buckets to the consistent hash, and returns 103 // a boolean indiciating if _any_ buckets were added. 104 // 105 // If any of the new buckets do not exist on the hash ring the 106 // new bucket will be inserted `ReplicasOrDefault` number 107 // of times into the internal hashring. 108 // 109 // If any of the new buckets already exist on the hash ring 110 // no action is taken for that bucket (it's effectively skipped). 111 // 112 // Calling `AddBuckets` is safe to do concurrently 113 // and acquires a write lock on the consistent hash reference. 114 func (ch *ConsistentHash) AddBuckets(newBuckets ...string) (ok bool) { 115 ch.mu.Lock() 116 defer ch.mu.Unlock() 117 118 if ch.buckets == nil { 119 ch.buckets = make(map[string]struct{}) 120 } 121 for _, newBucket := range newBuckets { 122 if _, ok := ch.buckets[newBucket]; ok { 123 continue 124 } 125 ok = true 126 ch.buckets[newBucket] = struct{}{} 127 ch.insertUnsafe(newBucket) 128 } 129 return 130 } 131 132 // RemoveBucket removes a bucket from the consistent hash, and returns 133 // a boolean indicating if the provided bucket was found. 134 // 135 // If the bucket exists on the hash ring, the bucket and its replicas are removed. 136 // 137 // If the bucket does not exist on the ring, no action is taken. 138 // 139 // Calling `RemoveBucket` is safe to do concurrently 140 // and acquires a write lock on the consistent hash reference. 141 func (ch *ConsistentHash) RemoveBucket(toRemove string) (ok bool) { 142 ch.mu.Lock() 143 defer ch.mu.Unlock() 144 145 if ch.buckets == nil { 146 return 147 } 148 if _, ok = ch.buckets[toRemove]; !ok { 149 return 150 } 151 // delete the bucket entry 152 delete(ch.buckets, toRemove) 153 154 // delete all the replicas from the hash ring for the bucket (there can be many!) 155 for x := 0; x < ch.Replicas(); x++ { 156 index := ch.search(ch.bucketHashKey(toRemove, x)) 157 // do slice things to pull it out of the ring. 158 ch.hashring = append(ch.hashring[:index], ch.hashring[index+1:]...) 159 } 160 return 161 } 162 163 // 164 // Read methods 165 // 166 167 // Buckets returns the buckets. 168 // 169 // Calling `Buckets` is safe to do concurrently and acquires 170 // a read lock on the consistent hash reference. 171 func (ch *ConsistentHash) Buckets() (buckets []string) { 172 ch.mu.RLock() 173 defer ch.mu.RUnlock() 174 175 for bucket := range ch.buckets { 176 buckets = append(buckets, bucket) 177 } 178 sort.Strings(buckets) 179 return 180 } 181 182 // Assignment returns the bucket assignment for a given item. 183 // 184 // Calling `Assignment` is safe to do concurrently and acquires 185 // a read lock on the consistent hash reference. 186 func (ch *ConsistentHash) Assignment(item string) (bucket string) { 187 ch.mu.RLock() 188 defer ch.mu.RUnlock() 189 190 bucket = ch.assignmentUnsafe(item) 191 return 192 } 193 194 // IsAssigned returns if a given bucket is assigned a given item. 195 // 196 // Calling `IsAssigned` is safe to do concurrently and acquires 197 // a read lock on the consistent hash reference. 198 func (ch *ConsistentHash) IsAssigned(bucket, item string) (ok bool) { 199 ch.mu.RLock() 200 defer ch.mu.RUnlock() 201 202 ok = bucket == ch.assignmentUnsafe(item) 203 return 204 } 205 206 // Assignments returns the assignments for a given list of items organized 207 // by the name of the bucket, and an array of the assigned items. 208 // 209 // Calling `Assignments` is safe to do concurrently and acquires 210 // a read lock on the consistent hash reference. 211 func (ch *ConsistentHash) Assignments(items ...string) map[string][]string { 212 ch.mu.RLock() 213 defer ch.mu.RUnlock() 214 215 output := make(map[string][]string) 216 for _, item := range items { 217 bucket := ch.assignmentUnsafe(item) 218 output[bucket] = append(output[bucket], item) 219 } 220 return output 221 } 222 223 // String returns a string form of the hash for debugging purposes. 224 // 225 // Calling `String` is safe to do concurrently and acquires 226 // a read lock on the consistent hash reference. 227 func (ch *ConsistentHash) String() string { 228 ch.mu.RLock() 229 defer ch.mu.RUnlock() 230 231 var output []string 232 for _, bucket := range ch.hashring { 233 output = append(output, fmt.Sprintf("%d:%s-%02d", bucket.Hashcode, bucket.Bucket, bucket.Replica)) 234 } 235 return strings.Join(output, ", ") 236 } 237 238 // MarshalJSON marshals the consistent hash as json. 239 // 240 // The form of the returned json is the underlying []HashedBucket 241 // and there is no corresponding `UnmarshalJSON` because 242 // it is uncertain on the other end what the hashfunction is 243 // because functions can't be json serialized. 244 // 245 // You should use MarshalJSON for communicating information 246 // for debugging purposes only. 247 // 248 // Calling `MarshalJSON` is safe to do concurrently and acquires 249 // a read lock on the consistent hash reference. 250 func (ch *ConsistentHash) MarshalJSON() ([]byte, error) { 251 ch.mu.RLock() 252 defer ch.mu.RUnlock() 253 254 return json.Marshal(ch.hashring) 255 } 256 257 // 258 // internal / unexported helpers 259 // 260 261 // assignmentUnsafe searches for the item's matching bucket based 262 // on a binary search, and if the index returned is outside the 263 // ring length, the first index (0) is returned to simulate wrapping around. 264 func (ch *ConsistentHash) assignmentUnsafe(item string) (bucket string) { 265 index := ch.search(item) 266 if index >= len(ch.hashring) { 267 index = 0 268 } 269 bucket = ch.hashring[index].Bucket 270 return 271 } 272 273 // insert inserts a hashring bucket. 274 // 275 // insert uses an insertion sort such that the 276 // resulting ring will remain sorted after insert. 277 // 278 // it will also insert `ReplicasOrDefault` copies of the bucket 279 // to help distribute items across buckets more evenly. 280 func (ch *ConsistentHash) insertUnsafe(bucket string) { 281 for x := 0; x < ch.Replicas(); x++ { 282 ch.insertionSort(HashedBucket{ 283 Hashcode: ch.hashcode(ch.bucketHashKey(bucket, x)), 284 Bucket: bucket, 285 Replica: x, 286 }) 287 } 288 } 289 290 // insertionSort inserts an bucket into the hashring by binary searching 291 // for the index which would satisfy the overall "sorted" status of the ring. 292 func (ch *ConsistentHash) insertionSort(item HashedBucket) { 293 destinationIndex := sort.Search(len(ch.hashring), func(index int) bool { 294 return ch.hashring[index].Hashcode >= item.Hashcode 295 }) 296 // potentially grow the hashring to accommodate the new entry 297 ch.hashring = append(ch.hashring, HashedBucket{}) 298 // move elements around the new entry index 299 copy(ch.hashring[destinationIndex+1:], ch.hashring[destinationIndex:]) 300 // assign the destination index directly 301 ch.hashring[destinationIndex] = item 302 } 303 304 // search does a binary search for the first hashring index whose 305 // node hashcode is >= the hashcode of a given item. 306 func (ch *ConsistentHash) search(item string) (index int) { 307 index = sort.Search(len(ch.hashring), ch.searchFn(ch.hashcode(item))) 308 return 309 } 310 311 // searchFn returns a closure searching for a given hashcode. 312 func (ch *ConsistentHash) searchFn(hashcode uint64) func(int) bool { 313 return func(index int) bool { 314 return ch.hashring[index].Hashcode >= hashcode 315 } 316 } 317 318 // bucketHashKey formats a hash key for a given bucket virtual replica. 319 func (ch *ConsistentHash) bucketHashKey(bucket string, index int) string { 320 return bucket + "|" + fmt.Sprintf("%02d", index) 321 } 322 323 // hashcode creates a hashcode for a given string 324 func (ch *ConsistentHash) hashcode(item string) uint64 { 325 return ch.HashFunction()([]byte(item)) 326 } 327 328 // HashedBucket is a bucket in the hashring 329 // that holds the hashcode, the bucket name (as Bucket) 330 // and the virtual replica index. 331 type HashedBucket struct { 332 Hashcode uint64 `json:"hashcode"` 333 Bucket string `json:"bucket"` 334 Replica int `json:"replica"` 335 }