go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/sdk/consistenthash/consistent_hash.go (about)

     1  /*
     2  
     3  Copyright (c) 2023 - Present. Will Charczuk. All rights reserved.
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository.
     5  
     6  */
     7  
     8  package consistenthash
     9  
    10  import (
    11  	"encoding/json"
    12  	"fmt"
    13  	"sort"
    14  	"strings"
    15  	"sync"
    16  )
    17  
    18  const (
    19  	// DefaultReplicas is the default number of bucket virtual replicas.
    20  	DefaultReplicas = 16
    21  )
    22  
    23  var (
    24  	_ json.Marshaler = (*ConsistentHash)(nil)
    25  	_ fmt.Stringer   = (*ConsistentHash)(nil)
    26  )
    27  
// Options are the options for the consistent hash type.
//
// Options are collected by `New` from the provided `Option` values and
// copied onto the returned ConsistentHash.
type Options struct {
	// Replicas is the number of virtual replicas to create per bucket.
	// Values <= 0 cause ConsistentHash to fall back to DefaultReplicas.
	Replicas int
	// HashFunction is the function used to hash bucket keys and items.
	// A nil value causes ConsistentHash to fall back to StableHash.
	HashFunction HashFunction
}
    33  
// Option mutates options.
//
// Options are applied by `New` in the order they are passed, so a later
// option overwrites a field set by an earlier one.
type Option func(*Options)
    36  
    37  // OptReplicas sets the replicas on options.
    38  func OptReplicas(replicas int) Option {
    39  	return func(o *Options) {
    40  		o.Replicas = replicas
    41  	}
    42  }
    43  
    44  // OptReplicas sets the replicas on options.
    45  func OptHashFunction(hashFunction HashFunction) Option {
    46  	return func(o *Options) {
    47  		o.HashFunction = hashFunction
    48  	}
    49  }
    50  
    51  // New creates a new consistent hash instance.
    52  func New(opts ...Option) *ConsistentHash {
    53  	var options Options
    54  	for _, opt := range opts {
    55  		opt(&options)
    56  	}
    57  	return &ConsistentHash{
    58  		replicas:     options.Replicas,
    59  		hashFunction: options.HashFunction,
    60  	}
    61  }
    62  
// ConsistentHash creates hashed assignments for each bucket.
//
// You _must_ use `New` to parameterize ConsistentHash beyond the
// defaults for `replicas` and `hashFunction`.
//
// This is done because these parameters if changed after data has been added
// will lead to inconsistent behavior.
type ConsistentHash struct {
	// replicas is the configured number of virtual replicas per bucket;
	// values <= 0 mean "use DefaultReplicas" (see Replicas).
	replicas int
	// hashFunction is the configured hash function; nil means
	// "use StableHash" (see HashFunction).
	hashFunction HashFunction
	// mu guards buckets and hashring; write methods take the write lock
	// and read methods take the read lock.
	mu sync.RWMutex
	// buckets is the set of bucket names currently on the ring.
	buckets map[string]struct{}
	// hashring holds one entry per bucket virtual replica, kept sorted
	// in ascending Hashcode order by insertionSort.
	hashring []HashedBucket
}
    77  
    78  //
    79  // properties with defaults
    80  //
    81  
    82  // Replicas is the default number of bucket virtual replicas.
    83  func (ch *ConsistentHash) Replicas() int {
    84  	if ch.replicas > 0 {
    85  		return ch.replicas
    86  	}
    87  	return DefaultReplicas
    88  }
    89  
    90  // HashFunction returns the provided hash function or a default.
    91  func (ch *ConsistentHash) HashFunction() HashFunction {
    92  	if ch.hashFunction != nil {
    93  		return ch.hashFunction
    94  	}
    95  	return StableHash
    96  }
    97  
    98  //
    99  // Write methods
   100  //
   101  
   102  // AddBuckets adds a list of buckets to the consistent hash, and returns
   103  // a boolean indiciating if _any_ buckets were added.
   104  //
   105  // If any of the new buckets do not exist on the hash ring the
   106  // new bucket will be inserted `ReplicasOrDefault` number
   107  // of times into the internal hashring.
   108  //
   109  // If any of the new buckets already exist on the hash ring
   110  // no action is taken for that bucket (it's effectively skipped).
   111  //
   112  // Calling `AddBuckets` is safe to do concurrently
   113  // and acquires a write lock on the consistent hash reference.
   114  func (ch *ConsistentHash) AddBuckets(newBuckets ...string) (ok bool) {
   115  	ch.mu.Lock()
   116  	defer ch.mu.Unlock()
   117  
   118  	if ch.buckets == nil {
   119  		ch.buckets = make(map[string]struct{})
   120  	}
   121  	for _, newBucket := range newBuckets {
   122  		if _, ok := ch.buckets[newBucket]; ok {
   123  			continue
   124  		}
   125  		ok = true
   126  		ch.buckets[newBucket] = struct{}{}
   127  		ch.insertUnsafe(newBucket)
   128  	}
   129  	return
   130  }
   131  
   132  // RemoveBucket removes a bucket from the consistent hash, and returns
   133  // a boolean indicating if the provided bucket was found.
   134  //
   135  // If the bucket exists on the hash ring, the bucket and its replicas are removed.
   136  //
   137  // If the bucket does not exist on the ring, no action is taken.
   138  //
   139  // Calling `RemoveBucket` is safe to do concurrently
   140  // and acquires a write lock on the consistent hash reference.
   141  func (ch *ConsistentHash) RemoveBucket(toRemove string) (ok bool) {
   142  	ch.mu.Lock()
   143  	defer ch.mu.Unlock()
   144  
   145  	if ch.buckets == nil {
   146  		return
   147  	}
   148  	if _, ok = ch.buckets[toRemove]; !ok {
   149  		return
   150  	}
   151  	// delete the bucket entry
   152  	delete(ch.buckets, toRemove)
   153  
   154  	// delete all the replicas from the hash ring for the bucket (there can be many!)
   155  	for x := 0; x < ch.Replicas(); x++ {
   156  		index := ch.search(ch.bucketHashKey(toRemove, x))
   157  		// do slice things to pull it out of the ring.
   158  		ch.hashring = append(ch.hashring[:index], ch.hashring[index+1:]...)
   159  	}
   160  	return
   161  }
   162  
   163  //
   164  // Read methods
   165  //
   166  
   167  // Buckets returns the buckets.
   168  //
   169  // Calling `Buckets` is safe to do concurrently and acquires
   170  // a read lock on the consistent hash reference.
   171  func (ch *ConsistentHash) Buckets() (buckets []string) {
   172  	ch.mu.RLock()
   173  	defer ch.mu.RUnlock()
   174  
   175  	for bucket := range ch.buckets {
   176  		buckets = append(buckets, bucket)
   177  	}
   178  	sort.Strings(buckets)
   179  	return
   180  }
   181  
   182  // Assignment returns the bucket assignment for a given item.
   183  //
   184  // Calling `Assignment` is safe to do concurrently and acquires
   185  // a read lock on the consistent hash reference.
   186  func (ch *ConsistentHash) Assignment(item string) (bucket string) {
   187  	ch.mu.RLock()
   188  	defer ch.mu.RUnlock()
   189  
   190  	bucket = ch.assignmentUnsafe(item)
   191  	return
   192  }
   193  
   194  // IsAssigned returns if a given bucket is assigned a given item.
   195  //
   196  // Calling `IsAssigned` is safe to do concurrently and acquires
   197  // a read lock on the consistent hash reference.
   198  func (ch *ConsistentHash) IsAssigned(bucket, item string) (ok bool) {
   199  	ch.mu.RLock()
   200  	defer ch.mu.RUnlock()
   201  
   202  	ok = bucket == ch.assignmentUnsafe(item)
   203  	return
   204  }
   205  
   206  // Assignments returns the assignments for a given list of items organized
   207  // by the name of the bucket, and an array of the assigned items.
   208  //
   209  // Calling `Assignments` is safe to do concurrently and acquires
   210  // a read lock on the consistent hash reference.
   211  func (ch *ConsistentHash) Assignments(items ...string) map[string][]string {
   212  	ch.mu.RLock()
   213  	defer ch.mu.RUnlock()
   214  
   215  	output := make(map[string][]string)
   216  	for _, item := range items {
   217  		bucket := ch.assignmentUnsafe(item)
   218  		output[bucket] = append(output[bucket], item)
   219  	}
   220  	return output
   221  }
   222  
   223  // String returns a string form of the hash for debugging purposes.
   224  //
   225  // Calling `String` is safe to do concurrently and acquires
   226  // a read lock on the consistent hash reference.
   227  func (ch *ConsistentHash) String() string {
   228  	ch.mu.RLock()
   229  	defer ch.mu.RUnlock()
   230  
   231  	var output []string
   232  	for _, bucket := range ch.hashring {
   233  		output = append(output, fmt.Sprintf("%d:%s-%02d", bucket.Hashcode, bucket.Bucket, bucket.Replica))
   234  	}
   235  	return strings.Join(output, ", ")
   236  }
   237  
   238  // MarshalJSON marshals the consistent hash as json.
   239  //
   240  // The form of the returned json is the underlying []HashedBucket
   241  // and there is no corresponding `UnmarshalJSON` because
   242  // it is uncertain on the other end what the hashfunction is
   243  // because functions can't be json serialized.
   244  //
   245  // You should use MarshalJSON for communicating information
   246  // for debugging purposes only.
   247  //
   248  // Calling `MarshalJSON` is safe to do concurrently and acquires
   249  // a read lock on the consistent hash reference.
   250  func (ch *ConsistentHash) MarshalJSON() ([]byte, error) {
   251  	ch.mu.RLock()
   252  	defer ch.mu.RUnlock()
   253  
   254  	return json.Marshal(ch.hashring)
   255  }
   256  
   257  //
   258  // internal / unexported helpers
   259  //
   260  
   261  // assignmentUnsafe searches for the item's matching bucket based
   262  // on a binary search, and if the index returned is outside the
   263  // ring length, the first index (0) is returned to simulate wrapping around.
   264  func (ch *ConsistentHash) assignmentUnsafe(item string) (bucket string) {
   265  	index := ch.search(item)
   266  	if index >= len(ch.hashring) {
   267  		index = 0
   268  	}
   269  	bucket = ch.hashring[index].Bucket
   270  	return
   271  }
   272  
   273  // insert inserts a hashring bucket.
   274  //
   275  // insert uses an insertion sort such that the
   276  // resulting ring will remain sorted after insert.
   277  //
   278  // it will also insert `ReplicasOrDefault` copies of the bucket
   279  // to help distribute items across buckets more evenly.
   280  func (ch *ConsistentHash) insertUnsafe(bucket string) {
   281  	for x := 0; x < ch.Replicas(); x++ {
   282  		ch.insertionSort(HashedBucket{
   283  			Hashcode: ch.hashcode(ch.bucketHashKey(bucket, x)),
   284  			Bucket:   bucket,
   285  			Replica:  x,
   286  		})
   287  	}
   288  }
   289  
   290  // insertionSort inserts an bucket into the hashring by binary searching
   291  // for the index which would satisfy the overall "sorted" status of the ring.
   292  func (ch *ConsistentHash) insertionSort(item HashedBucket) {
   293  	destinationIndex := sort.Search(len(ch.hashring), func(index int) bool {
   294  		return ch.hashring[index].Hashcode >= item.Hashcode
   295  	})
   296  	// potentially grow the hashring to accommodate the new entry
   297  	ch.hashring = append(ch.hashring, HashedBucket{})
   298  	// move elements around the new entry index
   299  	copy(ch.hashring[destinationIndex+1:], ch.hashring[destinationIndex:])
   300  	// assign the destination index directly
   301  	ch.hashring[destinationIndex] = item
   302  }
   303  
   304  // search does a binary search for the first hashring index whose
   305  // node hashcode is >= the hashcode of a given item.
   306  func (ch *ConsistentHash) search(item string) (index int) {
   307  	index = sort.Search(len(ch.hashring), ch.searchFn(ch.hashcode(item)))
   308  	return
   309  }
   310  
   311  // searchFn returns a closure searching for a given hashcode.
   312  func (ch *ConsistentHash) searchFn(hashcode uint64) func(int) bool {
   313  	return func(index int) bool {
   314  		return ch.hashring[index].Hashcode >= hashcode
   315  	}
   316  }
   317  
   318  // bucketHashKey formats a hash key for a given bucket virtual replica.
   319  func (ch *ConsistentHash) bucketHashKey(bucket string, index int) string {
   320  	return bucket + "|" + fmt.Sprintf("%02d", index)
   321  }
   322  
   323  // hashcode creates a hashcode for a given string
   324  func (ch *ConsistentHash) hashcode(item string) uint64 {
   325  	return ch.HashFunction()([]byte(item))
   326  }
   327  
// HashedBucket is a bucket in the hashring
// that holds the hashcode, the bucket name (as Bucket)
// and the virtual replica index.
type HashedBucket struct {
	// Hashcode is the hash of the bucket's key for this replica; the
	// hashring is kept sorted by this value.
	Hashcode uint64 `json:"hashcode"`
	// Bucket is the name of the bucket this entry belongs to.
	Bucket string `json:"bucket"`
	// Replica is the zero-based virtual replica index of this entry.
	Replica int `json:"replica"`
}