github.com/blend/go-sdk@v1.20220411.3/consistenthash/consistent_hash.go (about)

     1  /*
     2  
     3  Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file.
     5  
     6  */
     7  
     8  package consistenthash
     9  
    10  import (
    11  	"encoding/json"
    12  	"fmt"
    13  	"sort"
    14  	"strings"
    15  	"sync"
    16  )
    17  
    18  const (
    19  	// DefaultReplicas is the default number of bucket virtual replicas.
    20  	DefaultReplicas = 16
    21  )
    22  
    23  var (
    24  	_ json.Marshaler = (*ConsistentHash)(nil)
    25  	_ fmt.Stringer   = (*ConsistentHash)(nil)
    26  )
    27  
    28  // New returns a new consistent hash.
    29  func New(opts ...Option) *ConsistentHash {
    30  	var ch ConsistentHash
    31  	for _, opt := range opts {
    32  		opt(&ch)
    33  	}
    34  	return &ch
    35  }
    36  
// Option mutates a consistent hash.
//
// Options are passed to `New`, which applies them to the freshly
// created consistent hash in the order they were given.
type Option func(*ConsistentHash)
    39  
    40  // OptBuckets adds buckets to the consistent hash.
    41  //
    42  // It is functionally equiavalent to looping over the buckets
    43  // and calling `AddBuckets(bucketsj...)` for it.
    44  func OptBuckets(buckets ...string) Option {
    45  	return func(ch *ConsistentHash) {
    46  		ch.AddBuckets(buckets...)
    47  	}
    48  }
    49  
    50  // OptReplicas sets the bucket virtual replica count.
    51  //
    52  // More virtual replicas can help with making item assignments
    53  // more uniform, but the tradeoff is every operation takes a little
    54  // longer as log2 of the number of buckets times the number of virtual replicas.
    55  //
    56  // If not provided, the default (16) is used.
    57  func OptReplicas(replicas int) Option {
    58  	return func(ch *ConsistentHash) { ch.replicas = replicas }
    59  }
    60  
    61  // OptHashFunction sets the hash function.
    62  //
    63  // The default hash function is `consistenthash.StableHash` which uses
    64  // a stable crc64 hash function to preserve ordering between process restarts.
    65  func OptHashFunction(hashFunction HashFunction) Option {
    66  	return func(ch *ConsistentHash) { ch.hashFunction = hashFunction }
    67  }
    68  
// ConsistentHash creates hashed assignments for each bucket.
//
// Every bucket is placed on an internal hash ring several times (once
// per virtual replica), and an item is assigned to the bucket owning the
// first ring entry whose hashcode is >= the item's hashcode, wrapping
// around to the first entry when there is none.
//
// The zero value is usable; buckets can be added with `AddBuckets`.
type ConsistentHash struct {
	// mu is write-locked by AddBuckets / RemoveBucket and read-locked
	// by the read methods (Buckets, Assignment, String, ...).
	mu sync.RWMutex

	// replicas is the configured virtual replica count per bucket;
	// values <= 0 mean "use DefaultReplicas" (see ReplicasOrDefault).
	replicas     int
	// buckets is the set of bucket names currently on the ring;
	// it is allocated lazily by AddBuckets.
	buckets      map[string]struct{}
	// hashFunction is the configured hash function; nil means
	// "use StableHash" (see HashFunctionOrDefault).
	hashFunction HashFunction
	// hashring holds one entry per bucket virtual replica. Lookups
	// binary search it by Hashcode, so it must stay sorted by Hashcode.
	hashring     []HashedBucket
}
    78  
    79  //
    80  // properties with defaults
    81  //
    82  
    83  // ReplicasOrDefault is the default number of bucket virtual replicas.
    84  func (ch *ConsistentHash) ReplicasOrDefault() int {
    85  	if ch.replicas > 0 {
    86  		return ch.replicas
    87  	}
    88  	return DefaultReplicas
    89  }
    90  
    91  // HashFunctionOrDefault returns the provided hash function or a default.
    92  func (ch *ConsistentHash) HashFunctionOrDefault() HashFunction {
    93  	if ch.hashFunction != nil {
    94  		return ch.hashFunction
    95  	}
    96  	return StableHash
    97  }
    98  
    99  //
   100  // Write methods
   101  //
   102  
   103  // AddBuckets adds a list of buckets to the consistent hash, and returns
   104  // a boolean indiciating if _any_ buckets were added.
   105  //
   106  // If any of the new buckets do not exist on the hash ring the
   107  // new bucket will be inserted `ReplicasOrDefault` number
   108  // of times into the internal hashring.
   109  //
   110  // If any of the new buckets already exist on the hash ring
   111  // no action is taken for that bucket.
   112  //
   113  // Calling `AddBuckets` is safe to do concurrently
   114  // and acquires a write lock on the consistent hash reference.
   115  func (ch *ConsistentHash) AddBuckets(newBuckets ...string) (ok bool) {
   116  	ch.mu.Lock()
   117  	defer ch.mu.Unlock()
   118  
   119  	if ch.buckets == nil {
   120  		ch.buckets = make(map[string]struct{})
   121  	}
   122  	for _, newBucket := range newBuckets {
   123  		if _, ok := ch.buckets[newBucket]; ok {
   124  			continue
   125  		}
   126  		ok = true
   127  		ch.buckets[newBucket] = struct{}{}
   128  		ch.insertUnsafe(newBucket)
   129  	}
   130  	return
   131  }
   132  
   133  // RemoveBucket removes a bucket from the consistent hash, and returns
   134  // a boolean indicating if the provided bucket was found.
   135  //
   136  // If the bucket exists on the hash ring, the bucket and its replicas are removed.
   137  //
   138  // If the bucket does not exist on the ring, no action is taken.
   139  //
   140  // Calling `RemoveBucket` is safe to do concurrently
   141  // and acquires a write lock on the consistent hash reference.
   142  func (ch *ConsistentHash) RemoveBucket(toRemove string) (ok bool) {
   143  	ch.mu.Lock()
   144  	defer ch.mu.Unlock()
   145  
   146  	if ch.buckets == nil {
   147  		return
   148  	}
   149  	if _, ok = ch.buckets[toRemove]; !ok {
   150  		return
   151  	}
   152  	delete(ch.buckets, toRemove)
   153  	for x := 0; x < ch.ReplicasOrDefault(); x++ {
   154  		index := ch.search(ch.bucketHashKey(toRemove, x))
   155  		ch.hashring = append(ch.hashring[:index], ch.hashring[index+1:]...)
   156  	}
   157  	return
   158  }
   159  
   160  //
   161  // Read methods
   162  //
   163  
   164  // Buckets returns the buckets.
   165  //
   166  // Calling `Buckets` is safe to do concurrently and acquires
   167  // a read lock on the consistent hash reference.
   168  func (ch *ConsistentHash) Buckets() (buckets []string) {
   169  	ch.mu.RLock()
   170  	defer ch.mu.RUnlock()
   171  
   172  	for bucket := range ch.buckets {
   173  		buckets = append(buckets, bucket)
   174  	}
   175  	sort.Strings(buckets)
   176  	return
   177  }
   178  
   179  // Assignment returns the bucket assignment for a given item.
   180  //
   181  // Calling `Assignment` is safe to do concurrently and acquires
   182  // a read lock on the consistent hash reference.
   183  func (ch *ConsistentHash) Assignment(item string) (bucket string) {
   184  	ch.mu.RLock()
   185  	defer ch.mu.RUnlock()
   186  
   187  	bucket = ch.assignmentUnsafe(item)
   188  	return
   189  }
   190  
   191  // IsAssigned returns if a given bucket is assigned a given item.
   192  //
   193  // Calling `IsAssigned` is safe to do concurrently and acquires
   194  // a read lock on the consistent hash reference.
   195  func (ch *ConsistentHash) IsAssigned(bucket, item string) (ok bool) {
   196  	ch.mu.RLock()
   197  	defer ch.mu.RUnlock()
   198  
   199  	ok = bucket == ch.assignmentUnsafe(item)
   200  	return
   201  }
   202  
   203  // Assignments returns the assignments for a given list of items organized
   204  // by the name of the bucket, and an array of the assigned items.
   205  //
   206  // Calling `Assignments` is safe to do concurrently and acquires
   207  // a read lock on the consistent hash reference.
   208  func (ch *ConsistentHash) Assignments(items ...string) map[string][]string {
   209  	ch.mu.RLock()
   210  	defer ch.mu.RUnlock()
   211  
   212  	output := make(map[string][]string)
   213  	for _, item := range items {
   214  		bucket := ch.assignmentUnsafe(item)
   215  		output[bucket] = append(output[bucket], item)
   216  	}
   217  	return output
   218  }
   219  
   220  // String returns a string form of the hash for debugging purposes.
   221  //
   222  // Calling `String` is safe to do concurrently and acquires
   223  // a read lock on the consistent hash reference.
   224  func (ch *ConsistentHash) String() string {
   225  	ch.mu.RLock()
   226  	defer ch.mu.RUnlock()
   227  
   228  	var output []string
   229  	for _, bucket := range ch.hashring {
   230  		output = append(output, fmt.Sprintf("%d:%s-%02d", bucket.Hashcode, bucket.Bucket, bucket.Replica))
   231  	}
   232  	return strings.Join(output, ", ")
   233  }
   234  
   235  // MarshalJSON marshals the consistent hash as json.
   236  //
   237  // The form of the returned json is the underlying []HashedBucket
   238  // and there is no corresponding `UnmarshalJSON` because
   239  // it is uncertain on the other end what the hashfunction is
   240  // because functions can't be json serialized.
   241  //
   242  // You should use MarshalJSON for communicating information
   243  // for debugging purposes only.
   244  //
   245  // Calling `MarshalJSON` is safe to do concurrently and acquires
   246  // a read lock on the consistent hash reference.
   247  func (ch *ConsistentHash) MarshalJSON() ([]byte, error) {
   248  	ch.mu.RLock()
   249  	defer ch.mu.RUnlock()
   250  
   251  	return json.Marshal(ch.hashring)
   252  }
   253  
   254  //
   255  // internal / unexported helpers
   256  //
   257  
   258  // assignmentUnsafe searches for the item's matching bucket based
   259  // on a binary search, and if the index returned is outside the
   260  // ring length, the first index (0) is returned to simulate wrapping around.
   261  func (ch *ConsistentHash) assignmentUnsafe(item string) (bucket string) {
   262  	index := ch.search(item)
   263  	if index >= len(ch.hashring) {
   264  		index = 0
   265  	}
   266  	bucket = ch.hashring[index].Bucket
   267  	return
   268  }
   269  
   270  // insert inserts a hashring bucket.
   271  //
   272  // insert uses an insertion sort such that the
   273  // resulting ring will remain sorted after insert.
   274  //
   275  // it will also insert `ReplicasOrDefault` copies of the bucket
   276  // to help distribute items across buckets more evenly.
   277  func (ch *ConsistentHash) insertUnsafe(bucket string) {
   278  	for x := 0; x < ch.ReplicasOrDefault(); x++ {
   279  		ch.hashring = InsertionSort(ch.hashring, HashedBucket{
   280  			Bucket:   bucket,
   281  			Replica:  x,
   282  			Hashcode: ch.hashcode(ch.bucketHashKey(bucket, x)),
   283  		})
   284  	}
   285  }
   286  
   287  // search does a binary search for the first hashring index whose
   288  // node hashcode is >= the hashcode of a given item.
   289  func (ch *ConsistentHash) search(item string) (index int) {
   290  	return sort.Search(len(ch.hashring), ch.searchFn(ch.hashcode(item)))
   291  }
   292  
   293  // searchFn returns a closure searching for a given hashcode.
   294  func (ch *ConsistentHash) searchFn(hashcode uint64) func(index int) bool {
   295  	return func(index int) bool {
   296  		return ch.hashring[index].Hashcode >= hashcode
   297  	}
   298  }
   299  
   300  // bucketHashKey formats a hash key for a given bucket virtual replica.
   301  func (ch *ConsistentHash) bucketHashKey(bucket string, index int) string {
   302  	return bucket + "|" + fmt.Sprintf("%02d", index)
   303  }
   304  
   305  // hashcode creates a hashcode for a given string
   306  func (ch *ConsistentHash) hashcode(item string) uint64 {
   307  	return ch.HashFunctionOrDefault()([]byte(item))
   308  }
   309  
// HashedBucket is a single entry in the hashring: one virtual replica
// of a bucket together with the hashcode that positions it on the ring.
//
// It is also the element type of the json produced by
// `ConsistentHash.MarshalJSON`.
type HashedBucket struct {
	// Hashcode is the hash of the bucket's per-replica hash key.
	Hashcode uint64 `json:"hashcode"`
	// Bucket is the bucket name.
	Bucket   string `json:"bucket"`
	// Replica is the virtual replica index, counted from 0.
	Replica  int    `json:"replica"`
}