github.com/aaabigfish/gopkg@v1.1.0/mq/nsq/balancer.go

package nsq

import (
	"hash"
	"hash/crc32"
	"hash/fnv"
	"math/rand"
	"sync"
	"sync/atomic"
)

// The Balancer interface provides an abstraction of the message distribution
// logic used by Writer instances to route messages to the partitions available
// on an nsq cluster.
//
// Balancers must be safe to use concurrently from multiple goroutines.
type Balancer interface {
	// Balance receives a message key and a set of available partitions and
	// returns the partition number that the message should be routed to.
	//
	// An application should refrain from using a single balancer to manage
	// multiple sets of partitions (from different topics, for example). Use
	// one balancer instance per partition set, so the balancer can detect when
	// the partitions change and assume that the nsq topic has been rebalanced.
	Balance(key []byte, partitions ...int) (partition int)
}

// BalancerFunc is an implementation of the Balancer interface that makes it
// possible to use regular functions to distribute messages across partitions.
type BalancerFunc func([]byte, ...int) int

// Balance calls f, satisfying the Balancer interface.
func (f BalancerFunc) Balance(key []byte, partitions ...int) int {
	return f(key, partitions...)
}
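
// Illustrative usage sketch (not part of the package; key and partition values
// are made up): BalancerFunc lets a plain function act as a Balancer.
//
//	alwaysFirst := BalancerFunc(func(key []byte, partitions ...int) int {
//		return partitions[0] // route every message to the first listed partition
//	})
//	_ = alwaysFirst.Balance([]byte("some-key"), 3, 5, 7) // returns 3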

// RoundRobin is a Balancer implementation that equally distributes messages
// across all available partitions.
type RoundRobin struct {
	// Use a 32-bit integer so RoundRobin values don't need to be aligned to
	// apply atomic increments.
	offset uint32
}

// Balance satisfies the Balancer interface.
func (rr *RoundRobin) Balance(key []byte, partitions ...int) int {
	return rr.balance(partitions)
}

func (rr *RoundRobin) balance(partitions []int) int {
	length := uint32(len(partitions))
	offset := atomic.AddUint32(&rr.offset, 1) - 1
	return partitions[offset%length]
}
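
// Illustrative sketch (assumed partition values): successive calls cycle through
// the partition list regardless of the key, so a fresh RoundRobin over
// partitions 0, 1, 2 yields 0, 1, 2, 0, 1, 2, ...
//
//	var rr RoundRobin
//	_ = rr.Balance(nil, 0, 1, 2) // 0
//	_ = rr.Balance(nil, 0, 1, 2) // 1
//	_ = rr.Balance(nil, 0, 1, 2) // 2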

var (
	fnv1aPool = &sync.Pool{
		New: func() interface{} {
			return fnv.New32a()
		},
	}
)

// Hash is a Balancer that uses the provided hash function to determine which
// partition to route messages to.  This ensures that messages with the same key
// are routed to the same partition.
//
// The logic to calculate the partition is:
//
//	abs(int32(hasher.Sum32())) % len(partitions) => partition
//
// By default, Hash uses the FNV-1a algorithm.  This is the same algorithm used
// by the Sarama producer, which ensures that messages produced through this
// package are routed to the same partitions that the Sarama producer would
// route them to.
type Hash struct {
	rr     RoundRobin
	Hasher hash.Hash32

	// lock protects Hasher while calculating the hash code.  It is assumed that
	// the Hasher field is read-only once the Balancer is created, so as a
	// performance optimization, reads of the field are not protected.
	lock sync.Mutex
}

func (h *Hash) Balance(key []byte, partitions ...int) int {
	if key == nil {
		return h.rr.Balance(key, partitions...)
	}

	hasher := h.Hasher
	if hasher != nil {
		h.lock.Lock()
		defer h.lock.Unlock()
	} else {
		hasher = fnv1aPool.Get().(hash.Hash32)
		defer fnv1aPool.Put(hasher)
	}

	hasher.Reset()
	if _, err := hasher.Write(key); err != nil {
		panic(err)
	}

	// Uses the same algorithm that Sarama's hashPartitioner uses.
	// Note the type conversions here: if the uint32 hash code is not cast to
	// an int32, we do not get the same result as Sarama.
	partition := int32(hasher.Sum32()) % int32(len(partitions))
	if partition < 0 {
		partition = -partition
	}

	return int(partition)
}
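
// Illustrative sketch (assumed key and partition values): the same key always
// maps to the same partition, and a nil key falls back to round-robin. A
// custom hash.Hash32 such as crc32.NewIEEE() may be supplied via the Hasher
// field; otherwise FNV-1a is used.
//
//	h := &Hash{} // FNV-1a by default
//	p1 := h.Balance([]byte("user-42"), 0, 1, 2, 3)
//	p2 := h.Balance([]byte("user-42"), 0, 1, 2, 3)
//	// p1 == p2
//
//	custom := &Hash{Hasher: crc32.NewIEEE()}
//	_ = custom.Balance([]byte("user-42"), 0, 1, 2, 3)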

// ReferenceHash is a Balancer that uses the provided hash function to determine which
// partition to route messages to.  This ensures that messages with the same key
// are routed to the same partition.
//
// The logic to calculate the partition is:
//
//	(int32(hasher.Sum32()) & 0x7fffffff) % len(partitions) => partition
//
// By default, ReferenceHash uses the FNV-1a algorithm. This is the same algorithm as
// Sarama's NewReferenceHashPartitioner, which ensures that messages produced through
// this package are routed to the same partitions that the Sarama producer would
// route them to.
type ReferenceHash struct {
	rr     randomBalancer
	Hasher hash.Hash32

	// lock protects Hasher while calculating the hash code.  It is assumed that
	// the Hasher field is read-only once the Balancer is created, so as a
	// performance optimization, reads of the field are not protected.
	lock sync.Mutex
}

func (h *ReferenceHash) Balance(key []byte, partitions ...int) int {
	if key == nil {
		return h.rr.Balance(key, partitions...)
	}

	hasher := h.Hasher
	if hasher != nil {
		h.lock.Lock()
		defer h.lock.Unlock()
	} else {
		hasher = fnv1aPool.Get().(hash.Hash32)
		defer fnv1aPool.Put(hasher)
	}

	hasher.Reset()
	if _, err := hasher.Write(key); err != nil {
		panic(err)
	}

	// Uses the same algorithm as Sarama's referenceHashPartitioner.
	// Note the type conversions here: if the uint32 hash code is not cast to
	// an int32, we do not get the same result as Sarama.
	partition := (int32(hasher.Sum32()) & 0x7fffffff) % int32(len(partitions))
	return int(partition)
}
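
// Illustrative comparison (made-up hash value): ReferenceHash clears the sign
// bit before taking the modulo, whereas Hash takes the modulo of the signed
// value and then negates a negative result. For a hash code whose high bit is
// set, the two can disagree; e.g. with 4 partitions and Sum32() == 0x80000001:
//
//	Hash:          abs(int32(0x80000001) % 4) == abs(-2147483647 % 4) == 3
//	ReferenceHash: (int32(0x80000001) & 0x7fffffff) % 4 == 1 % 4      == 1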

type randomBalancer struct {
	mock int // mocked return value, used for testing
}

func (b randomBalancer) Balance(key []byte, partitions ...int) (partition int) {
	if b.mock != 0 {
		return b.mock
	}
	return partitions[rand.Int()%len(partitions)]
}

// CRC32Balancer is a Balancer that uses the CRC32 hash function to determine
// which partition to route messages to.  This ensures that messages with the
// same key are routed to the same partition.  This balancer is compatible with
// the built-in hash partitioners in librdkafka and the language bindings that
// are built on top of it, including the
// github.com/confluentinc/confluent-kafka-go Go package.
//
// With the Consistent field false (default), this partitioner is equivalent to
// the "consistent_random" setting in librdkafka.  When Consistent is true, this
// partitioner is equivalent to the "consistent" setting.  The latter will hash
// empty or nil keys into the same partition.
//
// Unless you are absolutely certain that all your messages will have keys, it's
// best to leave the Consistent flag off.  Otherwise, you run the risk of
// creating a very hot partition.
type CRC32Balancer struct {
	Consistent bool
	random     randomBalancer
}

func (b CRC32Balancer) Balance(key []byte, partitions ...int) (partition int) {
	// NOTE: the crc32 balancers in librdkafka don't differentiate between nil
	//       and empty keys.  Both cases are treated as unset.
	if len(key) == 0 && !b.Consistent {
		return b.random.Balance(key, partitions...)
	}

	idx := crc32.ChecksumIEEE(key) % uint32(len(partitions))
	return partitions[idx]
}
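
// Illustrative sketch (assumed partitions): with Consistent left false (the
// default), keyless messages are spread randomly while keyed messages always
// hash to the same partition. Setting Consistent to true also pins empty and
// nil keys to a single partition, which risks creating a hot partition.
//
//	b := CRC32Balancer{}
//	_ = b.Balance([]byte("order-7"), 0, 1, 2) // deterministic for this key
//	_ = b.Balance(nil, 0, 1, 2)               // random partition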

// Murmur2Balancer is a Balancer that uses the Murmur2 hash function to
// determine which partition to route messages to.  This ensures that messages
// with the same key are routed to the same partition.  This balancer is
// compatible with the partitioner used by the Java library and by librdkafka's
// "murmur2" and "murmur2_random" partitioners.
//
// With the Consistent field false (default), this partitioner is equivalent to
// the "murmur2_random" setting in librdkafka.  When Consistent is true, this
// partitioner is equivalent to the "murmur2" setting.  The latter will hash
// nil keys into the same partition.  Empty, non-nil keys are always hashed to
// the same partition regardless of configuration.
//
// Unless you are absolutely certain that all your messages will have keys, it's
// best to leave the Consistent flag off.  Otherwise, you run the risk of
// creating a very hot partition.
//
// Note that the librdkafka documentation states that "murmur2_random" is
// functionally equivalent to the default Java partitioner.  That's not quite
// accurate, because the Java partitioner uses a round-robin balancer instead of
// a random one on nil keys.  We choose librdkafka's implementation because it
// arguably has a larger install base.
type Murmur2Balancer struct {
	Consistent bool
	random     randomBalancer
}

func (b Murmur2Balancer) Balance(key []byte, partitions ...int) (partition int) {
	// NOTE: the murmur2 balancers in Java and librdkafka treat a nil key as
	//       non-existent while treating an empty slice as a defined value.
	if key == nil && !b.Consistent {
		return b.random.Balance(key, partitions...)
	}

	idx := (murmur2(key) & 0x7fffffff) % uint32(len(partitions))
	return partitions[idx]
}
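
// Illustrative sketch (assumed partitions): unlike CRC32Balancer, Murmur2Balancer
// distinguishes a nil key (randomly balanced unless Consistent is set) from an
// empty but non-nil key (always hashed to the same partition).
//
//	b := Murmur2Balancer{}
//	_ = b.Balance(nil, 0, 1, 2)         // random partition
//	_ = b.Balance([]byte{}, 0, 1, 2)    // always the same partition
//	_ = b.Balance([]byte("k"), 0, 1, 2) // deterministic for this key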

// Go port of the Java library's murmur2 function.
// https://github.com/apache/kafka/blob/1.0/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L353
func murmur2(data []byte) uint32 {
	length := len(data)
	const (
		seed uint32 = 0x9747b28c
		// 'm' and 'r' are mixing constants generated offline.
		// They're not really 'magic', they just happen to work well.
		m = 0x5bd1e995
		r = 24
	)

	// Initialize the hash to a random value
	h := seed ^ uint32(length)
	length4 := length / 4

	for i := 0; i < length4; i++ {
		i4 := i * 4
		k := (uint32(data[i4+0]) & 0xff) + ((uint32(data[i4+1]) & 0xff) << 8) + ((uint32(data[i4+2]) & 0xff) << 16) + ((uint32(data[i4+3]) & 0xff) << 24)
		k *= m
		k ^= k >> r
		k *= m
		h *= m
		h ^= k
	}

	// Handle the last few bytes of the input array
	extra := length % 4
	if extra >= 3 {
		h ^= (uint32(data[(length & ^3)+2]) & 0xff) << 16
	}
	if extra >= 2 {
		h ^= (uint32(data[(length & ^3)+1]) & 0xff) << 8
	}
	if extra >= 1 {
		h ^= uint32(data[length & ^3]) & 0xff
		h *= m
	}

	h ^= h >> 13
	h *= m
	h ^= h >> 15

	return h
}

// nodesCache holds the shared []int slice handed out by loadCachedNodes.
var nodesCache atomic.Value

// loadCachedNodes returns the slice [0, 1, ..., numNodes-1]. To avoid
// reallocating on every call, it keeps a shared slice in nodesCache and grows
// it in blocks of 128 entries whenever a larger slice is requested.
func loadCachedNodes(numNodes int) []int {
	nodes, ok := nodesCache.Load().([]int)
	if ok && len(nodes) >= numNodes {
		return nodes[:numNodes]
	}

	const alignment = 128
	n := ((numNodes / alignment) + 1) * alignment

	nodes = make([]int, n)
	for i := range nodes {
		nodes[i] = i
	}

	nodesCache.Store(nodes)
	return nodes[:numNodes]
}