github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/balancer_test.go (about)

     1  package kafka
     2  
     3  import (
     4  	"fmt"
     5  	"hash"
     6  	"hash/crc32"
     7  	"testing"
     8  )
     9  
    10  func TestHashBalancer(t *testing.T) {
    11  	testCases := map[string]struct {
    12  		Key        []byte
    13  		Hasher     hash.Hash32
    14  		Partitions []int
    15  		Partition  int
    16  	}{
    17  		"nil": {
    18  			Key:        nil,
    19  			Partitions: []int{0, 1, 2},
    20  			Partition:  0,
    21  		},
    22  		"partition-0": {
    23  			Key:        []byte("blah"),
    24  			Partitions: []int{0, 1},
    25  			Partition:  0,
    26  		},
    27  		"partition-1": {
    28  			Key:        []byte("blah"),
    29  			Partitions: []int{0, 1, 2},
    30  			Partition:  1,
    31  		},
    32  		"partition-2": {
    33  			Key:        []byte("boop"),
    34  			Partitions: []int{0, 1, 2},
    35  			Partition:  2,
    36  		},
    37  		"custom hash": {
    38  			Key:        []byte("boop"),
    39  			Hasher:     crc32.NewIEEE(),
    40  			Partitions: []int{0, 1, 2},
    41  			Partition:  1,
    42  		},
    43  		// in a previous version, this test would select a different partition
    44  		// than sarama's hash partitioner.
    45  		"hash code with MSB set": {
    46  			Key:        []byte("20"),
    47  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    48  			Partition:  1,
    49  		},
    50  	}
    51  
    52  	for label, test := range testCases {
    53  		t.Run(label, func(t *testing.T) {
    54  			msg := Message{Key: test.Key}
    55  			h := Hash{
    56  				Hasher: test.Hasher,
    57  			}
    58  			partition := h.Balance(msg, test.Partitions...)
    59  			if partition != test.Partition {
    60  				t.Errorf("expected %v; got %v", test.Partition, partition)
    61  			}
    62  		})
    63  	}
    64  }
    65  
    66  func TestReferenceHashBalancer(t *testing.T) {
    67  	testCases := map[string]struct {
    68  		Key               []byte
    69  		Hasher            hash.Hash32
    70  		Partitions        []int
    71  		Partition         int
    72  		RndBalancerResult int
    73  	}{
    74  		"nil": {
    75  			Key:               nil, // nil key means random partition
    76  			Partitions:        []int{0, 1, 2},
    77  			Partition:         123,
    78  			RndBalancerResult: 123,
    79  		},
    80  		"partition-0": {
    81  			Key:        []byte("blah"),
    82  			Partitions: []int{0, 1},
    83  			Partition:  0,
    84  		},
    85  		"partition-1": {
    86  			Key:        []byte("blah"),
    87  			Partitions: []int{0, 1, 2},
    88  			Partition:  1,
    89  		},
    90  		"partition-2": {
    91  			Key:        []byte("castle"),
    92  			Partitions: []int{0, 1, 2},
    93  			Partition:  2,
    94  		},
    95  		"custom hash": {
    96  			Key:        []byte("boop"),
    97  			Hasher:     crc32.NewIEEE(),
    98  			Partitions: []int{0, 1, 2},
    99  			Partition:  1,
   100  		},
   101  		"hash code with MSB set": {
   102  			Key:        []byte("20"),
   103  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
   104  			Partition:  15,
   105  		},
   106  	}
   107  
   108  	for label, test := range testCases {
   109  		t.Run(label, func(t *testing.T) {
   110  			var rr randomBalancer
   111  			if test.Key == nil {
   112  				rr.mock = test.RndBalancerResult
   113  			}
   114  
   115  			msg := Message{Key: test.Key}
   116  			h := ReferenceHash{Hasher: test.Hasher, rr: rr}
   117  			partition := h.Balance(msg, test.Partitions...)
   118  			if partition != test.Partition {
   119  				t.Errorf("expected %v; got %v", test.Partition, partition)
   120  			}
   121  		})
   122  	}
   123  }
   124  
   125  func TestCRC32Balancer(t *testing.T) {
   126  	// These tests are taken from the default "consistent_random" partitioner from
   127  	// https://github.com/edenhill/librdkafka/blob/master/tests/0048-partitioner.c
   128  	partitionCount := 17
   129  	var partitions []int
   130  	for i := 0; i < partitionCount; i++ {
   131  		partitions = append(partitions, i*i)
   132  	}
   133  
   134  	testCases := map[string]struct {
   135  		Key        []byte
   136  		Partitions []int
   137  		Partition  int
   138  	}{
   139  		"nil": {
   140  			Key:        nil,
   141  			Partitions: partitions,
   142  			Partition:  -1,
   143  		},
   144  		"empty": {
   145  			Key:        []byte{},
   146  			Partitions: partitions,
   147  			Partition:  -1,
   148  		},
   149  		"unaligned": {
   150  			Key:        []byte("23456"),
   151  			Partitions: partitions,
   152  			Partition:  partitions[0xb1b451d7%partitionCount],
   153  		},
   154  		"long key": {
   155  			Key:        []byte("this is another string with more length to it perhaps"),
   156  			Partitions: partitions,
   157  			Partition:  partitions[0xb0150df7%partitionCount],
   158  		},
   159  		"short key": {
   160  			Key:        []byte("hejsan"),
   161  			Partitions: partitions,
   162  			Partition:  partitions[0xd077037e%partitionCount],
   163  		},
   164  	}
   165  
   166  	t.Run("default", func(t *testing.T) {
   167  		for label, test := range testCases {
   168  			t.Run(label, func(t *testing.T) {
   169  				b := CRC32Balancer{}
   170  				b.random.mock = -1
   171  
   172  				msg := Message{Key: test.Key}
   173  				partition := b.Balance(msg, test.Partitions...)
   174  				if partition != test.Partition {
   175  					t.Errorf("expected %v; got %v", test.Partition, partition)
   176  				}
   177  			})
   178  		}
   179  	})
   180  
   181  	t.Run("consistent", func(t *testing.T) {
   182  		b := CRC32Balancer{Consistent: true}
   183  		b.random.mock = -1
   184  
   185  		p := b.Balance(Message{}, partitions...)
   186  		if p < 0 {
   187  			t.Fatal("should not have gotten a random partition")
   188  		}
   189  		for i := 0; i < 10; i++ {
   190  			if p != b.Balance(Message{}, partitions...) {
   191  				t.Fatal("nil key should always hash consistently")
   192  			}
   193  			if p != b.Balance(Message{Key: []byte{}}, partitions...) {
   194  				t.Fatal("empty key should always hash consistently and have same result as nil key")
   195  			}
   196  		}
   197  	})
   198  }
   199  
   200  func TestMurmur2(t *testing.T) {
   201  	// These tests are taken from the "murmur2" implementation from
   202  	// https://github.com/edenhill/librdkafka/blob/master/src/rdmurmur2.c
   203  	testCases := []struct {
   204  		Key               []byte
   205  		JavaMurmur2Result uint32
   206  	}{
   207  		{Key: []byte("kafka"), JavaMurmur2Result: 0xd067cf64},
   208  		{Key: []byte("giberish123456789"), JavaMurmur2Result: 0x8f552b0c},
   209  		{Key: []byte("1234"), JavaMurmur2Result: 0x9fc97b14},
   210  		{Key: []byte("234"), JavaMurmur2Result: 0xe7c009ca},
   211  		{Key: []byte("34"), JavaMurmur2Result: 0x873930da},
   212  		{Key: []byte("4"), JavaMurmur2Result: 0x5a4b5ca1},
   213  		{Key: []byte("PreAmbleWillBeRemoved,ThePrePartThatIs"), JavaMurmur2Result: 0x78424f1c},
   214  		{Key: []byte("reAmbleWillBeRemoved,ThePrePartThatIs"), JavaMurmur2Result: 0x4a62b377},
   215  		{Key: []byte("eAmbleWillBeRemoved,ThePrePartThatIs"), JavaMurmur2Result: 0xe0e4e09e},
   216  		{Key: []byte("AmbleWillBeRemoved,ThePrePartThatIs"), JavaMurmur2Result: 0x62b8b43f},
   217  		{Key: []byte(""), JavaMurmur2Result: 0x106e08d9},
   218  		{Key: nil, JavaMurmur2Result: 0x106e08d9},
   219  	}
   220  
   221  	for _, test := range testCases {
   222  		t.Run(fmt.Sprintf("key:%s", test.Key), func(t *testing.T) {
   223  			got := murmur2(test.Key)
   224  			if got != test.JavaMurmur2Result {
   225  				t.Errorf("expected %v; got %v", test.JavaMurmur2Result, got)
   226  			}
   227  		})
   228  	}
   229  }
   230  
   231  func TestMurmur2Balancer(t *testing.T) {
   232  	// These tests are taken from the "murmur2_random" partitioner from
   233  	// https://github.com/edenhill/librdkafka/blob/master/tests/0048-partitioner.c
   234  	partitionCount := 17
   235  	librdkafkaPartitions := make([]int, partitionCount)
   236  	for i := 0; i < partitionCount; i++ {
   237  		librdkafkaPartitions[i] = i * i
   238  	}
   239  
   240  	// These tests are taken from the Murmur2Partitioner Python class from
   241  	// https://github.com/dpkp/kafka-python/blob/master/test/test_partitioner.py
   242  	pythonPartitions := make([]int, 1000)
   243  	for i := 0; i < 1000; i++ {
   244  		pythonPartitions[i] = i
   245  	}
   246  
   247  	testCases := map[string]struct {
   248  		Key        []byte
   249  		Partitions []int
   250  		Partition  int
   251  	}{
   252  		"librdkafka-nil": {
   253  			Key:        nil,
   254  			Partitions: librdkafkaPartitions,
   255  			Partition:  123,
   256  		},
   257  		"librdkafka-empty": {
   258  			Key:        []byte{},
   259  			Partitions: librdkafkaPartitions,
   260  			Partition:  librdkafkaPartitions[0x106e08d9%partitionCount],
   261  		},
   262  		"librdkafka-unaligned": {
   263  			Key:        []byte("23456"),
   264  			Partitions: librdkafkaPartitions,
   265  			Partition:  librdkafkaPartitions[0x058d780f%partitionCount],
   266  		},
   267  		"librdkafka-long key": {
   268  			Key:        []byte("this is another string with more length to it perhaps"),
   269  			Partitions: librdkafkaPartitions,
   270  			Partition:  librdkafkaPartitions[0x4f7703da%partitionCount],
   271  		},
   272  		"librdkafka-short key": {
   273  			Key:        []byte("hejsan"),
   274  			Partitions: librdkafkaPartitions,
   275  			Partition:  librdkafkaPartitions[0x5ec19395%partitionCount],
   276  		},
   277  		"python-empty": {
   278  			Key:        []byte(""),
   279  			Partitions: pythonPartitions,
   280  			Partition:  681,
   281  		},
   282  		"python-a": {
   283  			Key:        []byte("a"),
   284  			Partitions: pythonPartitions,
   285  			Partition:  524,
   286  		},
   287  		"python-ab": {
   288  			Key:        []byte("ab"),
   289  			Partitions: pythonPartitions,
   290  			Partition:  434,
   291  		},
   292  		"python-abc": {
   293  			Key:        []byte("abc"),
   294  			Partitions: pythonPartitions,
   295  			Partition:  107,
   296  		},
   297  		"python-123456789": {
   298  			Key:        []byte("123456789"),
   299  			Partitions: pythonPartitions,
   300  			Partition:  566,
   301  		},
   302  		"python-\x00 ": {
   303  			Key:        []byte{0, 32},
   304  			Partitions: pythonPartitions,
   305  			Partition:  742,
   306  		},
   307  	}
   308  
   309  	t.Run("default", func(t *testing.T) {
   310  		for label, test := range testCases {
   311  			t.Run(label, func(t *testing.T) {
   312  				b := Murmur2Balancer{}
   313  				b.random.mock = 123
   314  
   315  				msg := Message{Key: test.Key}
   316  				partition := b.Balance(msg, test.Partitions...)
   317  				if partition != test.Partition {
   318  					t.Errorf("expected %v; got %v", test.Partition, partition)
   319  				}
   320  			})
   321  		}
   322  	})
   323  
   324  	t.Run("consistent", func(t *testing.T) {
   325  		b := Murmur2Balancer{Consistent: true}
   326  		b.random.mock = -1
   327  
   328  		p := b.Balance(Message{}, librdkafkaPartitions...)
   329  		if p < 0 {
   330  			t.Fatal("should not have gotten a random partition")
   331  		}
   332  		for i := 0; i < 10; i++ {
   333  			if p != b.Balance(Message{}, librdkafkaPartitions...) {
   334  				t.Fatal("nil key should always hash consistently")
   335  			}
   336  		}
   337  	})
   338  }
   339  
   340  func TestLeastBytes(t *testing.T) {
   341  	testCases := map[string]struct {
   342  		Keys       [][]byte
   343  		Partitions [][]int
   344  		Partition  int
   345  	}{
   346  		"single message": {
   347  			Keys: [][]byte{
   348  				[]byte("key"),
   349  			},
   350  			Partitions: [][]int{
   351  				{0, 1, 2},
   352  			},
   353  			Partition: 0,
   354  		},
   355  		"multiple messages, no partition change": {
   356  			Keys: [][]byte{
   357  				[]byte("a"),
   358  				[]byte("ab"),
   359  				[]byte("abc"),
   360  				[]byte("abcd"),
   361  			},
   362  			Partitions: [][]int{
   363  				{0, 1, 2},
   364  				{0, 1, 2},
   365  				{0, 1, 2},
   366  				{0, 1, 2},
   367  			},
   368  			Partition: 0,
   369  		},
   370  		"partition gained": {
   371  			Keys: [][]byte{
   372  				[]byte("hello world 1"),
   373  				[]byte("hello world 2"),
   374  				[]byte("hello world 3"),
   375  			},
   376  			Partitions: [][]int{
   377  				{0, 1},
   378  				{0, 1},
   379  				{0, 1, 2},
   380  			},
   381  			Partition: 0,
   382  		},
   383  		"partition lost": {
   384  			Keys: [][]byte{
   385  				[]byte("hello world 1"),
   386  				[]byte("hello world 2"),
   387  				[]byte("hello world 3"),
   388  			},
   389  			Partitions: [][]int{
   390  				{0, 1, 2},
   391  				{0, 1, 2},
   392  				{0, 1},
   393  			},
   394  			Partition: 0,
   395  		},
   396  	}
   397  
   398  	for label, test := range testCases {
   399  		t.Run(label, func(t *testing.T) {
   400  			lb := &LeastBytes{}
   401  
   402  			var partition int
   403  			for i, key := range test.Keys {
   404  				msg := Message{Key: key}
   405  				partition = lb.Balance(msg, test.Partitions[i]...)
   406  			}
   407  
   408  			if partition != test.Partition {
   409  				t.Errorf("expected %v; got %v", test.Partition, partition)
   410  			}
   411  		})
   412  	}
   413  }
   414  
   415  func TestRoundRobin(t *testing.T) {
   416  	testCases := map[string]struct {
   417  		Partitions []int
   418  		ChunkSize  int
   419  	}{
   420  		"default - odd partition count": {
   421  			Partitions: []int{0, 1, 2, 3, 4, 5, 6},
   422  		},
   423  		"negative chunk size - odd partition count": {
   424  			Partitions: []int{0, 1, 2, 3, 4, 5, 6},
   425  			ChunkSize:  -1,
   426  		},
   427  		"0 chunk size - odd partition count": {
   428  			Partitions: []int{0, 1, 2, 3, 4, 5, 6},
   429  			ChunkSize:  0,
   430  		},
   431  		"5 chunk size - odd partition count": {
   432  			Partitions: []int{0, 1, 2, 3, 4, 5, 6},
   433  			ChunkSize:  5,
   434  		},
   435  		"12 chunk size - odd partition count": {
   436  			Partitions: []int{0, 1, 2, 3, 4, 5, 6},
   437  			ChunkSize:  12,
   438  		},
   439  		"default - even partition count": {
   440  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7},
   441  		},
   442  		"negative chunk size - even partition count": {
   443  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7},
   444  			ChunkSize:  -1,
   445  		},
   446  		"0 chunk size - even partition count": {
   447  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7},
   448  			ChunkSize:  0,
   449  		},
   450  		"5 chunk size - even partition count": {
   451  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7},
   452  			ChunkSize:  5,
   453  		},
   454  		"12 chunk size - even partition count": {
   455  			Partitions: []int{0, 1, 2, 3, 4, 5, 6, 7},
   456  			ChunkSize:  12,
   457  		},
   458  	}
   459  	for label, test := range testCases {
   460  		t.Run(label, func(t *testing.T) {
   461  			lb := &RoundRobin{ChunkSize: test.ChunkSize}
   462  			msg := Message{}
   463  			var partition int
   464  			var i int
   465  			expectedChunkSize := test.ChunkSize
   466  			if expectedChunkSize < 1 {
   467  				expectedChunkSize = 1
   468  			}
   469  			partitions := test.Partitions
   470  			for i = 0; i < 50; i++ {
   471  				partition = lb.Balance(msg, partitions...)
   472  				if partition != i/expectedChunkSize%len(partitions) {
   473  					t.Error("Returned partition", partition, "expecting", i/expectedChunkSize%len(partitions))
   474  				}
   475  			}
   476  		})
   477  	}
   478  }