github.com/rudderlabs/rudder-go-kit@v0.30.0/kafkaclient/client_test.go

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/ory/dockertest/v3"
    17  	dc "github.com/ory/dockertest/v3/docker"
    18  	"github.com/segmentio/kafka-go"
    19  	"github.com/stretchr/testify/require"
    20  
    21  	"github.com/rudderlabs/rudder-go-kit/kafkaclient/testutil"
    22  	dockerKafka "github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/kafka"
    23  	"github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/sshserver"
    24  )
    25  
    26  const (
    27  	defaultTestTimeout = 60 * time.Second
    28  )
    29  
    30  func TestClient_Ping(t *testing.T) {
    31  	pool, err := dockertest.NewPool("")
    32  	require.NoError(t, err)
    33  
    34  	kafkaContainer, err := dockerKafka.Setup(pool, t)
    35  	require.NoError(t, err)
    36  
    37  	kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
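        	// "bad-host" is deliberately unreachable: Ping should still succeed via the reachable broker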
    38  	c, err := New("tcp", []string{"bad-host", kafkaHost}, Config{})
    39  	require.NoError(t, err)
    40  
    41  	ctx := context.Background()
    42  	require.NoError(t, c.Ping(ctx))
    43  
    44  	// different host ordering, including a duplicate and an unreachable host
    45  	c, err = New("tcp", []string{kafkaHost, "bad-host", kafkaHost}, Config{})
    46  	require.NoError(t, err)
    47  	require.NoError(t, c.Ping(ctx))
    48  
    49  	require.NoError(t, kafkaContainer.Destroy())
    50  	err = c.Ping(ctx)
    51  	require.Error(t, err)
    52  	require.Contains(t, err.Error(), "connection refused")
    53  }
    54  
    55  func TestProducerBatchConsumerGroup(t *testing.T) {
    56  	// Prepare cluster - Zookeeper + 3 Kafka brokers
    57  	// We need more than one broker, or we'll be stuck with a "GROUP_COORDINATOR_NOT_AVAILABLE" error
    58  	pool, err := dockertest.NewPool("")
    59  	require.NoError(t, err)
    60  
    61  	kafkaContainer, err := dockerKafka.Setup(pool, t,
    62  		dockerKafka.WithBrokers(3))
    63  	require.NoError(t, err)
    64  
    65  	addresses := make([]string, 0, len(kafkaContainer.Ports))
    66  	for i := 0; i < len(kafkaContainer.Ports); i++ {
    67  		addresses = append(addresses, fmt.Sprintf("localhost:%s", kafkaContainer.Ports[i]))
    68  	}
    69  	c, err := New("tcp", addresses, Config{ClientID: "some-client", DialTimeout: 5 * time.Second})
    70  	require.NoError(t, err)
    71  
    72  	var (
    73  		messagesWaitGroup   sync.WaitGroup
    74  		gracefulTermination sync.WaitGroup
    75  		c01Count, c02Count  int32
    76  		noOfMessages        = 50
    77  		ctx, cancel         = context.WithCancel(context.Background())
    78  		tc                  = testutil.NewWithDialer(c.dialer, c.network, c.addresses...)
    79  	)
    80  
    81  	t.Cleanup(gracefulTermination.Wait)
    82  	t.Cleanup(cancel)
    83  
    84  	// Check connectivity and try to create the desired topic until the brokers are up and running (max 60s)
    85  	require.NoError(t, c.Ping(ctx))
    86  	require.Eventually(t, func() bool {
    87  		err := tc.CreateTopic(ctx, t.Name(), 2, 3) // partitions = 2, replication factor = 3
    88  		if err != nil {
    89  			t.Logf("Could not create topic: %v", err)
    90  		}
    91  		return err == nil
    92  	}, defaultTestTimeout, time.Second)
    93  
    94  	// Check that the topic has been created with the right number of partitions
    95  	var topics []testutil.TopicPartition
    96  	require.Eventually(t, func() bool {
    97  		topics, err = tc.ListTopics(ctx)
    98  		success := err == nil && len(topics) == 2
    99  		if !success {
   100  			t.Logf("List topics failure %+v: %v", topics, err)
   101  		}
   102  		return success
   103  	}, defaultTestTimeout, time.Second)
   104  	require.Equal(t, []testutil.TopicPartition{
   105  		{Topic: t.Name(), Partition: 0},
   106  		{Topic: t.Name(), Partition: 1},
   107  	}, topics)
   108  
   109  	// Produce X messages in a single batch
   110  	producerConf := ProducerConfig{
   111  		ClientID:     "producer-01",
   112  		WriteTimeout: 5 * time.Second,
   113  		ReadTimeout:  5 * time.Second,
   114  		Logger:       newKafkaLogger(t, false),
   115  		ErrorLogger:  newKafkaLogger(t, true),
   116  	}
   117  	p, err := c.NewProducer(producerConf)
   118  	require.NoError(t, err)
   119  	publishMessages(ctx, t, p, noOfMessages)
   120  	messagesWaitGroup.Add(noOfMessages)
   121  
   122  	// Starting consumers with group-01 and FirstOffset
   123  	var (
   124  		// The ticker is used so that the test won't end as long as we keep getting messages since the consumers
   125  		// will reset the ticker each time they receive a message
   126  		tickerMu    sync.Mutex
   127  		tickerReset = 10 * time.Second
   128  		ticker      = time.NewTicker(30 * time.Second)
   129  	)
   130  	consumerConf := ConsumerConfig{
   131  		GroupID:             "group-01",
   132  		StartOffset:         FirstOffset,
   133  		CommitInterval:      time.Second, // to make the test faster instead of committing each single message
   134  		FetchBatchesMaxWait: 10 * time.Second,
   135  		Logger:              newKafkaLogger(t, false),
   136  		ErrorLogger:         newKafkaLogger(t, true),
   137  	}
   138  	consume := func(c *Consumer, id string, count *int32) {
   139  		defer gracefulTermination.Done()
   140  		for {
   141  			_, err := c.Receive(ctx)
   142  			if errors.Is(err, io.EOF) || errors.Is(err, context.Canceled) {
   143  				t.Logf("Closing %s: %v", id, err)
   144  				return
   145  			}
   146  			require.NoError(t, err)
   147  			t.Logf("Got a message on %s", id)
   148  			tickerMu.Lock()
   149  			ticker.Reset(tickerReset)
   150  			tickerMu.Unlock()
   151  			_ = atomic.AddInt32(count, 1)
   152  			messagesWaitGroup.Done()
   153  		}
   154  	}
   155  	closeConsumer := func(c *Consumer, id string) func() {
   156  		return func() {
   157  			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   158  			defer cancel()
   159  			if err := c.Close(ctx); err != nil {
   160  				t.Logf("Error closing %s: %v", id, err)
   161  			}
   162  		}
   163  	}
   164  
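        	// Both consumers join consumer group "group-01", so the topic's two partitions are split
        	// between them and together they should consume every published message.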
   165  	c01 := c.NewConsumer(t.Name(), consumerConf)
   166  	t.Cleanup(closeConsumer(c01, "c01"))
   167  	gracefulTermination.Add(1)
   168  	go consume(c01, "c01", &c01Count)
   169  
   170  	c02 := c.NewConsumer(t.Name(), consumerConf)
   171  	t.Cleanup(closeConsumer(c02, "c02"))
   172  	gracefulTermination.Add(1)
   173  	go consume(c02, "c02", &c02Count)
   174  
   175  	done := make(chan struct{})
   176  	go func() {
   177  		messagesWaitGroup.Wait() // this is satisfied once the consumers receive at least "noOfMessages" messages
   178  		close(done)
   179  	}()
   180  
   181  	select {
   182  	case <-done:
   183  		require.EqualValues(t, noOfMessages, atomic.LoadInt32(&c01Count)+atomic.LoadInt32(&c02Count))
   184  	// the test won't end as long as we keep getting messages since the consumers reset the ticker
   185  	// when they receive a message
   186  	case <-ticker.C:
   187  		t.Error("Could not complete within timeout")
   188  	}
   189  
   190  	t.Logf("Messages consumed by c01: %d", atomic.LoadInt32(&c01Count))
   191  	t.Logf("Messages consumed by c02: %d", atomic.LoadInt32(&c02Count))
   192  }
   193  
   194  func TestConsumer_Partition(t *testing.T) {
   195  	// Prepare cluster - Zookeeper and one Kafka broker
   196  	pool, err := dockertest.NewPool("")
   197  	require.NoError(t, err)
   198  
   199  	kafkaContainer, err := dockerKafka.Setup(pool, t,
   200  		dockerKafka.WithBrokers(1))
   201  	require.NoError(t, err)
   202  
   203  	kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
   204  	c, err := New("tcp", []string{"bad-host", kafkaHost}, Config{ClientID: "some-client", DialTimeout: 5 * time.Second})
   205  	require.NoError(t, err)
   206  
   207  	var (
   208  		messagesWaitGroup   sync.WaitGroup
   209  		gracefulTermination sync.WaitGroup
   210  		c01Count, c02Count  int32
   211  		noOfMessages        = 50
   212  		ctx, cancel         = context.WithCancel(context.Background())
   213  		tc                  = testutil.NewWithDialer(c.dialer, c.network, c.addresses...)
   214  	)
   215  
   216  	t.Cleanup(gracefulTermination.Wait)
   217  	t.Cleanup(cancel)
   218  
   219  	// Check connectivity and try to create the desired topic until the brokers are up and running (max 60s)
   220  	require.NoError(t, c.Ping(ctx))
   221  	require.Eventually(t, func() bool {
   222  		err := tc.CreateTopic(ctx, t.Name(), 2, 1) // partitions = 2, replication factor = 1
   223  		if err != nil {
   224  			t.Logf("Could not create topic: %v", err)
   225  		}
   226  		return err == nil
   227  	}, defaultTestTimeout, time.Second)
   228  
   229  	// Check that the topic has been created with the right number of partitions
   230  	topics, err := tc.ListTopics(ctx)
   231  	require.NoError(t, err)
   232  	require.Equal(t, []testutil.TopicPartition{
   233  		{Topic: t.Name(), Partition: 0},
   234  		{Topic: t.Name(), Partition: 1},
   235  	}, topics)
   236  
   237  	// Produce X messages in a single batch
   238  	producerConf := ProducerConfig{
   239  		ClientID:    "producer-01",
   240  		Logger:      newKafkaLogger(t, false),
   241  		ErrorLogger: newKafkaLogger(t, true),
   242  	}
   243  	p, err := c.NewProducer(producerConf)
   244  	require.NoError(t, err)
   245  	publishMessages(ctx, t, p, noOfMessages)
   246  	messagesWaitGroup.Add(noOfMessages)
   247  
   248  	// Starting consumers without a consumer group, each bound to a specific partition, with FirstOffset
   249  	var (
   250  		// The ticker is used so that the test won't end as long as we keep getting messages since the consumers
   251  		// will reset the ticker each time they receive a message
   252  		tickerMu    sync.Mutex
   253  		tickerReset = 10 * time.Second
   254  		ticker      = time.NewTicker(30 * time.Second)
   255  	)
   256  	consumerConf := ConsumerConfig{
   257  		StartOffset:         FirstOffset,
   258  		CommitInterval:      time.Second, // to make the test faster instead of committing each single message
   259  		FetchBatchesMaxWait: 10 * time.Second,
   260  		Logger:              newKafkaLogger(t, false),
   261  		ErrorLogger:         newKafkaLogger(t, true),
   262  	}
   263  	consume := func(c *Consumer, id string, count *int32) {
   264  		defer gracefulTermination.Done()
   265  		for {
   266  			_, err := c.Receive(ctx)
   267  			if errors.Is(err, io.EOF) || errors.Is(err, context.Canceled) {
   268  				t.Logf("Closing %s: %v", id, err)
   269  				return
   270  			}
   271  			require.NoError(t, err)
   272  			t.Logf("Got a message on %s", id)
   273  			tickerMu.Lock()
   274  			ticker.Reset(tickerReset)
   275  			tickerMu.Unlock()
   276  			_ = atomic.AddInt32(count, 1)
   277  			messagesWaitGroup.Done()
   278  		}
   279  	}
   280  	closeConsumer := func(c *Consumer, id string) func() {
   281  		return func() {
   282  			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   283  			defer cancel()
   284  			if err := c.Close(ctx); err != nil {
   285  				t.Logf("Error closing %s: %v", id, err)
   286  			}
   287  		}
   288  	}
   289  
   290  	c01 := c.NewConsumer(t.Name(), consumerConf)
   291  	t.Cleanup(closeConsumer(c01, "c01"))
   292  	gracefulTermination.Add(1)
   293  	go consume(c01, "c01", &c01Count)
   294  
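        	// c01 keeps the zero-value Partition (0); pin c02 to partition 1 so that each partition has a dedicated reader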
   295  	consumerConf.Partition = 1
   296  	c02 := c.NewConsumer(t.Name(), consumerConf)
   297  	t.Cleanup(closeConsumer(c02, "c02"))
   298  	gracefulTermination.Add(1)
   299  	go consume(c02, "c02", &c02Count)
   300  
   301  	done := make(chan struct{})
   302  	go func() {
   303  		messagesWaitGroup.Wait() // this is satisfied once the consumers receive at least "noOfMessages" messages
   304  		close(done)
   305  	}()
   306  
   307  	select {
   308  	case <-done:
   309  		require.Greater(t, atomic.LoadInt32(&c01Count), int32(0))
   310  		require.Greater(t, atomic.LoadInt32(&c02Count), int32(0))
   311  		require.EqualValues(t, noOfMessages, atomic.LoadInt32(&c01Count)+atomic.LoadInt32(&c02Count))
   312  	// the test won't end as long as we keep getting messages since the consumers reset the ticker
   313  	// when they receive a message
   314  	case <-ticker.C:
   315  		t.Error("Could not complete within timeout")
   316  	}
   317  
   318  	t.Logf("Messages consumed by c01: %d", atomic.LoadInt32(&c01Count))
   319  	t.Logf("Messages consumed by c02: %d", atomic.LoadInt32(&c02Count))
   320  }
   321  
   322  func TestWithSASL(t *testing.T) {
   323  	// Prepare cluster - Zookeeper and one Kafka broker
   324  	path, err := os.Getwd()
   325  	require.NoError(t, err)
   326  
   327  	saslConfiguration := dockerKafka.SASLConfig{
   328  		BrokerUser: dockerKafka.User{Username: "kafka1", Password: "password"},
   329  		Users: []dockerKafka.User{
   330  			{Username: "client1", Password: "password"},
   331  		},
   332  		CertificatePassword: "password",
   333  		KeyStorePath:        filepath.Join(path, "testdata", "keystore", "kafka.keystore.jks"),
   334  		TrustStorePath:      filepath.Join(path, "testdata", "truststore", "kafka.truststore.jks"),
   335  	}
   336  
   337  	hashTypes := []ScramHashGenerator{ScramPlainText, ScramSHA256, ScramSHA512}
   338  	for _, hashType := range hashTypes {
   339  		saslConfiguration := saslConfiguration // to avoid data race
   340  		t.Run(hashType.String(), func(t *testing.T) {
   341  			pool, err := dockertest.NewPool("")
   342  			require.NoError(t, err)
   343  
   344  			containerOptions := []dockerKafka.Option{dockerKafka.WithBrokers(1)}
   345  			switch hashType {
   346  			case ScramPlainText:
   347  				containerOptions = append(containerOptions, dockerKafka.WithSASLPlain(&saslConfiguration))
   348  			case ScramSHA256:
   349  				containerOptions = append(containerOptions, dockerKafka.WithSASLScramSHA256(&saslConfiguration))
   350  			case ScramSHA512:
   351  				containerOptions = append(containerOptions, dockerKafka.WithSASLScramSHA512(&saslConfiguration))
   352  			}
   353  			kafkaContainer, err := dockerKafka.Setup(pool, t, containerOptions...)
   354  			require.NoError(t, err)
   355  
   356  			kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
   357  			c, err := New("tcp", []string{"bad-host", kafkaHost}, Config{
   358  				ClientID:    "some-client",
   359  				DialTimeout: 10 * time.Second,
   360  				SASL: &SASL{
   361  					ScramHashGen: hashType,
   362  					Username:     "client1",
   363  					Password:     "password",
   364  				},
   365  				TLS: &TLS{
   366  					InsecureSkipVerify: true,
   367  				},
   368  			})
   369  			require.NoError(t, err)
   370  
   371  			require.Eventually(t, func() bool {
   372  				err := c.Ping(context.Background())
   373  				if err != nil {
   374  					t.Logf("Ping error: %v", err)
   375  				}
   376  				return err == nil
   377  			}, defaultTestTimeout, 250*time.Millisecond)
   378  
   379  			producerConf := ProducerConfig{
   380  				Logger:      newKafkaLogger(t, false),
   381  				ErrorLogger: newKafkaLogger(t, true),
   382  			}
   383  			p, err := c.NewProducer(producerConf)
   384  			require.NoError(t, err)
   385  			t.Cleanup(func() {
   386  				ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   387  				defer cancel()
   388  				if err := p.Close(ctx); err != nil {
   389  					t.Logf("Error closing producer: %v", err)
   390  				}
   391  			})
   392  
   393  			// We will now retry publishing for up to 60s because the cluster could still be in a "Leader Not Available" state
   394  			require.Eventually(t, func() bool {
   395  				err := p.Publish(context.Background(), Message{
   396  					Key:   []byte("hello"),
   397  					Value: []byte("ciao"),
   398  					Topic: "some-topic",
   399  				})
   400  				if err != nil {
   401  					t.Logf("Publish error: %v", err)
   402  				}
   403  				return err == nil
   404  			}, defaultTestTimeout, 100*time.Millisecond, "Could not publish within timeout")
   405  		})
   406  	}
   407  }
   408  
   409  func TestWithSASLBadCredentials(t *testing.T) {
   410  	// Prepare cluster - Zookeeper and one Kafka broker
   411  	path, err := os.Getwd()
   412  	require.NoError(t, err)
   413  
   414  	saslConfiguration := dockerKafka.SASLConfig{
   415  		BrokerUser: dockerKafka.User{Username: "kafka1", Password: "password"},
   416  		Users: []dockerKafka.User{
   417  			{Username: "client1", Password: "password"},
   418  		},
   419  		CertificatePassword: "password",
   420  		KeyStorePath:        filepath.Join(path, "testdata", "keystore", "kafka.keystore.jks"),
   421  		TrustStorePath:      filepath.Join(path, "testdata", "truststore", "kafka.truststore.jks"),
   422  	}
   423  
   424  	pool, err := dockertest.NewPool("")
   425  	require.NoError(t, err)
   426  
   427  	containerOptions := []dockerKafka.Option{
   428  		dockerKafka.WithBrokers(1),
   429  		dockerKafka.WithSASLPlain(&saslConfiguration),
   430  	}
   431  	kafkaContainer, err := dockerKafka.Setup(pool, t, containerOptions...)
   432  	require.NoError(t, err)
   433  
   434  	kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
   435  	c, err := New("tcp", []string{"bad-host", kafkaHost}, Config{
   436  		ClientID:    "some-client",
   437  		DialTimeout: 10 * time.Second,
   438  		SASL: &SASL{
   439  			ScramHashGen: ScramPlainText,
   440  			Username:     "A BAD USER",
   441  			Password:     "A BAD PASSWORD",
   442  		},
   443  		TLS: &TLS{
   444  			InsecureSkipVerify: true,
   445  		},
   446  	})
   447  	require.NoError(t, err)
   448  
   449  	require.Eventually(t, func() bool {
   450  		err := c.Ping(context.Background())
   451  		if err != nil {
   452  			t.Logf("Ping error: %v", err)
   453  		}
   454  		return err != nil && strings.Contains(err.Error(), "SASL Authentication failed")
   455  	}, defaultTestTimeout, 250*time.Millisecond)
   456  }
   457  
   458  func TestProducer_Timeout(t *testing.T) {
   459  	// Prepare cluster - Zookeeper and one Kafka broker
   460  	pool, err := dockertest.NewPool("")
   461  	require.NoError(t, err)
   462  
   463  	kafkaContainer, err := dockerKafka.Setup(pool, t,
   464  		dockerKafka.WithBrokers(1))
   465  	require.NoError(t, err)
   466  
   467  	kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
   468  	c, err := New("tcp", []string{"bad-host", kafkaHost}, Config{ClientID: "some-client", DialTimeout: 5 * time.Second})
   469  	require.NoError(t, err)
   470  
   471  	ctx, cancel := context.WithCancel(context.Background())
   472  	t.Cleanup(cancel)
   473  
   474  	tc := testutil.NewWithDialer(c.dialer, c.network, c.addresses...)
   475  
   476  	// Check connectivity and try to create the desired topic until the brokers are up and running (max 60s)
   477  	require.NoError(t, c.Ping(ctx))
   478  	require.Eventually(t, func() bool {
   479  		err := tc.CreateTopic(ctx, t.Name(), 1, 1) // partitions = 1, replication factor = 1
   480  		if err != nil {
   481  			t.Logf("Could not create topic: %v", err)
   482  		}
   483  		return err == nil
   484  	}, defaultTestTimeout, time.Second)
   485  
   486  	// Check that the topic has been created with the right number of partitions
   487  	topics, err := tc.ListTopics(ctx)
   488  	require.NoError(t, err)
   489  	require.Equal(t, []testutil.TopicPartition{{Topic: t.Name(), Partition: 0}}, topics)
   490  
   491  	// Create the producer used to exercise publish timeouts
   492  	producerConf := ProducerConfig{
   493  		ClientID:    "producer-01",
   494  		Logger:      newKafkaLogger(t, false),
   495  		ErrorLogger: newKafkaLogger(t, true),
   496  	}
   497  	p, err := c.NewProducer(producerConf)
   498  	require.NoError(t, err)
   499  	t.Cleanup(func() {
   500  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   501  		defer cancel()
   502  		if err := p.Close(ctx); err != nil {
   503  			t.Logf("Error closing producer: %v", err)
   504  		}
   505  	})
   506  
   507  	pubCtx, pubCancel := context.WithTimeout(ctx, 30*time.Second)
   508  	err = p.Publish(pubCtx, Message{
   509  		Key:   []byte("hello"),
   510  		Value: []byte("world"),
   511  		Topic: t.Name(),
   512  	})
   513  	pubCancel()
   514  	require.NoError(t, err)
   515  
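        	// A 1ns deadline expires before the write can happen, so Publish should fail with context.DeadlineExceeded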
   516  	pubCtx, pubCancel = context.WithTimeout(ctx, time.Nanosecond)
   517  	err = p.Publish(pubCtx, Message{
   518  		Key:   []byte("hello"),
   519  		Value: []byte("world"),
   520  		Topic: t.Name(),
   521  	})
   522  	defer pubCancel()
   523  	require.Error(t, err)
   524  	require.ErrorIs(t, err, context.DeadlineExceeded)
   525  }
   526  
   527  func TestIsProducerErrTemporary(t *testing.T) {
   528  	// Prepare cluster - Zookeeper and one Kafka broker
   529  	pool, err := dockertest.NewPool("")
   530  	require.NoError(t, err)
   531  
   532  	kafkaContainer, err := dockerKafka.Setup(pool, t,
   533  		dockerKafka.WithBrokers(1))
   534  	require.NoError(t, err)
   535  
   536  	kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
   537  	c, err := New("tcp", []string{"bad-host", kafkaHost}, Config{ClientID: "some-client", DialTimeout: 5 * time.Second})
   538  	require.NoError(t, err)
   539  
   540  	ctx, cancel := context.WithCancel(context.Background())
   541  	t.Cleanup(cancel)
   542  
   543  	// Check connectivity and try to create the desired topic until the brokers are up and running (max 60s)
   544  	require.NoError(t, c.Ping(ctx))
   545  
   546  	tc := testutil.NewWithDialer(c.dialer, c.network, c.addresses...)
   547  	require.Eventually(t, func() bool {
   548  		err := tc.CreateTopic(ctx, t.Name(), 1, 1) // partitions = 1, replication factor = 1
   549  		if err != nil {
   550  			t.Logf("Could not create topic: %v", err)
   551  		}
   552  		return err == nil
   553  	}, defaultTestTimeout, time.Second)
   554  
   555  	// Check that the topic has been created with the right number of partitions
   556  	topics, err := tc.ListTopics(ctx)
   557  	require.NoError(t, err)
   558  	require.Equal(t, []testutil.TopicPartition{{Topic: t.Name(), Partition: 0}}, topics)
   559  
   560  	// Create the producer used to trigger a temporary produce error below
   561  	producerConf := ProducerConfig{
   562  		ClientID:    "producer-01",
   563  		Logger:      newKafkaLogger(t, false),
   564  		ErrorLogger: newKafkaLogger(t, true),
   565  	}
   566  	p, err := c.NewProducer(producerConf)
   567  	require.NoError(t, err)
   568  	t.Cleanup(func() {
   569  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   570  		defer cancel()
   571  		if err := p.Close(ctx); err != nil {
   572  			t.Logf("Error closing producer: %v", err)
   573  		}
   574  	})
   575  
   576  	p.writer.AllowAutoTopicCreation = false // disable auto-creation so that publishing to a non-existent topic below fails
   577  
   578  	pubCtx, pubCancel := context.WithTimeout(ctx, 30*time.Second)
   579  	err = p.Publish(pubCtx, Message{
   580  		Key:   []byte("key-01"),
   581  		Value: []byte("value-01"),
   582  		Topic: "non-existent-topic",
   583  	}, Message{
   584  		Key:   []byte("key-02"),
   585  		Value: []byte("value-02"),
   586  		Topic: "non-existent-topic",
   587  	})
   588  	require.Truef(t, IsProducerErrTemporary(err), "Expected temporary error, got %v instead", err)
   589  	pubCancel()
   590  }
   591  
   592  func TestIsProducerWrappedErrTemporary(t *testing.T) {
   593  	err := kafka.WriteErrors{
   594  		fmt.Errorf("some error: %w", kafka.LeaderNotAvailable),
   595  		fmt.Errorf("some error: %w", kafka.RequestTimedOut),
   596  		fmt.Errorf("some error: %w", kafka.OffsetOutOfRange),
   597  		fmt.Errorf("some error: %w", kafka.Unknown),
   598  	}
   599  	require.True(t, IsProducerErrTemporary(err))
   600  
   601  	wrappedErr := fmt.Errorf("could not publish to %q: %w", "some topic", err)
   602  	require.True(t, IsProducerErrTemporary(wrappedErr))
   603  
   604  	wrappedErr = fmt.Errorf("wrapping again: %w", wrappedErr)
   605  	require.True(t, IsProducerErrTemporary(wrappedErr))
   606  }
   607  
   608  func TestWriteErrors(t *testing.T) {
   609  	err := make(kafka.WriteErrors, 0)
   610  	err = append(err, kafka.PolicyViolation)
   611  	require.False(t, IsProducerErrTemporary(err))
   612  	err = append(err, kafka.LeaderNotAvailable)
   613  	require.True(t, IsProducerErrTemporary(err))
   614  }
   615  
   616  func TestConfluentCloud(t *testing.T) {
   617  	kafkaHost := os.Getenv("TEST_KAFKA_CONFLUENT_CLOUD_HOST")
   618  	confluentCloudKey := os.Getenv("TEST_KAFKA_CONFLUENT_CLOUD_KEY")
   619  	confluentCloudSecret := os.Getenv("TEST_KAFKA_CONFLUENT_CLOUD_SECRET")
   620  
   621  	if kafkaHost == "" || confluentCloudKey == "" || confluentCloudSecret == "" {
   622  		t.Skip("Skipping because credentials or host are not provided")
   623  	}
   624  
   625  	c, err := NewConfluentCloud([]string{"bad-host", kafkaHost}, confluentCloudKey, confluentCloudSecret, Config{
   626  		ClientID:    "some-client",
   627  		DialTimeout: 45 * time.Second,
   628  	})
   629  	require.NoError(t, err)
   630  	require.NoError(t, c.Ping(context.Background()))
   631  
   632  	producerConf := ProducerConfig{
   633  		ClientID:     "producer-01",
   634  		WriteTimeout: 30 * time.Second,
   635  		Logger:       newKafkaLogger(t, false),
   636  		ErrorLogger:  newKafkaLogger(t, true),
   637  	}
   638  	p, err := c.NewProducer(
   639  		producerConf,
   640  	)
   641  	require.NoError(t, err)
   642  
   643  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   644  	// the topic needs to be created beforehand via the Confluent Cloud admin panel
   645  	err = p.Publish(ctx, Message{Key: []byte("key-01"), Value: []byte("value-01"), Topic: "TestConfluentAzureCloud"})
   646  	cancel()
   647  	require.NoError(t, err)
   648  
   649  	c, err = NewConfluentCloud([]string{kafkaHost}, "A BAD KEY", confluentCloudSecret, Config{
   650  		ClientID:    "some-client",
   651  		DialTimeout: 45 * time.Second,
   652  	})
   653  	require.NoError(t, err)
   654  	err = c.Ping(context.Background())
   655  	require.Error(t, err)
   656  	require.Contains(t, err.Error(), "SASL Authentication failed")
   657  }
   658  
   659  func TestAzureEventHubsCloud(t *testing.T) {
   660  	kafkaHost := os.Getenv("TEST_KAFKA_AZURE_EVENT_HUBS_CLOUD_HOST")
   661  	azureEventHubName := os.Getenv("TEST_KAFKA_AZURE_EVENT_HUBS_CLOUD_EVENTHUB_NAME")
   662  	azureEventHubsConnString := os.Getenv("TEST_KAFKA_AZURE_EVENT_HUBS_CLOUD_CONNECTION_STRING")
   663  
   664  	if kafkaHost == "" || azureEventHubName == "" || azureEventHubsConnString == "" {
   665  		t.Skip("Skipping because credentials or host are not provided")
   666  	}
   667  
   668  	c, err := NewAzureEventHubs([]string{kafkaHost}, azureEventHubsConnString, Config{
   669  		ClientID:    "some-client",
   670  		DialTimeout: 45 * time.Second,
   671  	})
   672  	require.NoError(t, err)
   673  	require.NoError(t, c.Ping(context.Background()))
   674  
   675  	producerConf := ProducerConfig{
   676  		ClientID:     "producer-01",
   677  		WriteTimeout: 30 * time.Second,
   678  		Logger:       newKafkaLogger(t, false),
   679  		ErrorLogger:  newKafkaLogger(t, true),
   680  	}
   681  	p, err := c.NewProducer(producerConf)
   682  	require.NoError(t, err)
   683  
   684  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   685  	err = p.Publish(ctx, Message{Key: []byte("key-01"), Value: []byte("value-01"), Topic: azureEventHubName})
   686  
   687  	cancel()
   688  	require.NoError(t, err)
   689  
   690  	c, err = NewAzureEventHubs([]string{"bad-host", kafkaHost}, "A BAD CONNECTION STRING", Config{
   691  		ClientID:    "some-client",
   692  		DialTimeout: 45 * time.Second,
   693  	})
   694  	require.NoError(t, err)
   695  	err = c.Ping(context.Background())
   696  	require.Error(t, err)
   697  	require.Contains(t, err.Error(), "SASL Authentication failed")
   698  }
   699  
   700  func TestConsumerACK(t *testing.T) {
   701  	// Prepare cluster - Zookeeper + 1 Kafka broker
   702  	pool, err := dockertest.NewPool("")
   703  	require.NoError(t, err)
   704  
   705  	kafkaContainer, err := dockerKafka.Setup(pool, t,
   706  		dockerKafka.WithBrokers(1))
   707  	require.NoError(t, err)
   708  
   709  	kafkaHost := fmt.Sprintf("localhost:%s", kafkaContainer.Ports[0])
   710  	kafkaClient, err := New("tcp", []string{"bad-host", kafkaHost}, Config{ClientID: "some-client", DialTimeout: 5 * time.Second})
   711  	require.NoError(t, err)
   712  
   713  	var (
   714  		noOfMessages = 10
   715  		ctx, cancel  = context.WithCancel(context.Background())
   716  		tc           = testutil.NewWithDialer(kafkaClient.dialer, kafkaClient.network, kafkaClient.addresses...)
   717  	)
   718  
   719  	t.Cleanup(cancel)
   720  
   721  	// Check connectivity and try to create the desired topic until the brokers are up and running (max 60s)
   722  	require.NoError(t, kafkaClient.Ping(ctx))
   723  	require.Eventually(t, func() bool {
   724  		err := tc.CreateTopic(ctx, t.Name(), 1, 1) // partitions = 1, replication factor = 1
   725  		if err != nil {
   726  			t.Logf("Could not create topic: %v", err)
   727  		}
   728  		return err == nil
   729  	}, defaultTestTimeout, time.Second)
   730  
   731  	// Check that the topic has been created with the right number of partitions
   732  	var topics []testutil.TopicPartition
   733  	require.Eventually(t, func() bool {
   734  		topics, err = tc.ListTopics(ctx)
   735  		success := err == nil && len(topics) == 1
   736  		if !success {
   737  			t.Logf("List topics failure %+v: %v", topics, err)
   738  		}
   739  		return success
   740  	}, defaultTestTimeout, time.Second)
   741  	require.Equal(t, []testutil.TopicPartition{
   742  		{Topic: t.Name(), Partition: 0},
   743  	}, topics)
   744  
   745  	// Produce X messages in a single batch
   746  	producerConf := ProducerConfig{
   747  		ClientID:     "producer-01",
   748  		WriteTimeout: 5 * time.Second,
   749  		ReadTimeout:  5 * time.Second,
   750  		Logger:       newKafkaLogger(t, false),
   751  		ErrorLogger:  newKafkaLogger(t, true),
   752  	}
   753  	producer, err := kafkaClient.NewProducer(producerConf)
   754  	require.NoError(t, err)
   755  	publishMessages(ctx, t, producer, noOfMessages)
   756  
   757  	consumerConf := ConsumerConfig{
   758  		GroupID:             "group-01",
   759  		StartOffset:         FirstOffset,
   760  		CommitInterval:      time.Second, // to make the test faster instead of committing each single message
   761  		FetchBatchesMaxWait: 10 * time.Second,
   762  		Logger:              newKafkaLogger(t, false),
   763  		ErrorLogger:         newKafkaLogger(t, true),
   764  	}
   765  	consume := func(c *Consumer, id string, noOfMsgsToConsume int) []Message {
   766  		messages := make([]Message, 0, noOfMsgsToConsume)
   767  		for noOfMsgsToConsume > 0 {
   768  			msg, err := c.Receive(ctx)
   769  			if errors.Is(err, io.EOF) || errors.Is(err, context.Canceled) {
   770  				t.Logf("Closing %s: %v", id, err)
   771  				return messages
   772  			}
   773  			require.NoError(t, err)
   774  			t.Logf("Got a message on %s", id)
   775  			noOfMsgsToConsume--
   776  			require.NoError(t, c.Ack(ctx, msg))
   777  			messages = append(messages, msg)
   778  		}
   779  		return messages
   780  	}
   781  	closeConsumer := func(c *Consumer, id string) {
   782  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   783  		defer cancel()
   784  		if err := c.Close(ctx); err != nil {
   785  			t.Logf("Error closing %s: %v", id, err)
   786  		}
   787  	}
   788  	consumer := kafkaClient.NewConsumer(t.Name(), consumerConf)
   789  	closeConsumer(consumer, "consumer") // closing consumer right away:
   790  	// we create and immediately close it so that there is a subscription on the topic for retention
   791  
   792  	ackCount := noOfMessages / 2
   793  	require.Greater(t, ackCount, 0)
   794  	count := 0
   795  	consumer = kafkaClient.NewConsumer(t.Name(), consumerConf) // re-creating consumer
   796  	messages := consume(consumer, "consumer", ackCount)        // consuming only half of the messages
   797  	require.Equal(t, ackCount, len(messages))
   798  	for _, msg := range messages {
   799  		require.Equal(t, fmt.Sprintf("key-%d", count), string(msg.Key))
   800  		require.Equal(t, fmt.Sprintf("value-%d", count), string(msg.Value))
   801  		count++
   802  	}
   803  	closeConsumer(consumer, "consumer") // closing consumer
   804  
   805  	remainingCount := noOfMessages - ackCount
   806  	require.Greater(t, remainingCount, 0)
   807  	consumer = kafkaClient.NewConsumer(t.Name(), consumerConf) // re-creating consumer
   808  	messages = consume(consumer, "consumer", remainingCount)   // consuming the rest of the messages
   809  	require.Equal(t, remainingCount, len(messages))
   810  	for _, msg := range messages {
   811  		require.Equal(t, fmt.Sprintf("key-%d", count), string(msg.Key))
   812  		require.Equal(t, fmt.Sprintf("value-%d", count), string(msg.Value))
   813  		count++
   814  	}
   815  	closeConsumer(consumer, "consumer") // closing consumer
   816  	require.Equal(t, noOfMessages, count)
   817  }
   818  
   819  func TestSSH(t *testing.T) {
   820  	pool, err := dockertest.NewPool("")
   821  	require.NoError(t, err)
   822  
   823  	// Start shared Docker network
   824  	network, err := pool.Client.CreateNetwork(dc.CreateNetworkOptions{Name: "kafka_network"})
   825  	require.NoError(t, err)
   826  	t.Cleanup(func() {
   827  		if err := pool.Client.RemoveNetwork(network.ID); err != nil {
   828  			t.Logf("Error while removing Docker network: %v", err)
   829  		}
   830  	})
   831  
   832  	// Start a Kafka cluster (ZooKeeper + three brokers) reachable only from within the Docker network
   833  	_, err = dockerKafka.Setup(pool, t,
   834  		dockerKafka.WithBrokers(3),
   835  		dockerKafka.WithNetwork(network),
   836  		dockerKafka.WithoutDockerHostListeners(),
   837  	)
   838  	require.NoError(t, err)
   839  
   840  	// Let's set up the SSH server
   841  	publicKeyPath, err := filepath.Abs("./testdata/ssh/test_key.pub")
   842  	require.NoError(t, err)
   843  	sshServer, err := sshserver.Setup(pool, t,
   844  		sshserver.WithPublicKeyPath(publicKeyPath),
   845  		sshserver.WithCredentials("linuxserver.io", ""),
   846  		sshserver.WithDockerNetwork(network),
   847  	)
   848  	require.NoError(t, err)
   849  	sshServerHost := fmt.Sprintf("localhost:%d", sshServer.Port)
   850  	t.Logf("SSH server is listening on %s", sshServerHost)
   851  
   852  	// Read private key
   853  	privateKey, err := os.ReadFile("./testdata/ssh/test_key")
   854  	require.NoError(t, err)
   855  
   856  	// Set up the client and ping: the broker addresses resolve only inside the Docker network, so traffic goes through the SSH tunnel
   857  	ctx := context.Background()
   858  	c, err := New("tcp", []string{"kafka1:9092", "kafka2:9092", "kafka3:9092"}, Config{
   859  		SSHConfig: &SSHConfig{
   860  			User:       "linuxserver.io",
   861  			Host:       sshServerHost,
   862  			PrivateKey: string(privateKey),
   863  		},
   864  	})
   865  	require.NoError(t, err)
   866  	require.Eventuallyf(t, func() bool { err = c.Ping(ctx); return err == nil }, 30*time.Second, time.Second,
   867  		"could not ping kafka: %v", err,
   868  	)
   869  
   870  	// Create topic for test
   871  	tc := testutil.NewWithDialer(c.dialer, c.network, c.addresses...)
   872  	require.Eventually(t, func() bool {
   873  		err := tc.CreateTopic(ctx, t.Name(), 1, 1) // partitions = 1, replication factor = 1
   874  		if err != nil {
   875  			t.Logf("Could not create topic: %v", err)
   876  		}
   877  		return err == nil
   878  	}, defaultTestTimeout, time.Second)
   879  
   880  	// Check that the topic has been created with the right number of partitions
   881  	var topics []testutil.TopicPartition
   882  	require.Eventually(t, func() bool {
   883  		topics, err = tc.ListTopics(ctx)
   884  		success := err == nil && len(topics) > 0
   885  		if !success {
   886  			t.Logf("List topics failure %+v: %v", topics, err)
   887  		}
   888  		return success
   889  	}, defaultTestTimeout, time.Second)
   890  	require.Equal(t, []testutil.TopicPartition{{Topic: t.Name(), Partition: 0}}, topics)
   891  
   892  	// Check producer
   893  	producerConf := ProducerConfig{
   894  		ClientID:    "producer-01",
   895  		Logger:      newKafkaLogger(t, false),
   896  		ErrorLogger: newKafkaLogger(t, true),
   897  	}
   898  	p, err := c.NewProducer(producerConf)
   899  	require.NoError(t, err)
   900  	t.Cleanup(func() {
   901  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   902  		defer cancel()
   903  		if err := p.Close(ctx); err != nil {
   904  			t.Logf("Error closing producer: %v", err)
   905  		}
   906  	})
   907  
   908  	pubCtx, pubCancel := context.WithTimeout(ctx, 30*time.Second)
   909  	defer pubCancel()
   910  	require.Eventually(t, func() bool {
   911  		err = p.Publish(pubCtx,
   912  			Message{Key: []byte("key-01"), Value: []byte("value-01"), Topic: t.Name()},
   913  		)
   914  		if err != nil {
   915  			t.Logf("Could not publish message: %v", err)
   916  		}
   917  		return err == nil
   918  	}, 30*time.Second, time.Second, "could not publish message: %v", err)
   919  
   920  	// Verify that the message has been published and it's readable
   921  	consumer := c.NewConsumer(t.Name(), ConsumerConfig{})
   922  	consumerCtx, consumerCancel := context.WithTimeout(ctx, 10*time.Second)
   923  	defer consumerCancel()
   924  	msg, err := consumer.Receive(consumerCtx)
   925  	require.NoError(t, err)
   926  	require.Equal(t, "key-01", string(msg.Key))
   927  	require.Equal(t, "value-01", string(msg.Value))
   928  }
   929  
   930  func publishMessages(ctx context.Context, t *testing.T, p *Producer, noOfMessages int) {
   931  	t.Helper()
   932  	t.Cleanup(func() {
   933  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   934  		defer cancel()
   935  		if err := p.Close(ctx); err != nil {
   936  			t.Logf("Error closing producer: %v", err)
   937  		}
   938  	})
   939  
   940  	messages := make([]Message, noOfMessages)
   941  	for i := 0; i < noOfMessages; i++ {
   942  		messages[i] = Message{
   943  			Key:   []byte(fmt.Sprintf("key-%d", i)),
   944  			Value: []byte(fmt.Sprintf("value-%d", i)),
   945  			Topic: t.Name(),
   946  		}
   947  	}
   948  
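        	// Publish with retries: right after topic creation the cluster may still report "Leader Not Available"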
   949  	start, end := time.Now(), time.Duration(0)
   950  	require.Eventually(t, func() bool {
   951  		pubCtx, pubCancel := context.WithTimeout(ctx, 30*time.Second)
   952  		err := p.Publish(pubCtx, messages...)
   953  		end = time.Since(start)
   954  		pubCancel()
   955  		if err != nil {
   956  			t.Logf("Got publish error: %v", err)
   957  		}
   958  		return err == nil
   959  	}, defaultTestTimeout, time.Second)
   960  
   961  	t.Logf("Messages published (%d) in %s", noOfMessages, end)
   962  }
   963  
   964  type testLogger struct{ *testing.T }
   965  
   966  func (l *testLogger) Printf(format string, args ...interface{}) {
   967  	l.Helper()
   968  	l.Logf(format, args...)
   969  }
   970  
   971  func (l *testLogger) Infof(format string, args ...interface{}) {
   972  	l.Printf("[INFO] "+format, args...)
   973  }
   974  
   975  func (l *testLogger) Errorf(format string, args ...interface{}) {
   976  	l.Printf("[ERROR] "+format, args...)
   977  }
   978  
   979  func newKafkaLogger(t *testing.T, isErrorLogger bool) *KafkaLogger {
   980  	return &KafkaLogger{Logger: &testLogger{t}, IsErrorLogger: isErrorLogger}
   981  }