github.com/rudderlabs/rudder-go-kit@v0.30.0/kafkaclient/compression_benchmark_test.go (about)

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strconv"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/ory/dockertest/v3"
    11  	"github.com/stretchr/testify/require"
    12  
    13  	"github.com/rudderlabs/rudder-go-kit/kafkaclient/testutil"
    14  	"github.com/rudderlabs/rudder-go-kit/tcpproxy"
    15  	"github.com/rudderlabs/rudder-go-kit/testhelper"
    16  	"github.com/rudderlabs/rudder-go-kit/testhelper/docker/resource/kafka"
    17  	"github.com/rudderlabs/rudder-go-kit/testhelper/rand"
    18  )
    19  
    20  func BenchmarkCompression(b *testing.B) {
    21  	proxyPort, err := testhelper.GetFreePort()
    22  	require.NoError(b, err)
    23  
    24  	var (
    25  		ctx       = context.Background()
    26  		topic     = "foo_bar_topic"
    27  		proxyHost = "localhost:" + strconv.Itoa(proxyPort)
    28  	)
    29  
    30  	setupKafka := func(b *testing.B) string {
    31  		pool, err := dockertest.NewPool("")
    32  		require.NoError(b, err)
    33  
    34  		kafkaContainer, err := kafka.Setup(pool, b, kafka.WithCustomAdvertisedListener(proxyHost))
    35  		require.NoError(b, err)
    36  
    37  		return "localhost:" + kafkaContainer.Ports[0]
    38  	}
    39  
    40  	setupProxy := func(b *testing.B, kafkaAddr string, c Compression, bs int, bt time.Duration) (
    41  		*tcpproxy.Proxy,
    42  		*Producer,
    43  	) {
    44  		proxy := &tcpproxy.Proxy{
    45  			LocalAddr:  proxyHost,
    46  			RemoteAddr: kafkaAddr,
    47  		}
    48  		go proxy.Start(b)
    49  
    50  		client, err := New("tcp", []string{proxy.LocalAddr}, Config{})
    51  		require.NoError(b, err)
    52  		require.Eventuallyf(b, func() bool {
    53  			err = client.Ping(ctx)
    54  			return err == nil
    55  		}, 30*time.Second, 100*time.Millisecond, "failed to connect to kafka: %v", err)
    56  
    57  		producer, err := client.NewProducer(ProducerConfig{
    58  			Compression:  c,
    59  			BatchSize:    bs,
    60  			BatchTimeout: bt,
    61  		})
    62  		require.NoError(b, err)
    63  
    64  		return proxy, producer
    65  	}
    66  
    67  	run := func(addr string, comp Compression, value string, batchSize int, batchTimeout time.Duration) func(*testing.B) {
    68  		return func(b *testing.B) {
    69  			proxy, producer := setupProxy(b, addr, comp, batchSize, batchTimeout)
    70  
    71  			kafkaCtx, kafkaCtxCancel := context.WithTimeout(context.Background(), 3*time.Minute)
    72  			err = waitForKafka(kafkaCtx, topic, addr)
    73  			kafkaCtxCancel()
    74  			require.NoError(b, err)
    75  
    76  			var (
    77  				noOfErrors int
    78  				messages   = make([]Message, 0, batchSize)
    79  			)
    80  			for i := 0; i < batchSize; i++ {
    81  				messages = append(messages, Message{
    82  					Key:   []byte("my-key"),
    83  					Value: []byte(value),
    84  					Topic: topic,
    85  				})
    86  			}
    87  
    88  			b.ResetTimer()
    89  			for i := 0; i < b.N; i++ {
    90  				if err := producer.Publish(ctx, messages...); err != nil {
    91  					noOfErrors++
    92  				}
    93  			}
    94  			b.StopTimer()
    95  
    96  			_ = producer.Close(ctx)
    97  			proxy.Stop() // stopping the proxy here to properly gather the metrics
    98  
    99  			b.SetBytes(proxy.BytesReceived.Load())
   100  			b.ReportMetric(float64(proxy.BytesReceived.Load())/float64(b.N)/1024, "kb/op")
   101  			b.ReportMetric(float64(noOfErrors), "errors")
   102  		}
   103  	}
   104  
   105  	var (
   106  		compressionTypes    = []Compression{CompressionNone, CompressionGzip, CompressionSnappy, CompressionLz4, CompressionZstd}
   107  		compressionTypesMap = map[Compression]string{
   108  			CompressionNone: "none", CompressionGzip: "gzip", CompressionSnappy: "snappy", CompressionLz4: "lz4", CompressionZstd: "zstd",
   109  		}
   110  		batchSizes    = []int{1, 100, 1000}
   111  		batchTimeouts = []time.Duration{time.Nanosecond, time.Millisecond}
   112  		values        = []string{rand.String(1 << 10), rand.String(10 << 10), rand.String(100 << 10)}
   113  	)
   114  	for _, comp := range compressionTypes {
   115  		b.Run(compressionTypesMap[comp], func(b *testing.B) {
   116  			kafkaAddr := setupKafka(b) // setup kafka only once per compression combination
   117  			for _, value := range values {
   118  				for _, batchSize := range batchSizes {
   119  					for _, batchTimeout := range batchTimeouts {
   120  						b.Run(
   121  							fmt.Sprintf("%s-%d-%s", byteCount(len(value)), batchSize, batchTimeout),
   122  							run(kafkaAddr, comp, value, batchSize, batchTimeout),
   123  						)
   124  					}
   125  				}
   126  			}
   127  		})
   128  	}
   129  }
   130  
   131  func byteCount(b int) string {
   132  	const unit = 1000
   133  	if b < unit {
   134  		return fmt.Sprintf("%dB", b)
   135  	}
   136  	div, exp := int64(unit), 0
   137  	for n := b / unit; n >= unit; n /= unit {
   138  		div *= unit
   139  		exp++
   140  	}
   141  	return fmt.Sprintf("%.1f%cB",
   142  		float64(b)/float64(div), "kMGTPE"[exp])
   143  }
   144  
   145  func waitForKafka(ctx context.Context, topic, addr string) (err error) {
   146  	tc := testutil.New("tcp", addr)
   147  	for {
   148  		select {
   149  		case <-ctx.Done():
   150  			return fmt.Errorf("kafka not ready within context (%v): %v", ctx.Err(), err)
   151  		case <-time.After(50 * time.Millisecond):
   152  			var topics []testutil.TopicPartition
   153  			topics, err = tc.ListTopics(ctx)
   154  			if err != nil {
   155  				continue
   156  			}
   157  
   158  			var found bool
   159  			for _, top := range topics {
   160  				if top.Topic == topic {
   161  					found = true
   162  					break
   163  				}
   164  			}
   165  			if found {
   166  				return nil
   167  			}
   168  
   169  			if err = tc.CreateTopic(ctx, topic, 1, 1); err != nil {
   170  				continue
   171  			}
   172  		}
   173  	}
   174  }