github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/compress/compress_test.go (about)

     1  package compress_test
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"math/rand"
    10  	"net"
    11  	"os"
    12  	"path/filepath"
    13  	"strconv"
    14  	"testing"
    15  	"text/tabwriter"
    16  	"time"
    17  
    18  	gz "github.com/klauspost/compress/gzip"
    19  	"github.com/segmentio/kafka-go"
    20  	pkg "github.com/segmentio/kafka-go/compress"
    21  	"github.com/segmentio/kafka-go/compress/gzip"
    22  	"github.com/segmentio/kafka-go/compress/lz4"
    23  	"github.com/segmentio/kafka-go/compress/snappy"
    24  	"github.com/segmentio/kafka-go/compress/zstd"
    25  	ktesting "github.com/segmentio/kafka-go/testing"
    26  )
    27  
    28  func init() {
    29  	// Seeding the random source is important to prevent multiple test runs from
    30  	// reusing the same topic names.
    31  	rand.Seed(time.Now().UnixNano())
    32  }
    33  
    34  func TestCodecs(t *testing.T) {
    35  	for i, c := range pkg.Codecs {
    36  		if c != nil {
    37  			if code := c.Code(); int8(code) != int8(i) {
    38  				t.Fatal("default compression codec table is misconfigured for", c.Name())
    39  			}
    40  		}
    41  	}
    42  }
    43  
    44  func TestCompression(t *testing.T) {
    45  	msg := kafka.Message{
    46  		Value: []byte("message"),
    47  	}
    48  
    49  	testEncodeDecode(t, msg, new(gzip.Codec))
    50  	testEncodeDecode(t, msg, new(snappy.Codec))
    51  	testEncodeDecode(t, msg, new(lz4.Codec))
    52  	if ktesting.KafkaIsAtLeast("2.1.0") {
    53  		testEncodeDecode(t, msg, new(zstd.Codec))
    54  	}
    55  }
    56  
    57  func compress(codec pkg.Codec, src []byte) ([]byte, error) {
    58  	b := new(bytes.Buffer)
    59  	r := bytes.NewReader(src)
    60  	w := codec.NewWriter(b)
    61  	if _, err := io.Copy(w, r); err != nil {
    62  		w.Close()
    63  		return nil, err
    64  	}
    65  	if err := w.Close(); err != nil {
    66  		return nil, err
    67  	}
    68  	return b.Bytes(), nil
    69  }
    70  
    71  func decompress(codec pkg.Codec, src []byte) ([]byte, error) {
    72  	b := new(bytes.Buffer)
    73  	r := codec.NewReader(bytes.NewReader(src))
    74  	if _, err := io.Copy(b, r); err != nil {
    75  		r.Close()
    76  		return nil, err
    77  	}
    78  	if err := r.Close(); err != nil {
    79  		return nil, err
    80  	}
    81  	return b.Bytes(), nil
    82  }
    83  
    84  func testEncodeDecode(t *testing.T, m kafka.Message, codec pkg.Codec) {
    85  	var r1, r2 []byte
    86  	var err error
    87  
    88  	t.Run("text format of "+codec.Name(), func(t *testing.T) {
    89  		c := pkg.Compression(codec.Code())
    90  		a := strconv.Itoa(int(c))
    91  		x := pkg.Compression(-1)
    92  		y := pkg.Compression(-1)
    93  		b, err := c.MarshalText()
    94  		if err != nil {
    95  			t.Fatal(err)
    96  		}
    97  
    98  		if err := x.UnmarshalText([]byte(a)); err != nil {
    99  			t.Fatal(err)
   100  		}
   101  		if err := y.UnmarshalText(b); err != nil {
   102  			t.Fatal(err)
   103  		}
   104  
   105  		if x != c {
   106  			t.Errorf("compression mismatch after marshal/unmarshal: want=%s got=%s", c, x)
   107  		}
   108  		if y != c {
   109  			t.Errorf("compression mismatch after marshal/unmarshal: want=%s got=%s", c, y)
   110  		}
   111  	})
   112  
   113  	t.Run("encode with "+codec.Name(), func(t *testing.T) {
   114  		r1, err = compress(codec, m.Value)
   115  		if err != nil {
   116  			t.Fatal(err)
   117  		}
   118  	})
   119  
   120  	t.Run("decode with "+codec.Name(), func(t *testing.T) {
   121  		if r1 == nil {
   122  			if r1, err = compress(codec, m.Value); err != nil {
   123  				t.Fatal(err)
   124  			}
   125  		}
   126  		r2, err = decompress(codec, r1)
   127  		if err != nil {
   128  			t.Fatal(err)
   129  		}
   130  		if string(r2) != "message" {
   131  			t.Error("bad message")
   132  			t.Logf("expected: %q", string(m.Value))
   133  			t.Logf("got:      %q", string(r2))
   134  		}
   135  	})
   136  }
   137  
   138  func TestCompressedMessages(t *testing.T) {
   139  	testCompressedMessages(t, new(gzip.Codec))
   140  	testCompressedMessages(t, new(snappy.Codec))
   141  	testCompressedMessages(t, new(lz4.Codec))
   142  
   143  	if ktesting.KafkaIsAtLeast("2.1.0") {
   144  		testCompressedMessages(t, new(zstd.Codec))
   145  	}
   146  }
   147  
   148  func testCompressedMessages(t *testing.T, codec pkg.Codec) {
   149  	t.Run(codec.Name(), func(t *testing.T) {
   150  		client, topic, shutdown := newLocalClientAndTopic()
   151  		defer shutdown()
   152  
   153  		w := &kafka.Writer{
   154  			Addr:         kafka.TCP("127.0.0.1:9092"),
   155  			Topic:        topic,
   156  			Compression:  kafka.Compression(codec.Code()),
   157  			BatchTimeout: 10 * time.Millisecond,
   158  			Transport:    client.Transport,
   159  		}
   160  		defer w.Close()
   161  
   162  		offset := 0
   163  		var values []string
   164  		for i := 0; i < 10; i++ {
   165  			batch := make([]kafka.Message, i+1)
   166  			for j := range batch {
   167  				value := fmt.Sprintf("Hello World %d!", offset)
   168  				values = append(values, value)
   169  				batch[j] = kafka.Message{
   170  					Key:   []byte(strconv.Itoa(offset)),
   171  					Value: []byte(value),
   172  				}
   173  				offset++
   174  			}
   175  			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   176  			if err := w.WriteMessages(ctx, batch...); err != nil {
   177  				t.Errorf("error sending batch %d, reason: %+v", i+1, err)
   178  			}
   179  			cancel()
   180  		}
   181  
   182  		r := kafka.NewReader(kafka.ReaderConfig{
   183  			Brokers:   []string{"127.0.0.1:9092"},
   184  			Topic:     topic,
   185  			Partition: 0,
   186  			MaxWait:   10 * time.Millisecond,
   187  			MinBytes:  1,
   188  			MaxBytes:  1024,
   189  		})
   190  		defer r.Close()
   191  
   192  		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   193  		defer cancel()
   194  
   195  		// in order to ensure proper handling of decompressing message, read at
   196  		// offsets that we know to be in the middle of compressed message sets.
   197  		for base := range values {
   198  			r.SetOffset(int64(base))
   199  			for i := base; i < len(values); i++ {
   200  				msg, err := r.ReadMessage(ctx)
   201  				if err != nil {
   202  					t.Fatalf("error receiving message at loop %d, offset %d, reason: %+v", base, i, err)
   203  				}
   204  				if msg.Offset != int64(i) {
   205  					t.Fatalf("wrong offset at loop %d...expected %d but got %d", base, i, msg.Offset)
   206  				}
   207  				if strconv.Itoa(i) != string(msg.Key) {
   208  					t.Fatalf("wrong message key at loop %d...expected %d but got %s", base, i, string(msg.Key))
   209  				}
   210  				if values[i] != string(msg.Value) {
   211  					t.Fatalf("wrong message value at loop %d...expected %s but got %s", base, values[i], string(msg.Value))
   212  				}
   213  			}
   214  		}
   215  	})
   216  }
   217  
   218  func TestMixedCompressedMessages(t *testing.T) {
   219  	client, topic, shutdown := newLocalClientAndTopic()
   220  	defer shutdown()
   221  
   222  	offset := 0
   223  	var values []string
   224  	produce := func(n int, codec pkg.Codec) {
   225  		w := &kafka.Writer{
   226  			Addr:      kafka.TCP("127.0.0.1:9092"),
   227  			Topic:     topic,
   228  			Transport: client.Transport,
   229  		}
   230  		defer w.Close()
   231  
   232  		if codec != nil {
   233  			w.Compression = kafka.Compression(codec.Code())
   234  		}
   235  
   236  		msgs := make([]kafka.Message, n)
   237  		for i := range msgs {
   238  			value := fmt.Sprintf("Hello World %d!", offset)
   239  			values = append(values, value)
   240  			offset++
   241  			msgs[i] = kafka.Message{Value: []byte(value)}
   242  		}
   243  
   244  		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   245  		defer cancel()
   246  		if err := w.WriteMessages(ctx, msgs...); err != nil {
   247  			t.Errorf("failed to produce messages: %+v", err)
   248  		}
   249  	}
   250  
   251  	// produce messages that interleave uncompressed messages and messages with
   252  	// different compression codecs.  reader should be able to properly handle
   253  	// all of them.
   254  	produce(10, nil)
   255  	produce(20, new(gzip.Codec))
   256  	produce(5, nil)
   257  	produce(10, new(snappy.Codec))
   258  	produce(10, new(lz4.Codec))
   259  	produce(5, nil)
   260  
   261  	r := kafka.NewReader(kafka.ReaderConfig{
   262  		Brokers:   []string{"127.0.0.1:9092"},
   263  		Topic:     topic,
   264  		Partition: 0,
   265  		MaxWait:   10 * time.Millisecond,
   266  		MinBytes:  1,
   267  		MaxBytes:  1024,
   268  	})
   269  	defer r.Close()
   270  
   271  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   272  	defer cancel()
   273  
   274  	// in order to ensure proper handling of decompressing message, read at
   275  	// offsets that we know to be in the middle of compressed message sets.
   276  	for base := range values {
   277  		r.SetOffset(int64(base))
   278  		for i := base; i < len(values); i++ {
   279  			msg, err := r.ReadMessage(ctx)
   280  			if err != nil {
   281  				t.Errorf("error receiving message at loop %d, offset %d, reason: %+v", base, i, err)
   282  			}
   283  			if msg.Offset != int64(i) {
   284  				t.Errorf("wrong offset at loop %d...expected %d but got %d", base, i, msg.Offset)
   285  			}
   286  			if values[i] != string(msg.Value) {
   287  				t.Errorf("wrong message value at loop %d...expected %s but got %s", base, values[i], string(msg.Value))
   288  			}
   289  		}
   290  	}
   291  }
   292  
   293  type noopCodec struct{}
   294  
   295  func (noopCodec) Code() int8 {
   296  	return 0
   297  }
   298  
   299  func (noopCodec) Name() string {
   300  	return "none"
   301  }
   302  
   303  func (noopCodec) NewReader(r io.Reader) io.ReadCloser {
   304  	return ioutil.NopCloser(r)
   305  }
   306  
   307  func (noopCodec) NewWriter(w io.Writer) io.WriteCloser {
   308  	return nopWriteCloser{w}
   309  }
   310  
   311  type nopWriteCloser struct{ io.Writer }
   312  
   313  func (nopWriteCloser) Close() error { return nil }
   314  
   315  func BenchmarkCompression(b *testing.B) {
   316  	benchmarks := []struct {
   317  		codec    pkg.Codec
   318  		function func(*testing.B, pkg.Codec, *bytes.Buffer, []byte) float64
   319  	}{
   320  		{
   321  			codec:    &noopCodec{},
   322  			function: benchmarkCompression,
   323  		},
   324  		{
   325  			codec:    new(gzip.Codec),
   326  			function: benchmarkCompression,
   327  		},
   328  		{
   329  			codec:    new(snappy.Codec),
   330  			function: benchmarkCompression,
   331  		},
   332  		{
   333  			codec:    new(lz4.Codec),
   334  			function: benchmarkCompression,
   335  		},
   336  		{
   337  			codec:    new(zstd.Codec),
   338  			function: benchmarkCompression,
   339  		},
   340  	}
   341  
   342  	f, err := os.Open(filepath.Join(os.Getenv("GOROOT"), "src/encoding/json/testdata/code.json.gz"))
   343  	if err != nil {
   344  		b.Fatal(err)
   345  	}
   346  	defer f.Close()
   347  
   348  	z, err := gz.NewReader(f)
   349  	if err != nil {
   350  		b.Fatal(err)
   351  	}
   352  
   353  	payload, err := ioutil.ReadAll(z)
   354  	if err != nil {
   355  		b.Fatal(err)
   356  	}
   357  
   358  	buffer := bytes.Buffer{}
   359  	buffer.Grow(len(payload))
   360  
   361  	ts := &bytes.Buffer{}
   362  	tw := tabwriter.NewWriter(ts, 0, 8, 0, '\t', 0)
   363  	defer func() {
   364  		tw.Flush()
   365  		fmt.Printf("input => %.2f MB\n", float64(len(payload))/(1024*1024))
   366  		fmt.Println(ts)
   367  	}()
   368  
   369  	for i := range benchmarks {
   370  		benchmark := &benchmarks[i]
   371  		ratio := 0.0
   372  
   373  		b.Run(benchmark.codec.Name(), func(b *testing.B) {
   374  			ratio = benchmark.function(b, benchmark.codec, &buffer, payload)
   375  		})
   376  
   377  		fmt.Fprintf(tw, "  %s:\t%.2f%%\n", benchmark.codec.Name(), 100*ratio)
   378  	}
   379  }
   380  
   381  func benchmarkCompression(b *testing.B, codec pkg.Codec, buf *bytes.Buffer, payload []byte) float64 {
   382  	// In case only the decompression benchmark are run, we use this flags to
   383  	// detect whether we have to compress the payload before the decompression
   384  	// benchmarks.
   385  	compressed := false
   386  
   387  	b.Run("compress", func(b *testing.B) {
   388  		compressed = true
   389  		r := bytes.NewReader(payload)
   390  		b.ReportAllocs()
   391  
   392  		for i := 0; i < b.N; i++ {
   393  			buf.Reset()
   394  			r.Reset(payload)
   395  			w := codec.NewWriter(buf)
   396  
   397  			_, err := io.Copy(w, r)
   398  			if err != nil {
   399  				b.Fatal(err)
   400  			}
   401  			if err := w.Close(); err != nil {
   402  				b.Fatal(err)
   403  			}
   404  		}
   405  
   406  		b.SetBytes(int64(buf.Len()))
   407  	})
   408  
   409  	if !compressed {
   410  		r := bytes.NewReader(payload)
   411  		w := codec.NewWriter(buf)
   412  
   413  		_, err := io.Copy(w, r)
   414  		if err != nil {
   415  			b.Fatal(err)
   416  		}
   417  		if err := w.Close(); err != nil {
   418  			b.Fatal(err)
   419  		}
   420  	}
   421  
   422  	b.Run("decompress", func(b *testing.B) {
   423  		c := bytes.NewReader(buf.Bytes())
   424  		b.ReportAllocs()
   425  		for i := 0; i < b.N; i++ {
   426  			c.Reset(buf.Bytes())
   427  			r := codec.NewReader(c)
   428  
   429  			n, err := io.Copy(ioutil.Discard, r)
   430  			if err != nil {
   431  				b.Fatal(err)
   432  			}
   433  			if err := r.Close(); err != nil {
   434  				b.Fatal(err)
   435  			}
   436  
   437  			b.SetBytes(n)
   438  		}
   439  	})
   440  
   441  	return 1 - (float64(buf.Len()) / float64(len(payload)))
   442  }
   443  
   444  func init() {
   445  	rand.Seed(time.Now().UnixNano())
   446  }
   447  
   448  func makeTopic() string {
   449  	return fmt.Sprintf("kafka-go-%016x", rand.Int63())
   450  }
   451  
   452  func newLocalClientAndTopic() (*kafka.Client, string, func()) {
   453  	topic := makeTopic()
   454  	client, shutdown := newLocalClient()
   455  
   456  	_, err := client.CreateTopics(context.Background(), &kafka.CreateTopicsRequest{
   457  		Topics: []kafka.TopicConfig{{
   458  			Topic:             topic,
   459  			NumPartitions:     1,
   460  			ReplicationFactor: 1,
   461  		}},
   462  	})
   463  	if err != nil {
   464  		shutdown()
   465  		panic(err)
   466  	}
   467  
   468  	// Topic creation seems to be asynchronous. Metadata for the topic partition
   469  	// layout in the cluster is available in the controller before being synced
   470  	// with the other brokers, which causes "Error:[3] Unknown Topic Or Partition"
   471  	// when sending requests to the partition leaders.
   472  	for i := 0; i < 20; i++ {
   473  		r, err := client.Fetch(context.Background(), &kafka.FetchRequest{
   474  			Topic:     topic,
   475  			Partition: 0,
   476  			Offset:    0,
   477  		})
   478  		if err == nil && r.Error == nil {
   479  			break
   480  		}
   481  		time.Sleep(100 * time.Millisecond)
   482  	}
   483  
   484  	return client, topic, func() {
   485  		client.DeleteTopics(context.Background(), &kafka.DeleteTopicsRequest{
   486  			Topics: []string{topic},
   487  		})
   488  		shutdown()
   489  	}
   490  }
   491  
   492  func newLocalClient() (*kafka.Client, func()) {
   493  	return newClient(kafka.TCP("127.0.0.1:9092"))
   494  }
   495  
   496  func newClient(addr net.Addr) (*kafka.Client, func()) {
   497  	conns := &ktesting.ConnWaitGroup{
   498  		DialFunc: (&net.Dialer{}).DialContext,
   499  	}
   500  
   501  	transport := &kafka.Transport{
   502  		Dial: conns.Dial,
   503  	}
   504  
   505  	client := &kafka.Client{
   506  		Addr:      addr,
   507  		Timeout:   5 * time.Second,
   508  		Transport: transport,
   509  	}
   510  
   511  	return client, func() { transport.CloseIdleConnections(); conns.Wait() }
   512  }