github.com/segmentio/kafka-go@v0.4.48-0.20240318174348-3f6244eb34fd/compress/compress_test.go (about) 1 package compress_test 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "math/rand" 10 "net" 11 "os" 12 "path/filepath" 13 "strconv" 14 "testing" 15 "text/tabwriter" 16 "time" 17 18 gz "github.com/klauspost/compress/gzip" 19 "github.com/segmentio/kafka-go" 20 pkg "github.com/segmentio/kafka-go/compress" 21 "github.com/segmentio/kafka-go/compress/gzip" 22 "github.com/segmentio/kafka-go/compress/lz4" 23 "github.com/segmentio/kafka-go/compress/snappy" 24 "github.com/segmentio/kafka-go/compress/zstd" 25 ktesting "github.com/segmentio/kafka-go/testing" 26 ) 27 28 func init() { 29 // Seeding the random source is important to prevent multiple test runs from 30 // reusing the same topic names. 31 rand.Seed(time.Now().UnixNano()) 32 } 33 34 func TestCodecs(t *testing.T) { 35 for i, c := range pkg.Codecs { 36 if c != nil { 37 if code := c.Code(); int8(code) != int8(i) { 38 t.Fatal("default compression codec table is misconfigured for", c.Name()) 39 } 40 } 41 } 42 } 43 44 func TestCompression(t *testing.T) { 45 msg := kafka.Message{ 46 Value: []byte("message"), 47 } 48 49 testEncodeDecode(t, msg, new(gzip.Codec)) 50 testEncodeDecode(t, msg, new(snappy.Codec)) 51 testEncodeDecode(t, msg, new(lz4.Codec)) 52 if ktesting.KafkaIsAtLeast("2.1.0") { 53 testEncodeDecode(t, msg, new(zstd.Codec)) 54 } 55 } 56 57 func compress(codec pkg.Codec, src []byte) ([]byte, error) { 58 b := new(bytes.Buffer) 59 r := bytes.NewReader(src) 60 w := codec.NewWriter(b) 61 if _, err := io.Copy(w, r); err != nil { 62 w.Close() 63 return nil, err 64 } 65 if err := w.Close(); err != nil { 66 return nil, err 67 } 68 return b.Bytes(), nil 69 } 70 71 func decompress(codec pkg.Codec, src []byte) ([]byte, error) { 72 b := new(bytes.Buffer) 73 r := codec.NewReader(bytes.NewReader(src)) 74 if _, err := io.Copy(b, r); err != nil { 75 r.Close() 76 return nil, err 77 } 78 if err := r.Close(); err != nil { 79 return nil, err 80 } 81 return b.Bytes(), nil 82 } 83 84 func testEncodeDecode(t *testing.T, m kafka.Message, codec pkg.Codec) { 85 var r1, r2 []byte 86 var err error 87 88 t.Run("text format of "+codec.Name(), func(t *testing.T) { 89 c := pkg.Compression(codec.Code()) 90 a := strconv.Itoa(int(c)) 91 x := pkg.Compression(-1) 92 y := pkg.Compression(-1) 93 b, err := c.MarshalText() 94 if err != nil { 95 t.Fatal(err) 96 } 97 98 if err := x.UnmarshalText([]byte(a)); err != nil { 99 t.Fatal(err) 100 } 101 if err := y.UnmarshalText(b); err != nil { 102 t.Fatal(err) 103 } 104 105 if x != c { 106 t.Errorf("compression mismatch after marshal/unmarshal: want=%s got=%s", c, x) 107 } 108 if y != c { 109 t.Errorf("compression mismatch after marshal/unmarshal: want=%s got=%s", c, y) 110 } 111 }) 112 113 t.Run("encode with "+codec.Name(), func(t *testing.T) { 114 r1, err = compress(codec, m.Value) 115 if err != nil { 116 t.Fatal(err) 117 } 118 }) 119 120 t.Run("decode with "+codec.Name(), func(t *testing.T) { 121 if r1 == nil { 122 if r1, err = compress(codec, m.Value); err != nil { 123 t.Fatal(err) 124 } 125 } 126 r2, err = decompress(codec, r1) 127 if err != nil { 128 t.Fatal(err) 129 } 130 if string(r2) != "message" { 131 t.Error("bad message") 132 t.Logf("expected: %q", string(m.Value)) 133 t.Logf("got: %q", string(r2)) 134 } 135 }) 136 } 137 138 func TestCompressedMessages(t *testing.T) { 139 testCompressedMessages(t, new(gzip.Codec)) 140 testCompressedMessages(t, new(snappy.Codec)) 141 testCompressedMessages(t, new(lz4.Codec)) 142 143 if ktesting.KafkaIsAtLeast("2.1.0") { 144 testCompressedMessages(t, new(zstd.Codec)) 145 } 146 } 147 148 func testCompressedMessages(t *testing.T, codec pkg.Codec) { 149 t.Run(codec.Name(), func(t *testing.T) { 150 client, topic, shutdown := newLocalClientAndTopic() 151 defer shutdown() 152 153 w := &kafka.Writer{ 154 Addr: kafka.TCP("127.0.0.1:9092"), 155 Topic: topic, 156 Compression: kafka.Compression(codec.Code()), 157 BatchTimeout: 10 * time.Millisecond, 158 Transport: client.Transport, 159 } 160 defer w.Close() 161 162 offset := 0 163 var values []string 164 for i := 0; i < 10; i++ { 165 batch := make([]kafka.Message, i+1) 166 for j := range batch { 167 value := fmt.Sprintf("Hello World %d!", offset) 168 values = append(values, value) 169 batch[j] = kafka.Message{ 170 Key: []byte(strconv.Itoa(offset)), 171 Value: []byte(value), 172 } 173 offset++ 174 } 175 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 176 if err := w.WriteMessages(ctx, batch...); err != nil { 177 t.Errorf("error sending batch %d, reason: %+v", i+1, err) 178 } 179 cancel() 180 } 181 182 r := kafka.NewReader(kafka.ReaderConfig{ 183 Brokers: []string{"127.0.0.1:9092"}, 184 Topic: topic, 185 Partition: 0, 186 MaxWait: 10 * time.Millisecond, 187 MinBytes: 1, 188 MaxBytes: 1024, 189 }) 190 defer r.Close() 191 192 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 193 defer cancel() 194 195 // in order to ensure proper handling of decompressing message, read at 196 // offsets that we know to be in the middle of compressed message sets. 197 for base := range values { 198 r.SetOffset(int64(base)) 199 for i := base; i < len(values); i++ { 200 msg, err := r.ReadMessage(ctx) 201 if err != nil { 202 t.Fatalf("error receiving message at loop %d, offset %d, reason: %+v", base, i, err) 203 } 204 if msg.Offset != int64(i) { 205 t.Fatalf("wrong offset at loop %d...expected %d but got %d", base, i, msg.Offset) 206 } 207 if strconv.Itoa(i) != string(msg.Key) { 208 t.Fatalf("wrong message key at loop %d...expected %d but got %s", base, i, string(msg.Key)) 209 } 210 if values[i] != string(msg.Value) { 211 t.Fatalf("wrong message value at loop %d...expected %s but got %s", base, values[i], string(msg.Value)) 212 } 213 } 214 } 215 }) 216 } 217 218 func TestMixedCompressedMessages(t *testing.T) { 219 client, topic, shutdown := newLocalClientAndTopic() 220 defer shutdown() 221 222 offset := 0 223 var values []string 224 produce := func(n int, codec pkg.Codec) { 225 w := &kafka.Writer{ 226 Addr: kafka.TCP("127.0.0.1:9092"), 227 Topic: topic, 228 Transport: client.Transport, 229 } 230 defer w.Close() 231 232 if codec != nil { 233 w.Compression = kafka.Compression(codec.Code()) 234 } 235 236 msgs := make([]kafka.Message, n) 237 for i := range msgs { 238 value := fmt.Sprintf("Hello World %d!", offset) 239 values = append(values, value) 240 offset++ 241 msgs[i] = kafka.Message{Value: []byte(value)} 242 } 243 244 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 245 defer cancel() 246 if err := w.WriteMessages(ctx, msgs...); err != nil { 247 t.Errorf("failed to produce messages: %+v", err) 248 } 249 } 250 251 // produce messages that interleave uncompressed messages and messages with 252 // different compression codecs. reader should be able to properly handle 253 // all of them. 254 produce(10, nil) 255 produce(20, new(gzip.Codec)) 256 produce(5, nil) 257 produce(10, new(snappy.Codec)) 258 produce(10, new(lz4.Codec)) 259 produce(5, nil) 260 261 r := kafka.NewReader(kafka.ReaderConfig{ 262 Brokers: []string{"127.0.0.1:9092"}, 263 Topic: topic, 264 Partition: 0, 265 MaxWait: 10 * time.Millisecond, 266 MinBytes: 1, 267 MaxBytes: 1024, 268 }) 269 defer r.Close() 270 271 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 272 defer cancel() 273 274 // in order to ensure proper handling of decompressing message, read at 275 // offsets that we know to be in the middle of compressed message sets. 276 for base := range values { 277 r.SetOffset(int64(base)) 278 for i := base; i < len(values); i++ { 279 msg, err := r.ReadMessage(ctx) 280 if err != nil { 281 t.Errorf("error receiving message at loop %d, offset %d, reason: %+v", base, i, err) 282 } 283 if msg.Offset != int64(i) { 284 t.Errorf("wrong offset at loop %d...expected %d but got %d", base, i, msg.Offset) 285 } 286 if values[i] != string(msg.Value) { 287 t.Errorf("wrong message value at loop %d...expected %s but got %s", base, values[i], string(msg.Value)) 288 } 289 } 290 } 291 } 292 293 type noopCodec struct{} 294 295 func (noopCodec) Code() int8 { 296 return 0 297 } 298 299 func (noopCodec) Name() string { 300 return "none" 301 } 302 303 func (noopCodec) NewReader(r io.Reader) io.ReadCloser { 304 return ioutil.NopCloser(r) 305 } 306 307 func (noopCodec) NewWriter(w io.Writer) io.WriteCloser { 308 return nopWriteCloser{w} 309 } 310 311 type nopWriteCloser struct{ io.Writer } 312 313 func (nopWriteCloser) Close() error { return nil } 314 315 func BenchmarkCompression(b *testing.B) { 316 benchmarks := []struct { 317 codec pkg.Codec 318 function func(*testing.B, pkg.Codec, *bytes.Buffer, []byte) float64 319 }{ 320 { 321 codec: &noopCodec{}, 322 function: benchmarkCompression, 323 }, 324 { 325 codec: new(gzip.Codec), 326 function: benchmarkCompression, 327 }, 328 { 329 codec: new(snappy.Codec), 330 function: benchmarkCompression, 331 }, 332 { 333 codec: new(lz4.Codec), 334 function: benchmarkCompression, 335 }, 336 { 337 codec: new(zstd.Codec), 338 function: benchmarkCompression, 339 }, 340 } 341 342 f, err := os.Open(filepath.Join(os.Getenv("GOROOT"), "src/encoding/json/testdata/code.json.gz")) 343 if err != nil { 344 b.Fatal(err) 345 } 346 defer f.Close() 347 348 z, err := gz.NewReader(f) 349 if err != nil { 350 b.Fatal(err) 351 } 352 353 payload, err := ioutil.ReadAll(z) 354 if err != nil { 355 b.Fatal(err) 356 } 357 358 buffer := bytes.Buffer{} 359 buffer.Grow(len(payload)) 360 361 ts := &bytes.Buffer{} 362 tw := tabwriter.NewWriter(ts, 0, 8, 0, '\t', 0) 363 defer func() { 364 tw.Flush() 365 fmt.Printf("input => %.2f MB\n", float64(len(payload))/(1024*1024)) 366 fmt.Println(ts) 367 }() 368 369 for i := range benchmarks { 370 benchmark := &benchmarks[i] 371 ratio := 0.0 372 373 b.Run(benchmark.codec.Name(), func(b *testing.B) { 374 ratio = benchmark.function(b, benchmark.codec, &buffer, payload) 375 }) 376 377 fmt.Fprintf(tw, " %s:\t%.2f%%\n", benchmark.codec.Name(), 100*ratio) 378 } 379 } 380 381 func benchmarkCompression(b *testing.B, codec pkg.Codec, buf *bytes.Buffer, payload []byte) float64 { 382 // In case only the decompression benchmark are run, we use this flags to 383 // detect whether we have to compress the payload before the decompression 384 // benchmarks. 385 compressed := false 386 387 b.Run("compress", func(b *testing.B) { 388 compressed = true 389 r := bytes.NewReader(payload) 390 b.ReportAllocs() 391 392 for i := 0; i < b.N; i++ { 393 buf.Reset() 394 r.Reset(payload) 395 w := codec.NewWriter(buf) 396 397 _, err := io.Copy(w, r) 398 if err != nil { 399 b.Fatal(err) 400 } 401 if err := w.Close(); err != nil { 402 b.Fatal(err) 403 } 404 } 405 406 b.SetBytes(int64(buf.Len())) 407 }) 408 409 if !compressed { 410 r := bytes.NewReader(payload) 411 w := codec.NewWriter(buf) 412 413 _, err := io.Copy(w, r) 414 if err != nil { 415 b.Fatal(err) 416 } 417 if err := w.Close(); err != nil { 418 b.Fatal(err) 419 } 420 } 421 422 b.Run("decompress", func(b *testing.B) { 423 c := bytes.NewReader(buf.Bytes()) 424 b.ReportAllocs() 425 for i := 0; i < b.N; i++ { 426 c.Reset(buf.Bytes()) 427 r := codec.NewReader(c) 428 429 n, err := io.Copy(ioutil.Discard, r) 430 if err != nil { 431 b.Fatal(err) 432 } 433 if err := r.Close(); err != nil { 434 b.Fatal(err) 435 } 436 437 b.SetBytes(n) 438 } 439 }) 440 441 return 1 - (float64(buf.Len()) / float64(len(payload))) 442 } 443 444 func init() { 445 rand.Seed(time.Now().UnixNano()) 446 } 447 448 func makeTopic() string { 449 return fmt.Sprintf("kafka-go-%016x", rand.Int63()) 450 } 451 452 func newLocalClientAndTopic() (*kafka.Client, string, func()) { 453 topic := makeTopic() 454 client, shutdown := newLocalClient() 455 456 _, err := client.CreateTopics(context.Background(), &kafka.CreateTopicsRequest{ 457 Topics: []kafka.TopicConfig{{ 458 Topic: topic, 459 NumPartitions: 1, 460 ReplicationFactor: 1, 461 }}, 462 }) 463 if err != nil { 464 shutdown() 465 panic(err) 466 } 467 468 // Topic creation seems to be asynchronous. Metadata for the topic partition 469 // layout in the cluster is available in the controller before being synced 470 // with the other brokers, which causes "Error:[3] Unknown Topic Or Partition" 471 // when sending requests to the partition leaders. 472 for i := 0; i < 20; i++ { 473 r, err := client.Fetch(context.Background(), &kafka.FetchRequest{ 474 Topic: topic, 475 Partition: 0, 476 Offset: 0, 477 }) 478 if err == nil && r.Error == nil { 479 break 480 } 481 time.Sleep(100 * time.Millisecond) 482 } 483 484 return client, topic, func() { 485 client.DeleteTopics(context.Background(), &kafka.DeleteTopicsRequest{ 486 Topics: []string{topic}, 487 }) 488 shutdown() 489 } 490 } 491 492 func newLocalClient() (*kafka.Client, func()) { 493 return newClient(kafka.TCP("127.0.0.1:9092")) 494 } 495 496 func newClient(addr net.Addr) (*kafka.Client, func()) { 497 conns := &ktesting.ConnWaitGroup{ 498 DialFunc: (&net.Dialer{}).DialContext, 499 } 500 501 transport := &kafka.Transport{ 502 Dial: conns.Dial, 503 } 504 505 client := &kafka.Client{ 506 Addr: addr, 507 Timeout: 5 * time.Second, 508 Transport: transport, 509 } 510 511 return client, func() { transport.CloseIdleConnections(); conns.Wait() } 512 }