github.com/hamba/avro@v1.8.0/ocf/ocf_test.go (about) 1 package ocf_test 2 3 import ( 4 "bytes" 5 "compress/flate" 6 "errors" 7 "os" 8 "testing" 9 10 "github.com/hamba/avro" 11 "github.com/hamba/avro/ocf" 12 "github.com/stretchr/testify/assert" 13 "github.com/stretchr/testify/require" 14 ) 15 16 var schema = `{ 17 "type":"record", 18 "name":"FullRecord", 19 "namespace":"org.hamba.avro", 20 "fields":[ 21 {"name":"strings","type":{"type":"array","items":"string"}}, 22 {"name":"longs","type":{"type":"array","items":"long"}}, 23 {"name":"enum","type":{"type":"enum","name":"foo","symbols":["A","B","C","D"]}}, 24 {"name":"map","type":{"type":"map","values":"int"}}, 25 {"name":"nullable","type":["null","string"]}, 26 {"name":"fixed","type":{"type":"fixed","name":"md5","size":16}}, 27 {"name":"record","type":{ 28 "type":"record", 29 "name":"TestRecord", 30 "fields":[ 31 {"name":"long","type":"long"}, 32 {"name":"string","type":"string"}, 33 {"name":"int","type":"int"}, 34 {"name":"float","type":"float"}, 35 {"name":"double","type":"double"}, 36 {"name":"bool","type":"boolean"} 37 ] 38 }} 39 ] 40 }` 41 42 type FullRecord struct { 43 Strings []string `avro:"strings"` 44 Longs []int64 `avro:"longs"` 45 Enum string `avro:"enum"` 46 Map map[string]int `avro:"map"` 47 Nullable *string `avro:"nullable"` 48 Fixed [16]byte `avro:"fixed"` 49 Record *TestRecord `avro:"record"` 50 } 51 52 type TestRecord struct { 53 Long int64 `avro:"long"` 54 String string `avro:"string"` 55 Int int32 `avro:"int"` 56 Float float32 `avro:"float"` 57 Double float64 `avro:"double"` 58 Bool bool `avro:"bool"` 59 } 60 61 func TestNewDecoder_InvalidHeader(t *testing.T) { 62 data := []byte{'O', 'b', 'j'} 63 64 _, err := ocf.NewDecoder(bytes.NewReader(data)) 65 66 assert.Error(t, err) 67 } 68 69 func TestNewDecoder_InvalidMagic(t *testing.T) { 70 data := []byte{'f', 'o', 'o', 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 71 72 _, err := ocf.NewDecoder(bytes.NewReader(data)) 73 74 assert.Error(t, err) 75 } 76 77 func TestNewDecoder_InvalidSchema(t *testing.T) { 78 data := []byte{'O', 'b', 'j', 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 79 80 _, err := ocf.NewDecoder(bytes.NewReader(data)) 81 82 assert.Error(t, err) 83 } 84 85 func TestNewDecoder_InvalidCodec(t *testing.T) { 86 data := []byte{'O', 'b', 'j', 0x1, 0x3, 0x4c, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 0xc, 0x22, 'l', 'o', 'n', 'g', 87 0x22, 0x14, 'a', 'v', 'r', 'o', 0x2e, 'c', 'o', 'd', 'e', 'c', 0xe, 'd', 'e', 'a', 'l', 'a', 't', 'e', 0x0, 88 0x72, 0xce, 0x78, 0x7, 0x35, 0x81, 0xb0, 0x80, 0x77, 0x59, 0xa9, 0x83, 0xaf, 0x90, 0x3e, 0xaf, 89 } 90 91 _, err := ocf.NewDecoder(bytes.NewReader(data)) 92 93 assert.Error(t, err) 94 } 95 96 func TestDecoder(t *testing.T) { 97 unionStr := "union value" 98 want := FullRecord{ 99 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 100 Longs: []int64{1, 2, 3, 4, 5}, 101 Enum: "C", 102 Map: map[string]int{ 103 "key1": 1, 104 "key2": 2, 105 "key3": 3, 106 "key4": 4, 107 "key5": 5, 108 }, 109 Nullable: &unionStr, 110 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 111 Record: &TestRecord{ 112 Long: 1925639126735, 113 String: "I am a test record", 114 Int: 666, 115 Float: 7171.17, 116 Double: 916734926348163.01973408746523, 117 Bool: true, 118 }, 119 } 120 121 f, err := os.Open("../testdata/full.avro") 122 if err != nil { 123 t.Error(err) 124 return 125 } 126 t.Cleanup(func() { _ = f.Close() }) 127 128 dec, err := ocf.NewDecoder(f) 129 if err != nil { 130 t.Error(err) 131 return 132 } 133 134 var count int 135 for dec.HasNext() { 136 count++ 137 var got FullRecord 138 err = dec.Decode(&got) 139 140 assert.NoError(t, err) 141 assert.Equal(t, want, got) 142 } 143 144 assert.NoError(t, dec.Error()) 145 assert.Equal(t, 1, count) 146 } 147 148 func TestDecoderDeflate(t *testing.T) { 149 unionStr := "union value" 150 want := FullRecord{ 151 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 152 Longs: []int64{1, 2, 3, 4, 5}, 153 Enum: "C", 154 Map: map[string]int{ 155 "key1": 1, 156 "key2": 2, 157 "key3": 3, 158 "key4": 4, 159 "key5": 5, 160 }, 161 Nullable: &unionStr, 162 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 163 Record: &TestRecord{ 164 Long: 1925639126735, 165 String: "I am a test record", 166 Int: 666, 167 Float: 7171.17, 168 Double: 916734926348163.01973408746523, 169 Bool: true, 170 }, 171 } 172 173 f, err := os.Open("../testdata/full-deflate.avro") 174 if err != nil { 175 t.Error(err) 176 return 177 } 178 t.Cleanup(func() { _ = f.Close() }) 179 180 dec, err := ocf.NewDecoder(f) 181 if err != nil { 182 t.Error(err) 183 return 184 } 185 186 var count int 187 for dec.HasNext() { 188 count++ 189 var got FullRecord 190 err = dec.Decode(&got) 191 192 assert.NoError(t, err) 193 assert.Equal(t, want, got) 194 } 195 196 assert.NoError(t, dec.Error()) 197 assert.Equal(t, 1, count) 198 } 199 200 func TestDecoderDeflate_InvalidData(t *testing.T) { 201 f, err := os.Open("../testdata/deflate-invalid-data.avro") 202 if err != nil { 203 t.Error(err) 204 return 205 } 206 t.Cleanup(func() { _ = f.Close() }) 207 208 dec, err := ocf.NewDecoder(f) 209 if err != nil { 210 t.Error(err) 211 return 212 } 213 214 dec.HasNext() 215 216 assert.Error(t, dec.Error()) 217 } 218 219 func TestDecoderSnappy(t *testing.T) { 220 unionStr := "union value" 221 want := FullRecord{ 222 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 223 Longs: []int64{1, 2, 3, 4, 5}, 224 Enum: "C", 225 Map: map[string]int{ 226 "key1": 1, 227 "key2": 2, 228 "key3": 3, 229 "key4": 4, 230 "key5": 5, 231 }, 232 Nullable: &unionStr, 233 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 234 Record: &TestRecord{ 235 Long: 1925639126735, 236 String: "I am a test record", 237 Int: 666, 238 Float: 7171.17, 239 Double: 916734926348163.01973408746523, 240 Bool: true, 241 }, 242 } 243 244 f, err := os.Open("../testdata/full-snappy.avro") 245 if err != nil { 246 t.Error(err) 247 return 248 } 249 t.Cleanup(func() { _ = f.Close() }) 250 251 dec, err := ocf.NewDecoder(f) 252 if err != nil { 253 t.Error(err) 254 return 255 } 256 257 var count int 258 for dec.HasNext() { 259 count++ 260 var got FullRecord 261 err = dec.Decode(&got) 262 263 assert.NoError(t, err) 264 assert.Equal(t, want, got) 265 } 266 267 assert.NoError(t, dec.Error()) 268 assert.Equal(t, 1, count) 269 } 270 271 func TestDecoderSnappy_InvalidData(t *testing.T) { 272 f, err := os.Open("../testdata/snappy-invalid-data.avro") 273 if err != nil { 274 t.Error(err) 275 return 276 } 277 t.Cleanup(func() { _ = f.Close() }) 278 279 dec, err := ocf.NewDecoder(f) 280 if err != nil { 281 t.Error(err) 282 return 283 } 284 285 dec.HasNext() 286 287 assert.Error(t, dec.Error()) 288 } 289 290 func TestDecoderSnappy_ShortCRC(t *testing.T) { 291 f, err := os.Open("../testdata/snappy-short-crc.avro") 292 if err != nil { 293 t.Error(err) 294 return 295 } 296 t.Cleanup(func() { _ = f.Close() }) 297 298 dec, err := ocf.NewDecoder(f) 299 if err != nil { 300 t.Error(err) 301 return 302 } 303 304 dec.HasNext() 305 306 assert.Error(t, dec.Error()) 307 } 308 309 func TestDecoderSnappy_InvalidCRC(t *testing.T) { 310 f, err := os.Open("../testdata/snappy-invalid-crc.avro") 311 if err != nil { 312 t.Error(err) 313 return 314 } 315 t.Cleanup(func() { _ = f.Close() }) 316 317 dec, err := ocf.NewDecoder(f) 318 if err != nil { 319 t.Error(err) 320 return 321 } 322 323 dec.HasNext() 324 325 assert.Error(t, dec.Error()) 326 } 327 328 func TestDecoder_DecodeAvroError(t *testing.T) { 329 data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 330 0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 331 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x16, 0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad, 0xe2, 0xa2, 0xf3, 332 0xad, 0xad, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 333 } 334 335 dec, _ := ocf.NewDecoder(bytes.NewReader(data)) 336 _ = dec.HasNext() 337 338 var l int64 339 err := dec.Decode(&l) 340 341 assert.Error(t, err) 342 } 343 344 func TestDecoder_DecodeMustCallHasNext(t *testing.T) { 345 data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 346 0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 347 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x02, 0x02, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 348 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 349 } 350 351 dec, _ := ocf.NewDecoder(bytes.NewReader(data)) 352 353 var l int64 354 err := dec.Decode(&l) 355 356 assert.Error(t, err) 357 } 358 359 func TestDecoder_InvalidBlock(t *testing.T) { 360 data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 361 0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfa, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 362 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x02, 0x02, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 363 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 364 } 365 366 dec, _ := ocf.NewDecoder(bytes.NewReader(data)) 367 368 got := dec.HasNext() 369 370 assert.False(t, got) 371 assert.Error(t, dec.Error()) 372 } 373 374 func TestNewEncoder_InvalidSchema(t *testing.T) { 375 buf := &bytes.Buffer{} 376 377 _, err := ocf.NewEncoder(``, buf) 378 379 assert.Error(t, err) 380 } 381 382 func TestNewEncoder_InvalidCodec(t *testing.T) { 383 buf := &bytes.Buffer{} 384 385 _, err := ocf.NewEncoder(`"long"`, buf, ocf.WithCodec(ocf.CodecName("test"))) 386 387 assert.Error(t, err) 388 } 389 390 func TestEncoder(t *testing.T) { 391 unionStr := "union value" 392 record := FullRecord{ 393 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 394 Longs: []int64{1, 2, 3, 4, 5}, 395 Enum: "C", 396 Map: map[string]int{ 397 "key1": 1, 398 "key2": 2, 399 "key3": 3, 400 "key4": 4, 401 "key5": 5, 402 }, 403 Nullable: &unionStr, 404 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 405 Record: &TestRecord{ 406 Long: 1925639126735, 407 String: "I am a test record", 408 Int: 666, 409 Float: 7171.17, 410 Double: 916734926348163.01973408746523, 411 Bool: true, 412 }, 413 } 414 415 buf := &bytes.Buffer{} 416 enc, err := ocf.NewEncoder(schema, buf) 417 if err != nil { 418 t.Error(err) 419 return 420 } 421 422 err = enc.Encode(record) 423 assert.NoError(t, err) 424 425 err = enc.Close() 426 assert.NoError(t, err) 427 } 428 429 func TestEncoder_Write(t *testing.T) { 430 unionStr := "union value" 431 record := FullRecord{ 432 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 433 Longs: []int64{1, 2, 3, 4, 5}, 434 Enum: "C", 435 Map: map[string]int{ 436 "key1": 1, 437 "key2": 2, 438 "key3": 3, 439 "key4": 4, 440 "key5": 5, 441 }, 442 Nullable: &unionStr, 443 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 444 Record: &TestRecord{ 445 Long: 1925639126735, 446 String: "I am a test record", 447 Int: 666, 448 Float: 7171.17, 449 Double: 916734926348163.01973408746523, 450 Bool: true, 451 }, 452 } 453 454 buf := &bytes.Buffer{} 455 enc, err := ocf.NewEncoder(schema, buf) 456 require.NoError(t, err) 457 458 encodedBytes, err := avro.Marshal(avro.MustParse(schema), record) 459 require.NoError(t, err) 460 461 n, err := enc.Write(encodedBytes) 462 require.NoError(t, err) 463 464 err = enc.Close() 465 require.NoError(t, err) 466 467 require.Equal(t, n, len(encodedBytes)) 468 require.Equal(t, 957, buf.Len()) 469 } 470 471 func TestEncoder_EncodeCompressesDeflate(t *testing.T) { 472 unionStr := "union value" 473 record := FullRecord{ 474 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 475 Longs: []int64{1, 2, 3, 4, 5}, 476 Enum: "C", 477 Map: map[string]int{ 478 "key1": 1, 479 "key2": 2, 480 "key3": 3, 481 "key4": 4, 482 "key5": 5, 483 }, 484 Nullable: &unionStr, 485 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 486 Record: &TestRecord{ 487 Long: 1925639126735, 488 String: "I am a test record", 489 Int: 666, 490 Float: 7171.17, 491 Double: 916734926348163.01973408746523, 492 Bool: true, 493 }, 494 } 495 496 buf := &bytes.Buffer{} 497 enc, _ := ocf.NewEncoder(schema, buf, ocf.WithCodec(ocf.Deflate)) 498 499 err := enc.Encode(record) 500 assert.NoError(t, err) 501 502 err = enc.Close() 503 assert.NoError(t, err) 504 505 assert.Equal(t, 926, buf.Len()) 506 } 507 508 func TestEncoder_EncodeCompressesDeflateWithLevel(t *testing.T) { 509 unionStr := "union value" 510 record := FullRecord{ 511 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 512 Longs: []int64{1, 2, 3, 4, 5}, 513 Enum: "C", 514 Map: map[string]int{ 515 "key1": 1, 516 "key2": 2, 517 "key3": 3, 518 "key4": 4, 519 "key5": 5, 520 }, 521 Nullable: &unionStr, 522 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 523 Record: &TestRecord{ 524 Long: 1925639126735, 525 String: "I am a test record", 526 Int: 666, 527 Float: 7171.17, 528 Double: 916734926348163.01973408746523, 529 Bool: true, 530 }, 531 } 532 533 buf := &bytes.Buffer{} 534 enc, _ := ocf.NewEncoder(schema, buf, ocf.WithCompressionLevel(flate.BestCompression)) 535 536 err := enc.Encode(record) 537 assert.NoError(t, err) 538 539 err = enc.Close() 540 assert.NoError(t, err) 541 542 assert.Equal(t, 926, buf.Len()) 543 } 544 545 func TestEncoder_EncodeCompressesSnappy(t *testing.T) { 546 unionStr := "union value" 547 record := FullRecord{ 548 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 549 Longs: []int64{1, 2, 3, 4, 5}, 550 Enum: "C", 551 Map: map[string]int{ 552 "key1": 1, 553 "key2": 2, 554 "key3": 3, 555 "key4": 4, 556 "key5": 5, 557 }, 558 Nullable: &unionStr, 559 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 560 Record: &TestRecord{ 561 Long: 1925639126735, 562 String: "I am a test record", 563 Int: 666, 564 Float: 7171.17, 565 Double: 916734926348163.01973408746523, 566 Bool: true, 567 }, 568 } 569 570 buf := &bytes.Buffer{} 571 enc, _ := ocf.NewEncoder(schema, buf, ocf.WithBlockLength(1), ocf.WithCodec(ocf.Snappy)) 572 573 err := enc.Encode(record) 574 assert.NoError(t, err) 575 576 err = enc.Close() 577 assert.NoError(t, err) 578 579 assert.Equal(t, 938, buf.Len()) 580 } 581 582 func TestEncoder_EncodeError(t *testing.T) { 583 buf := &bytes.Buffer{} 584 enc, _ := ocf.NewEncoder(`"long"`, buf) 585 586 err := enc.Encode("test") 587 588 assert.Error(t, err) 589 } 590 591 func TestEncoder_EncodeWritesBlocks(t *testing.T) { 592 buf := &bytes.Buffer{} 593 enc, _ := ocf.NewEncoder(`"long"`, buf, ocf.WithBlockLength(1)) 594 t.Cleanup(func() { _ = enc.Close() }) 595 596 err := enc.Encode(int64(1)) 597 598 assert.NoError(t, err) 599 assert.Equal(t, 77, buf.Len()) 600 } 601 602 func TestEncoder_EncodeHandlesWriteBlockError(t *testing.T) { 603 w := &errorWriter{} 604 enc, _ := ocf.NewEncoder(`"long"`, w, ocf.WithBlockLength(1)) 605 t.Cleanup(func() { _ = enc.Close() }) 606 607 err := enc.Encode(int64(1)) 608 609 assert.Error(t, err) 610 } 611 612 func TestEncoder_CloseHandlesWriteBlockError(t *testing.T) { 613 w := &errorWriter{} 614 enc, _ := ocf.NewEncoder(`"long"`, w) 615 _ = enc.Encode(int64(1)) 616 617 err := enc.Close() 618 619 assert.Error(t, err) 620 } 621 622 func TestEncodeDecodeMetadata(t *testing.T) { 623 buf := &bytes.Buffer{} 624 enc, _ := ocf.NewEncoder(`"long"`, buf, ocf.WithMetadata(map[string][]byte{ 625 "test": []byte("foo"), 626 })) 627 628 err := enc.Encode(int64(1)) 629 assert.NoError(t, err) 630 631 _ = enc.Close() 632 633 dec, err := ocf.NewDecoder(buf) 634 assert.NoError(t, err) 635 636 assert.Equal(t, []byte("foo"), dec.Metadata()["test"]) 637 } 638 639 type errorWriter struct{} 640 641 func (*errorWriter) Write(p []byte) (n int, err error) { 642 return 0, errors.New("test") 643 }