github.com/hamba/avro/v2@v2.22.1-0.20240518180522-aff3955acf7d/ocf/ocf_test.go (about) 1 package ocf_test 2 3 import ( 4 "bytes" 5 "compress/flate" 6 "errors" 7 "flag" 8 "io" 9 "os" 10 "testing" 11 12 "github.com/hamba/avro/v2" 13 "github.com/hamba/avro/v2/ocf" 14 "github.com/stretchr/testify/assert" 15 "github.com/stretchr/testify/require" 16 ) 17 18 var update = flag.Bool("update", false, "update the golden files") 19 20 var schema = `{ 21 "type":"record", 22 "name":"FullRecord", 23 "namespace":"org.hamba.avro", 24 "fields":[ 25 {"name":"strings","type":{"type":"array","items":"string"}}, 26 {"name":"longs","type":{"type":"array","items":"long"}}, 27 {"name":"enum","type":{"type":"enum","name":"foo","symbols":["A","B","C","D"]}}, 28 {"name":"map","type":{"type":"map","values":"int"}}, 29 {"name":"nullable","type":["null","string"]}, 30 {"name":"fixed","type":{"type":"fixed","name":"md5","size":16}}, 31 {"name":"record","type":{ 32 "type":"record", 33 "name":"TestRecord", 34 "fields":[ 35 {"name":"long","type":"long"}, 36 {"name":"string","type":"string"}, 37 {"name":"int","type":"int"}, 38 {"name":"float","type":"float"}, 39 {"name":"double","type":"double"}, 40 {"name":"bool","type":"boolean"} 41 ] 42 }} 43 ] 44 }` 45 46 type FullRecord struct { 47 Strings []string `avro:"strings"` 48 Longs []int64 `avro:"longs"` 49 Enum string `avro:"enum"` 50 Map map[string]int `avro:"map"` 51 Nullable *string `avro:"nullable"` 52 Fixed [16]byte `avro:"fixed"` 53 Record *TestRecord `avro:"record"` 54 } 55 56 type TestRecord struct { 57 Long int64 `avro:"long"` 58 String string `avro:"string"` 59 Int int32 `avro:"int"` 60 Float float32 `avro:"float"` 61 Double float64 `avro:"double"` 62 Bool bool `avro:"bool"` 63 } 64 65 func TestNewDecoder_InvalidHeader(t *testing.T) { 66 data := []byte{'O', 'b', 'j'} 67 68 _, err := ocf.NewDecoder(bytes.NewReader(data)) 69 70 assert.Error(t, err) 71 } 72 73 func TestNewDecoder_InvalidMagic(t *testing.T) { 74 data := []byte{'f', 'o', 'o', 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 75 76 _, err := ocf.NewDecoder(bytes.NewReader(data)) 77 78 assert.Error(t, err) 79 } 80 81 func TestNewDecoder_InvalidSchema(t *testing.T) { 82 data := []byte{'O', 'b', 'j', 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 83 84 _, err := ocf.NewDecoder(bytes.NewReader(data)) 85 86 assert.Error(t, err) 87 } 88 89 func TestNewDecoder_InvalidCodec(t *testing.T) { 90 data := []byte{'O', 'b', 'j', 0x1, 0x3, 0x4c, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 0xc, 0x22, 'l', 'o', 'n', 'g', 91 0x22, 0x14, 'a', 'v', 'r', 'o', 0x2e, 'c', 'o', 'd', 'e', 'c', 0xe, 'd', 'e', 'a', 'l', 'a', 't', 'e', 0x0, 92 0x72, 0xce, 0x78, 0x7, 0x35, 0x81, 0xb0, 0x80, 0x77, 0x59, 0xa9, 0x83, 0xaf, 0x90, 0x3e, 0xaf, 93 } 94 95 _, err := ocf.NewDecoder(bytes.NewReader(data)) 96 97 assert.Error(t, err) 98 } 99 100 func TestDecoder(t *testing.T) { 101 unionStr := "union value" 102 want := FullRecord{ 103 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 104 Longs: []int64{1, 2, 3, 4, 5}, 105 Enum: "C", 106 Map: map[string]int{ 107 "key1": 1, 108 "key2": 2, 109 "key3": 3, 110 "key4": 4, 111 "key5": 5, 112 }, 113 Nullable: &unionStr, 114 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 115 Record: &TestRecord{ 116 Long: 1925639126735, 117 String: "I am a test record", 118 Int: 666, 119 Float: 7171.17, 120 Double: 916734926348163.01973408746523, 121 Bool: true, 122 }, 123 } 124 125 f, err := os.Open("testdata/full.avro") 126 if err != nil { 127 t.Error(err) 128 return 129 } 130 t.Cleanup(func() { _ = f.Close() }) 131 132 dec, err := ocf.NewDecoder(f) 133 require.NoError(t, err) 134 135 var count int 136 for dec.HasNext() { 137 count++ 138 var got FullRecord 139 err = dec.Decode(&got) 140 141 require.NoError(t, err) 142 assert.Equal(t, want, got) 143 } 144 145 require.NoError(t, dec.Error()) 146 assert.Equal(t, 1, count) 147 } 148 149 func TestDecoder_WithDeflate(t *testing.T) { 150 unionStr := "union value" 151 want := FullRecord{ 152 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 153 Longs: []int64{1, 2, 3, 4, 5}, 154 Enum: "C", 155 Map: map[string]int{ 156 "key1": 1, 157 "key2": 2, 158 "key3": 3, 159 "key4": 4, 160 "key5": 5, 161 }, 162 Nullable: &unionStr, 163 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 164 Record: &TestRecord{ 165 Long: 1925639126735, 166 String: "I am a test record", 167 Int: 666, 168 Float: 7171.17, 169 Double: 916734926348163.01973408746523, 170 Bool: true, 171 }, 172 } 173 174 f, err := os.Open("testdata/full-deflate.avro") 175 if err != nil { 176 t.Error(err) 177 return 178 } 179 t.Cleanup(func() { _ = f.Close() }) 180 181 dec, err := ocf.NewDecoder(f) 182 require.NoError(t, err) 183 184 var count int 185 for dec.HasNext() { 186 count++ 187 var got FullRecord 188 err = dec.Decode(&got) 189 190 require.NoError(t, err) 191 assert.Equal(t, want, got) 192 } 193 194 require.NoError(t, dec.Error()) 195 assert.Equal(t, 1, count) 196 } 197 198 func TestDecoder_WithDeflateHandlesInvalidData(t *testing.T) { 199 f, err := os.Open("testdata/deflate-invalid-data.avro") 200 if err != nil { 201 t.Error(err) 202 return 203 } 204 t.Cleanup(func() { _ = f.Close() }) 205 206 dec, err := ocf.NewDecoder(f) 207 require.NoError(t, err) 208 209 dec.HasNext() 210 211 assert.Error(t, dec.Error()) 212 } 213 214 func TestDecoder_WithSnappy(t *testing.T) { 215 unionStr := "union value" 216 want := FullRecord{ 217 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 218 Longs: []int64{1, 2, 3, 4, 5}, 219 Enum: "C", 220 Map: map[string]int{ 221 "key1": 1, 222 "key2": 2, 223 "key3": 3, 224 "key4": 4, 225 "key5": 5, 226 }, 227 Nullable: &unionStr, 228 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 229 Record: &TestRecord{ 230 Long: 1925639126735, 231 String: "I am a test record", 232 Int: 666, 233 Float: 7171.17, 234 Double: 916734926348163.01973408746523, 235 Bool: true, 236 }, 237 } 238 239 f, err := os.Open("testdata/full-snappy.avro") 240 if err != nil { 241 t.Error(err) 242 return 243 } 244 t.Cleanup(func() { _ = f.Close() }) 245 246 dec, err := ocf.NewDecoder(f) 247 require.NoError(t, err) 248 249 var count int 250 for dec.HasNext() { 251 count++ 252 var got FullRecord 253 err = dec.Decode(&got) 254 255 require.NoError(t, err) 256 assert.Equal(t, want, got) 257 } 258 259 require.NoError(t, dec.Error()) 260 assert.Equal(t, 1, count) 261 } 262 263 func TestDecoder_WithSnappyHandlesInvalidData(t *testing.T) { 264 f, err := os.Open("testdata/snappy-invalid-data.avro") 265 if err != nil { 266 t.Error(err) 267 return 268 } 269 t.Cleanup(func() { _ = f.Close() }) 270 271 dec, err := ocf.NewDecoder(f) 272 require.NoError(t, err) 273 274 dec.HasNext() 275 276 assert.Error(t, dec.Error()) 277 } 278 279 func TestDecoder_WithSnappyHandlesShortCRC(t *testing.T) { 280 f, err := os.Open("testdata/snappy-short-crc.avro") 281 if err != nil { 282 t.Error(err) 283 return 284 } 285 t.Cleanup(func() { _ = f.Close() }) 286 287 dec, err := ocf.NewDecoder(f) 288 require.NoError(t, err) 289 290 dec.HasNext() 291 292 assert.Error(t, dec.Error()) 293 } 294 295 func TestDecoder_WithSnappyHandlesInvalidCRC(t *testing.T) { 296 f, err := os.Open("testdata/snappy-invalid-crc.avro") 297 if err != nil { 298 t.Error(err) 299 return 300 } 301 t.Cleanup(func() { _ = f.Close() }) 302 303 dec, err := ocf.NewDecoder(f) 304 if err != nil { 305 t.Error(err) 306 return 307 } 308 309 dec.HasNext() 310 311 assert.Error(t, dec.Error()) 312 } 313 314 func TestDecoder_WithZStandard(t *testing.T) { 315 unionStr := "union value" 316 want := FullRecord{ 317 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 318 Longs: []int64{1, 2, 3, 4, 5}, 319 Enum: "C", 320 Map: map[string]int{ 321 "key1": 1, 322 "key2": 2, 323 "key3": 3, 324 "key4": 4, 325 "key5": 5, 326 }, 327 Nullable: &unionStr, 328 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 329 Record: &TestRecord{ 330 Long: 1925639126735, 331 String: "I am a test record", 332 Int: 666, 333 Float: 7171.17, 334 Double: 916734926348163.01973408746523, 335 Bool: true, 336 }, 337 } 338 339 f, err := os.Open("testdata/full-zstd.avro") 340 require.NoError(t, err) 341 t.Cleanup(func() { _ = f.Close() }) 342 343 dec, err := ocf.NewDecoder(f) 344 require.NoError(t, err) 345 346 var count int 347 for dec.HasNext() { 348 count++ 349 var got FullRecord 350 err = dec.Decode(&got) 351 352 require.NoError(t, err) 353 assert.Equal(t, want, got) 354 } 355 356 require.NoError(t, dec.Error()) 357 assert.Equal(t, 1, count) 358 } 359 360 func TestDecoder_WithZStandardHandlesInvalidData(t *testing.T) { 361 f, err := os.Open("testdata/zstd-invalid-data.avro") 362 require.NoError(t, err) 363 t.Cleanup(func() { _ = f.Close() }) 364 365 dec, err := ocf.NewDecoder(f) 366 require.NoError(t, err) 367 368 dec.HasNext() 369 370 assert.Error(t, dec.Error()) 371 } 372 373 func TestDecoder_DecodeAvroError(t *testing.T) { 374 data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 375 0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 376 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x16, 0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad, 0xe2, 0xa2, 0xf3, 377 0xad, 0xad, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 378 } 379 380 dec, _ := ocf.NewDecoder(bytes.NewReader(data)) 381 _ = dec.HasNext() 382 383 var l int64 384 err := dec.Decode(&l) 385 386 assert.Error(t, err) 387 } 388 389 func TestDecoder_DecodeMustCallHasNext(t *testing.T) { 390 data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 391 0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 392 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x02, 0x02, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 393 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 394 } 395 396 dec, _ := ocf.NewDecoder(bytes.NewReader(data)) 397 398 var l int64 399 err := dec.Decode(&l) 400 401 assert.Error(t, err) 402 } 403 404 func TestDecoder_InvalidBlock(t *testing.T) { 405 data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 406 0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfa, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 407 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x02, 0x02, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 408 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 409 } 410 411 dec, _ := ocf.NewDecoder(bytes.NewReader(data)) 412 413 got := dec.HasNext() 414 415 assert.False(t, got) 416 assert.Error(t, dec.Error()) 417 } 418 419 func TestNewEncoder_InvalidSchema(t *testing.T) { 420 buf := &bytes.Buffer{} 421 422 _, err := ocf.NewEncoder(``, buf) 423 424 assert.Error(t, err) 425 } 426 427 func TestNewEncoder_InvalidCodec(t *testing.T) { 428 buf := &bytes.Buffer{} 429 430 _, err := ocf.NewEncoder(`"long"`, buf, ocf.WithCodec(ocf.CodecName("test"))) 431 432 assert.Error(t, err) 433 } 434 435 func TestEncoder(t *testing.T) { 436 unionStr := "union value" 437 record := FullRecord{ 438 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 439 Longs: []int64{1, 2, 3, 4, 5}, 440 Enum: "C", 441 Map: map[string]int{ 442 "key1": 1, 443 "key2": 2, 444 "key3": 3, 445 "key4": 4, 446 "key5": 5, 447 }, 448 Nullable: &unionStr, 449 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 450 Record: &TestRecord{ 451 Long: 1925639126735, 452 String: "I am a test record", 453 Int: 666, 454 Float: 7171.17, 455 Double: 916734926348163.01973408746523, 456 Bool: true, 457 }, 458 } 459 460 buf := &bytes.Buffer{} 461 enc, err := ocf.NewEncoder(schema, buf) 462 require.NoError(t, err) 463 464 err = enc.Encode(record) 465 require.NoError(t, err) 466 467 err = enc.Close() 468 assert.NoError(t, err) 469 } 470 471 func TestEncoder_WithEncodingConfig(t *testing.T) { 472 arrSchema := `{"type": "array", "items": "long"}` 473 syncMarker := [16]byte{0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F} 474 475 skipOcfHeader := func(encoded []byte) []byte { 476 index := bytes.Index(encoded, syncMarker[:]) 477 require.False(t, index == -1) 478 return encoded[index+len(syncMarker):] // +1 for the null byte 479 } 480 481 tests := []struct { 482 name string 483 data any 484 encConfig avro.API 485 wantPayload []byte // without OCF header 486 }{ 487 { 488 name: "no encoding config", 489 data: []int64{1, 2, 3, 4, 5}, 490 wantPayload: []byte{ 491 0x2, 0x10, // OCF block header: 1 elems, 8 bytes 492 0x9, 0xA, // array block header: 5 elems, 5 bytes 493 0x2, 0x4, 0x6, 0x8, 0xA, 0x0, // array block payload with terminator 494 0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F, // OCF trailing sync marker 495 }, 496 }, 497 { 498 name: "no array bytes size", 499 encConfig: avro.Config{DisableBlockSizeHeader: true}.Freeze(), 500 data: []int64{1, 2, 3, 4, 5}, 501 wantPayload: []byte{ 502 0x2, 0x0E, // OCF block header: 1 elem, 7 bytes 503 0xA, // array block header: 5 elems 504 0x2, 0x4, 0x6, 0x8, 0xA, 0x0, // array block payload with terminator 505 0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F, // OCF trailing sync marker 506 }, 507 }, 508 { 509 name: "non-default array block length", 510 encConfig: avro.Config{BlockLength: 5}.Freeze(), 511 data: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}, 512 wantPayload: []byte{ 513 0x2, 0x1c, // OCF block header: 1 elems, 15 bytes 514 0x9, 0xA, // array block 1 header: 5 elems, 5 bytes 515 0x2, 0x4, 0x6, 0x8, 0xA, // array block 1 516 0x7, 0x8, // array block 2 header: 4 elems, 4 bytes 517 0xC, 0xE, 0x10, 0x12, 0x0, // array block 2 with terminator 518 0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F, // OCF sync marker 519 }, 520 }, 521 } 522 523 for _, tt := range tests { 524 t.Run(tt.name, func(t *testing.T) { 525 buf := &bytes.Buffer{} 526 opts := []ocf.EncoderFunc{ocf.WithSyncBlock(syncMarker)} 527 if tt.encConfig != nil { 528 opts = append(opts, ocf.WithEncodingConfig(tt.encConfig)) 529 } 530 enc, err := ocf.NewEncoder(arrSchema, buf, opts...) 531 require.NoError(t, err) 532 533 err = enc.Encode(tt.data) 534 require.NoError(t, err) 535 536 err = enc.Close() 537 assert.NoError(t, err) 538 539 assert.Equal(t, tt.wantPayload, skipOcfHeader(buf.Bytes())) 540 }) 541 } 542 543 } 544 545 func TestEncoder_ExistingOCF(t *testing.T) { 546 record := FullRecord{ 547 Strings: []string{"another", "record"}, 548 Enum: "A", 549 Record: &TestRecord{}, 550 } 551 552 file := copyToTemp(t, "testdata/full.avro") 553 t.Cleanup(func() { 554 _ = file.Close() 555 _ = os.Remove(file.Name()) 556 }) 557 558 enc, err := ocf.NewEncoder(schema, file) 559 require.NoError(t, err) 560 561 err = enc.Encode(record) 562 require.NoError(t, err) 563 564 err = enc.Close() 565 assert.NoError(t, err) 566 567 _, err = file.Seek(0, 0) 568 require.NoError(t, err) 569 got, err := io.ReadAll(file) 570 require.NoError(t, err) 571 572 if *update { 573 err = os.WriteFile("testdata/full-appended.avro", got, 0o644) 574 require.NoError(t, err) 575 } 576 577 want, err := os.ReadFile("testdata/full-appended.avro") 578 require.NoError(t, err) 579 assert.Equal(t, want, got) 580 } 581 582 func TestEncoder_NilWriter(t *testing.T) { 583 _, err := ocf.NewEncoder(schema, nil) 584 585 assert.Error(t, err) 586 } 587 588 func TestEncoder_Write(t *testing.T) { 589 unionStr := "union value" 590 record := FullRecord{ 591 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 592 Longs: []int64{1, 2, 3, 4, 5}, 593 Enum: "C", 594 Map: map[string]int{ 595 "key1": 1, 596 "key2": 2, 597 "key3": 3, 598 "key4": 4, 599 "key5": 5, 600 }, 601 Nullable: &unionStr, 602 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 603 Record: &TestRecord{ 604 Long: 1925639126735, 605 String: "I am a test record", 606 Int: 666, 607 Float: 7171.17, 608 Double: 916734926348163.01973408746523, 609 Bool: true, 610 }, 611 } 612 613 buf := &bytes.Buffer{} 614 enc, err := ocf.NewEncoder(schema, buf) 615 require.NoError(t, err) 616 617 encodedBytes, err := avro.Marshal(avro.MustParse(schema), record) 618 require.NoError(t, err) 619 620 n, err := enc.Write(encodedBytes) 621 require.NoError(t, err) 622 623 err = enc.Close() 624 require.NoError(t, err) 625 626 require.Equal(t, n, len(encodedBytes)) 627 require.Equal(t, 957, buf.Len()) 628 } 629 630 func TestEncoder_EncodeCompressesDeflate(t *testing.T) { 631 unionStr := "union value" 632 record := FullRecord{ 633 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 634 Longs: []int64{1, 2, 3, 4, 5}, 635 Enum: "C", 636 Map: map[string]int{ 637 "key1": 1, 638 "key2": 2, 639 "key3": 3, 640 "key4": 4, 641 "key5": 5, 642 }, 643 Nullable: &unionStr, 644 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 645 Record: &TestRecord{ 646 Long: 1925639126735, 647 String: "I am a test record", 648 Int: 666, 649 Float: 7171.17, 650 Double: 916734926348163.01973408746523, 651 Bool: true, 652 }, 653 } 654 655 buf := &bytes.Buffer{} 656 enc, _ := ocf.NewEncoder(schema, buf, ocf.WithCodec(ocf.Deflate)) 657 658 err := enc.Encode(record) 659 assert.NoError(t, err) 660 661 err = enc.Close() 662 663 require.NoError(t, err) 664 assert.Equal(t, 926, buf.Len()) 665 } 666 667 func TestEncoder_EncodeCompressesDeflateWithLevel(t *testing.T) { 668 unionStr := "union value" 669 record := FullRecord{ 670 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 671 Longs: []int64{1, 2, 3, 4, 5}, 672 Enum: "C", 673 Map: map[string]int{ 674 "key1": 1, 675 "key2": 2, 676 "key3": 3, 677 "key4": 4, 678 "key5": 5, 679 }, 680 Nullable: &unionStr, 681 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 682 Record: &TestRecord{ 683 Long: 1925639126735, 684 String: "I am a test record", 685 Int: 666, 686 Float: 7171.17, 687 Double: 916734926348163.01973408746523, 688 Bool: true, 689 }, 690 } 691 692 buf := &bytes.Buffer{} 693 enc, err := ocf.NewEncoder(schema, buf, ocf.WithCompressionLevel(flate.BestCompression)) 694 require.NoError(t, err) 695 696 err = enc.Encode(record) 697 require.NoError(t, err) 698 699 err = enc.Close() 700 701 require.NoError(t, err) 702 assert.Equal(t, 926, buf.Len()) 703 } 704 705 func TestEncoder_EncodeCompressesSnappy(t *testing.T) { 706 unionStr := "union value" 707 record := FullRecord{ 708 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 709 Longs: []int64{1, 2, 3, 4, 5}, 710 Enum: "C", 711 Map: map[string]int{ 712 "key1": 1, 713 "key2": 2, 714 "key3": 3, 715 "key4": 4, 716 "key5": 5, 717 }, 718 Nullable: &unionStr, 719 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 720 Record: &TestRecord{ 721 Long: 1925639126735, 722 String: "I am a test record", 723 Int: 666, 724 Float: 7171.17, 725 Double: 916734926348163.01973408746523, 726 Bool: true, 727 }, 728 } 729 730 buf := &bytes.Buffer{} 731 enc, err := ocf.NewEncoder(schema, buf, ocf.WithBlockLength(1), ocf.WithCodec(ocf.Snappy)) 732 require.NoError(t, err) 733 734 err = enc.Encode(record) 735 require.NoError(t, err) 736 737 err = enc.Close() 738 739 require.NoError(t, err) 740 assert.Equal(t, 938, buf.Len()) 741 } 742 743 func TestEncoder_EncodeCompressesZStandard(t *testing.T) { 744 unionStr := "union value" 745 record := FullRecord{ 746 Strings: []string{"string1", "string2", "string3", "string4", "string5"}, 747 Longs: []int64{1, 2, 3, 4, 5}, 748 Enum: "C", 749 Map: map[string]int{ 750 "key1": 1, 751 "key2": 2, 752 "key3": 3, 753 "key4": 4, 754 "key5": 5, 755 }, 756 Nullable: &unionStr, 757 Fixed: [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04}, 758 Record: &TestRecord{ 759 Long: 1925639126735, 760 String: "I am a test record", 761 Int: 666, 762 Float: 7171.17, 763 Double: 916734926348163.01973408746523, 764 Bool: true, 765 }, 766 } 767 768 buf := &bytes.Buffer{} 769 enc, _ := ocf.NewEncoder(schema, buf, ocf.WithCodec(ocf.ZStandard)) 770 771 err := enc.Encode(record) 772 assert.NoError(t, err) 773 774 err = enc.Close() 775 776 require.NoError(t, err) 777 assert.Equal(t, 951, buf.Len()) 778 } 779 780 func TestEncoder_EncodeError(t *testing.T) { 781 buf := &bytes.Buffer{} 782 enc, err := ocf.NewEncoder(`"long"`, buf) 783 require.NoError(t, err) 784 t.Cleanup(func() { _ = enc.Close() }) 785 786 err = enc.Encode("test") 787 788 assert.Error(t, err) 789 } 790 791 func TestEncoder_EncodeWritesBlocks(t *testing.T) { 792 buf := &bytes.Buffer{} 793 enc, _ := ocf.NewEncoder(`"long"`, buf, ocf.WithBlockLength(1)) 794 t.Cleanup(func() { _ = enc.Close() }) 795 796 err := enc.Encode(int64(1)) 797 798 require.NoError(t, err) 799 assert.Equal(t, 77, buf.Len()) 800 } 801 802 func TestEncoder_EncodeHandlesWriteBlockError(t *testing.T) { 803 w := &errorBlockWriter{} 804 enc, _ := ocf.NewEncoder(`"long"`, w, ocf.WithBlockLength(1)) 805 t.Cleanup(func() { _ = enc.Close() }) 806 807 err := enc.Encode(int64(1)) 808 809 assert.Error(t, err) 810 } 811 812 func TestEncoder_CloseHandlesWriteBlockError(t *testing.T) { 813 w := &errorBlockWriter{} 814 enc, _ := ocf.NewEncoder(`"long"`, w) 815 _ = enc.Encode(int64(1)) 816 817 err := enc.Close() 818 819 assert.Error(t, err) 820 } 821 822 func TestEncodeDecodeMetadata(t *testing.T) { 823 buf := &bytes.Buffer{} 824 enc, _ := ocf.NewEncoder(`"long"`, buf, ocf.WithMetadata(map[string][]byte{ 825 "test": []byte("foo"), 826 })) 827 828 err := enc.Encode(int64(1)) 829 require.NoError(t, err) 830 831 _ = enc.Close() 832 833 dec, err := ocf.NewDecoder(buf) 834 835 require.NoError(t, err) 836 assert.Equal(t, []byte("foo"), dec.Metadata()["test"]) 837 } 838 839 func TestEncode_WithSyncBlock(t *testing.T) { 840 buf := &bytes.Buffer{} 841 syncBlock := [16]byte{9, 9, 9, 9, 9, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9} 842 _, err := ocf.NewEncoder(`"long"`, buf, ocf.WithSyncBlock(syncBlock)) 843 require.NoError(t, err) 844 845 reader := avro.NewReader(buf, 1024) 846 847 var h ocf.Header 848 reader.ReadVal(ocf.HeaderSchema, &h) 849 require.NoError(t, reader.Error) 850 assert.Equal(t, syncBlock, h.Sync) 851 } 852 853 func TestEncoder_NoBlocks(t *testing.T) { 854 buf := &bytes.Buffer{} 855 856 _, err := ocf.NewEncoder(`"long"`, buf) 857 858 require.NoError(t, err) 859 assert.Equal(t, 58, buf.Len()) 860 } 861 862 func TestEncoder_WriteHeaderError(t *testing.T) { 863 w := &errorHeaderWriter{} 864 865 _, err := ocf.NewEncoder(`"long"`, w) 866 867 assert.Error(t, err) 868 } 869 870 func copyToTemp(t *testing.T, src string) *os.File { 871 t.Helper() 872 873 file, err := os.CreateTemp(".", "temp-*.avro") 874 require.NoError(t, err) 875 876 b, err := os.ReadFile(src) 877 require.NoError(t, err) 878 879 _, err = io.Copy(file, bytes.NewReader(b)) 880 require.NoError(t, err) 881 882 _, err = file.Seek(0, 0) 883 require.NoError(t, err) 884 885 return file 886 } 887 888 type errorBlockWriter struct { 889 headerWritten bool 890 } 891 892 func (ew *errorBlockWriter) Write(p []byte) (n int, err error) { 893 if !ew.headerWritten { 894 ew.headerWritten = true 895 return len(p), nil 896 } 897 return 0, errors.New("test") 898 } 899 900 type errorHeaderWriter struct{} 901 902 func (*errorHeaderWriter) Write(p []byte) (int, error) { 903 return 0, errors.New("test") 904 }