github.com/hamba/avro/v2@v2.22.1-0.20240518180522-aff3955acf7d/ocf/ocf_test.go (about)

     1  package ocf_test
     2  
     3  import (
     4  	"bytes"
     5  	"compress/flate"
     6  	"errors"
     7  	"flag"
     8  	"io"
     9  	"os"
    10  	"testing"
    11  
    12  	"github.com/hamba/avro/v2"
    13  	"github.com/hamba/avro/v2/ocf"
    14  	"github.com/stretchr/testify/assert"
    15  	"github.com/stretchr/testify/require"
    16  )
    17  
// update is the -update command-line flag. When it is set,
// TestEncoder_ExistingOCF overwrites its golden file with the freshly
// produced output before comparing against it.
var update = flag.Bool("update", false, "update the golden files")
    19  
// schema is the Avro schema of the org.hamba.avro.FullRecord record used by
// the encoder tests in this file. Its fields line up, by name, with the avro
// struct tags on FullRecord and TestRecord.
var schema = `{
	"type":"record",
	"name":"FullRecord",
	"namespace":"org.hamba.avro",
	"fields":[
		{"name":"strings","type":{"type":"array","items":"string"}},
		{"name":"longs","type":{"type":"array","items":"long"}},
		{"name":"enum","type":{"type":"enum","name":"foo","symbols":["A","B","C","D"]}},
		{"name":"map","type":{"type":"map","values":"int"}},
		{"name":"nullable","type":["null","string"]},
		{"name":"fixed","type":{"type":"fixed","name":"md5","size":16}},
		{"name":"record","type":{
			"type":"record",
			"name":"TestRecord",
			"fields":[
				{"name":"long","type":"long"},
				{"name":"string","type":"string"},
				{"name":"int","type":"int"},
				{"name":"float","type":"float"},
				{"name":"double","type":"double"},
				{"name":"bool","type":"boolean"}
			]
		}}
	]
}`
    45  
// FullRecord is the Go value encoded and decoded by the tests in this file.
// Each field carries an avro tag naming the matching field of the FullRecord
// schema declared in the schema variable.
type FullRecord struct {
	Strings  []string       `avro:"strings"`
	Longs    []int64        `avro:"longs"`
	Enum     string         `avro:"enum"`
	Map      map[string]int `avro:"map"`
	Nullable *string        `avro:"nullable"` // schema type is the union ["null","string"]
	Fixed    [16]byte       `avro:"fixed"`    // schema type is a 16-byte fixed
	Record   *TestRecord    `avro:"record"`
}
    55  
// TestRecord is the nested record held in FullRecord.Record. Its avro tags
// name the fields of the TestRecord schema nested inside the schema variable.
type TestRecord struct {
	Long   int64   `avro:"long"`
	String string  `avro:"string"`
	Int    int32   `avro:"int"`
	Float  float32 `avro:"float"`
	Double float64 `avro:"double"`
	Bool   bool    `avro:"bool"`
}
    64  
    65  func TestNewDecoder_InvalidHeader(t *testing.T) {
    66  	data := []byte{'O', 'b', 'j'}
    67  
    68  	_, err := ocf.NewDecoder(bytes.NewReader(data))
    69  
    70  	assert.Error(t, err)
    71  }
    72  
    73  func TestNewDecoder_InvalidMagic(t *testing.T) {
    74  	data := []byte{'f', 'o', 'o', 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
    75  
    76  	_, err := ocf.NewDecoder(bytes.NewReader(data))
    77  
    78  	assert.Error(t, err)
    79  }
    80  
    81  func TestNewDecoder_InvalidSchema(t *testing.T) {
    82  	data := []byte{'O', 'b', 'j', 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
    83  
    84  	_, err := ocf.NewDecoder(bytes.NewReader(data))
    85  
    86  	assert.Error(t, err)
    87  }
    88  
    89  func TestNewDecoder_InvalidCodec(t *testing.T) {
    90  	data := []byte{'O', 'b', 'j', 0x1, 0x3, 0x4c, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a', 0xc, 0x22, 'l', 'o', 'n', 'g',
    91  		0x22, 0x14, 'a', 'v', 'r', 'o', 0x2e, 'c', 'o', 'd', 'e', 'c', 0xe, 'd', 'e', 'a', 'l', 'a', 't', 'e', 0x0,
    92  		0x72, 0xce, 0x78, 0x7, 0x35, 0x81, 0xb0, 0x80, 0x77, 0x59, 0xa9, 0x83, 0xaf, 0x90, 0x3e, 0xaf,
    93  	}
    94  
    95  	_, err := ocf.NewDecoder(bytes.NewReader(data))
    96  
    97  	assert.Error(t, err)
    98  }
    99  
   100  func TestDecoder(t *testing.T) {
   101  	unionStr := "union value"
   102  	want := FullRecord{
   103  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   104  		Longs:   []int64{1, 2, 3, 4, 5},
   105  		Enum:    "C",
   106  		Map: map[string]int{
   107  			"key1": 1,
   108  			"key2": 2,
   109  			"key3": 3,
   110  			"key4": 4,
   111  			"key5": 5,
   112  		},
   113  		Nullable: &unionStr,
   114  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   115  		Record: &TestRecord{
   116  			Long:   1925639126735,
   117  			String: "I am a test record",
   118  			Int:    666,
   119  			Float:  7171.17,
   120  			Double: 916734926348163.01973408746523,
   121  			Bool:   true,
   122  		},
   123  	}
   124  
   125  	f, err := os.Open("testdata/full.avro")
   126  	if err != nil {
   127  		t.Error(err)
   128  		return
   129  	}
   130  	t.Cleanup(func() { _ = f.Close() })
   131  
   132  	dec, err := ocf.NewDecoder(f)
   133  	require.NoError(t, err)
   134  
   135  	var count int
   136  	for dec.HasNext() {
   137  		count++
   138  		var got FullRecord
   139  		err = dec.Decode(&got)
   140  
   141  		require.NoError(t, err)
   142  		assert.Equal(t, want, got)
   143  	}
   144  
   145  	require.NoError(t, dec.Error())
   146  	assert.Equal(t, 1, count)
   147  }
   148  
   149  func TestDecoder_WithDeflate(t *testing.T) {
   150  	unionStr := "union value"
   151  	want := FullRecord{
   152  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   153  		Longs:   []int64{1, 2, 3, 4, 5},
   154  		Enum:    "C",
   155  		Map: map[string]int{
   156  			"key1": 1,
   157  			"key2": 2,
   158  			"key3": 3,
   159  			"key4": 4,
   160  			"key5": 5,
   161  		},
   162  		Nullable: &unionStr,
   163  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   164  		Record: &TestRecord{
   165  			Long:   1925639126735,
   166  			String: "I am a test record",
   167  			Int:    666,
   168  			Float:  7171.17,
   169  			Double: 916734926348163.01973408746523,
   170  			Bool:   true,
   171  		},
   172  	}
   173  
   174  	f, err := os.Open("testdata/full-deflate.avro")
   175  	if err != nil {
   176  		t.Error(err)
   177  		return
   178  	}
   179  	t.Cleanup(func() { _ = f.Close() })
   180  
   181  	dec, err := ocf.NewDecoder(f)
   182  	require.NoError(t, err)
   183  
   184  	var count int
   185  	for dec.HasNext() {
   186  		count++
   187  		var got FullRecord
   188  		err = dec.Decode(&got)
   189  
   190  		require.NoError(t, err)
   191  		assert.Equal(t, want, got)
   192  	}
   193  
   194  	require.NoError(t, dec.Error())
   195  	assert.Equal(t, 1, count)
   196  }
   197  
   198  func TestDecoder_WithDeflateHandlesInvalidData(t *testing.T) {
   199  	f, err := os.Open("testdata/deflate-invalid-data.avro")
   200  	if err != nil {
   201  		t.Error(err)
   202  		return
   203  	}
   204  	t.Cleanup(func() { _ = f.Close() })
   205  
   206  	dec, err := ocf.NewDecoder(f)
   207  	require.NoError(t, err)
   208  
   209  	dec.HasNext()
   210  
   211  	assert.Error(t, dec.Error())
   212  }
   213  
   214  func TestDecoder_WithSnappy(t *testing.T) {
   215  	unionStr := "union value"
   216  	want := FullRecord{
   217  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   218  		Longs:   []int64{1, 2, 3, 4, 5},
   219  		Enum:    "C",
   220  		Map: map[string]int{
   221  			"key1": 1,
   222  			"key2": 2,
   223  			"key3": 3,
   224  			"key4": 4,
   225  			"key5": 5,
   226  		},
   227  		Nullable: &unionStr,
   228  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   229  		Record: &TestRecord{
   230  			Long:   1925639126735,
   231  			String: "I am a test record",
   232  			Int:    666,
   233  			Float:  7171.17,
   234  			Double: 916734926348163.01973408746523,
   235  			Bool:   true,
   236  		},
   237  	}
   238  
   239  	f, err := os.Open("testdata/full-snappy.avro")
   240  	if err != nil {
   241  		t.Error(err)
   242  		return
   243  	}
   244  	t.Cleanup(func() { _ = f.Close() })
   245  
   246  	dec, err := ocf.NewDecoder(f)
   247  	require.NoError(t, err)
   248  
   249  	var count int
   250  	for dec.HasNext() {
   251  		count++
   252  		var got FullRecord
   253  		err = dec.Decode(&got)
   254  
   255  		require.NoError(t, err)
   256  		assert.Equal(t, want, got)
   257  	}
   258  
   259  	require.NoError(t, dec.Error())
   260  	assert.Equal(t, 1, count)
   261  }
   262  
   263  func TestDecoder_WithSnappyHandlesInvalidData(t *testing.T) {
   264  	f, err := os.Open("testdata/snappy-invalid-data.avro")
   265  	if err != nil {
   266  		t.Error(err)
   267  		return
   268  	}
   269  	t.Cleanup(func() { _ = f.Close() })
   270  
   271  	dec, err := ocf.NewDecoder(f)
   272  	require.NoError(t, err)
   273  
   274  	dec.HasNext()
   275  
   276  	assert.Error(t, dec.Error())
   277  }
   278  
   279  func TestDecoder_WithSnappyHandlesShortCRC(t *testing.T) {
   280  	f, err := os.Open("testdata/snappy-short-crc.avro")
   281  	if err != nil {
   282  		t.Error(err)
   283  		return
   284  	}
   285  	t.Cleanup(func() { _ = f.Close() })
   286  
   287  	dec, err := ocf.NewDecoder(f)
   288  	require.NoError(t, err)
   289  
   290  	dec.HasNext()
   291  
   292  	assert.Error(t, dec.Error())
   293  }
   294  
   295  func TestDecoder_WithSnappyHandlesInvalidCRC(t *testing.T) {
   296  	f, err := os.Open("testdata/snappy-invalid-crc.avro")
   297  	if err != nil {
   298  		t.Error(err)
   299  		return
   300  	}
   301  	t.Cleanup(func() { _ = f.Close() })
   302  
   303  	dec, err := ocf.NewDecoder(f)
   304  	if err != nil {
   305  		t.Error(err)
   306  		return
   307  	}
   308  
   309  	dec.HasNext()
   310  
   311  	assert.Error(t, dec.Error())
   312  }
   313  
   314  func TestDecoder_WithZStandard(t *testing.T) {
   315  	unionStr := "union value"
   316  	want := FullRecord{
   317  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   318  		Longs:   []int64{1, 2, 3, 4, 5},
   319  		Enum:    "C",
   320  		Map: map[string]int{
   321  			"key1": 1,
   322  			"key2": 2,
   323  			"key3": 3,
   324  			"key4": 4,
   325  			"key5": 5,
   326  		},
   327  		Nullable: &unionStr,
   328  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   329  		Record: &TestRecord{
   330  			Long:   1925639126735,
   331  			String: "I am a test record",
   332  			Int:    666,
   333  			Float:  7171.17,
   334  			Double: 916734926348163.01973408746523,
   335  			Bool:   true,
   336  		},
   337  	}
   338  
   339  	f, err := os.Open("testdata/full-zstd.avro")
   340  	require.NoError(t, err)
   341  	t.Cleanup(func() { _ = f.Close() })
   342  
   343  	dec, err := ocf.NewDecoder(f)
   344  	require.NoError(t, err)
   345  
   346  	var count int
   347  	for dec.HasNext() {
   348  		count++
   349  		var got FullRecord
   350  		err = dec.Decode(&got)
   351  
   352  		require.NoError(t, err)
   353  		assert.Equal(t, want, got)
   354  	}
   355  
   356  	require.NoError(t, dec.Error())
   357  	assert.Equal(t, 1, count)
   358  }
   359  
   360  func TestDecoder_WithZStandardHandlesInvalidData(t *testing.T) {
   361  	f, err := os.Open("testdata/zstd-invalid-data.avro")
   362  	require.NoError(t, err)
   363  	t.Cleanup(func() { _ = f.Close() })
   364  
   365  	dec, err := ocf.NewDecoder(f)
   366  	require.NoError(t, err)
   367  
   368  	dec.HasNext()
   369  
   370  	assert.Error(t, dec.Error())
   371  }
   372  
   373  func TestDecoder_DecodeAvroError(t *testing.T) {
   374  	data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a',
   375  		0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12,
   376  		0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x16, 0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad, 0xe2, 0xa2, 0xf3,
   377  		0xad, 0xad, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd,
   378  	}
   379  
   380  	dec, _ := ocf.NewDecoder(bytes.NewReader(data))
   381  	_ = dec.HasNext()
   382  
   383  	var l int64
   384  	err := dec.Decode(&l)
   385  
   386  	assert.Error(t, err)
   387  }
   388  
   389  func TestDecoder_DecodeMustCallHasNext(t *testing.T) {
   390  	data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a',
   391  		0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12,
   392  		0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x02, 0x02, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d,
   393  		0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd,
   394  	}
   395  
   396  	dec, _ := ocf.NewDecoder(bytes.NewReader(data))
   397  
   398  	var l int64
   399  	err := dec.Decode(&l)
   400  
   401  	assert.Error(t, err)
   402  }
   403  
   404  func TestDecoder_InvalidBlock(t *testing.T) {
   405  	data := []byte{'O', 'b', 'j', 0x01, 0x01, 0x26, 0x16, 'a', 'v', 'r', 'o', '.', 's', 'c', 'h', 'e', 'm', 'a',
   406  		0x0c, '"', 'l', 'o', 'n', 'g', '"', 0x00, 0xfa, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d, 0x87, 0x12,
   407  		0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd, 0x02, 0x02, 0x02, 0xfb, 0x2b, 0x0f, 0x1a, 0xdd, 0xfd, 0x90, 0x7d,
   408  		0x87, 0x12, 0x15, 0x29, 0xd7, 0x1d, 0x1c, 0xdd,
   409  	}
   410  
   411  	dec, _ := ocf.NewDecoder(bytes.NewReader(data))
   412  
   413  	got := dec.HasNext()
   414  
   415  	assert.False(t, got)
   416  	assert.Error(t, dec.Error())
   417  }
   418  
   419  func TestNewEncoder_InvalidSchema(t *testing.T) {
   420  	buf := &bytes.Buffer{}
   421  
   422  	_, err := ocf.NewEncoder(``, buf)
   423  
   424  	assert.Error(t, err)
   425  }
   426  
   427  func TestNewEncoder_InvalidCodec(t *testing.T) {
   428  	buf := &bytes.Buffer{}
   429  
   430  	_, err := ocf.NewEncoder(`"long"`, buf, ocf.WithCodec(ocf.CodecName("test")))
   431  
   432  	assert.Error(t, err)
   433  }
   434  
   435  func TestEncoder(t *testing.T) {
   436  	unionStr := "union value"
   437  	record := FullRecord{
   438  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   439  		Longs:   []int64{1, 2, 3, 4, 5},
   440  		Enum:    "C",
   441  		Map: map[string]int{
   442  			"key1": 1,
   443  			"key2": 2,
   444  			"key3": 3,
   445  			"key4": 4,
   446  			"key5": 5,
   447  		},
   448  		Nullable: &unionStr,
   449  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   450  		Record: &TestRecord{
   451  			Long:   1925639126735,
   452  			String: "I am a test record",
   453  			Int:    666,
   454  			Float:  7171.17,
   455  			Double: 916734926348163.01973408746523,
   456  			Bool:   true,
   457  		},
   458  	}
   459  
   460  	buf := &bytes.Buffer{}
   461  	enc, err := ocf.NewEncoder(schema, buf)
   462  	require.NoError(t, err)
   463  
   464  	err = enc.Encode(record)
   465  	require.NoError(t, err)
   466  
   467  	err = enc.Close()
   468  	assert.NoError(t, err)
   469  }
   470  
   471  func TestEncoder_WithEncodingConfig(t *testing.T) {
   472  	arrSchema := `{"type": "array", "items": "long"}`
   473  	syncMarker := [16]byte{0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F}
   474  
   475  	skipOcfHeader := func(encoded []byte) []byte {
   476  		index := bytes.Index(encoded, syncMarker[:])
   477  		require.False(t, index == -1)
   478  		return encoded[index+len(syncMarker):] // +1 for the null byte
   479  	}
   480  
   481  	tests := []struct {
   482  		name        string
   483  		data        any
   484  		encConfig   avro.API
   485  		wantPayload []byte // without OCF header
   486  	}{
   487  		{
   488  			name: "no encoding config",
   489  			data: []int64{1, 2, 3, 4, 5},
   490  			wantPayload: []byte{
   491  				0x2, 0x10, // OCF block header: 1 elems, 8 bytes
   492  				0x9, 0xA, // array block header: 5 elems, 5 bytes
   493  				0x2, 0x4, 0x6, 0x8, 0xA, 0x0, // array block payload with terminator
   494  				0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F, // OCF trailing sync marker
   495  			},
   496  		},
   497  		{
   498  			name:      "no array bytes size",
   499  			encConfig: avro.Config{DisableBlockSizeHeader: true}.Freeze(),
   500  			data:      []int64{1, 2, 3, 4, 5},
   501  			wantPayload: []byte{
   502  				0x2, 0x0E, // OCF block header: 1 elem, 7 bytes
   503  				0xA,                          // array block header: 5 elems
   504  				0x2, 0x4, 0x6, 0x8, 0xA, 0x0, // array block payload with terminator
   505  				0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F, // OCF trailing sync marker
   506  			},
   507  		},
   508  		{
   509  			name:      "non-default array block length",
   510  			encConfig: avro.Config{BlockLength: 5}.Freeze(),
   511  			data:      []int64{1, 2, 3, 4, 5, 6, 7, 8, 9},
   512  			wantPayload: []byte{
   513  				0x2, 0x1c, // OCF block header: 1 elems, 15 bytes
   514  				0x9, 0xA, // array block 1 header: 5 elems, 5 bytes
   515  				0x2, 0x4, 0x6, 0x8, 0xA, // array block 1
   516  				0x7, 0x8, // array block 2 header: 4 elems, 4 bytes
   517  				0xC, 0xE, 0x10, 0x12, 0x0, // array block 2 with terminator
   518  				0x1F, 0x1F, 0x1F, 0x1F, 0x2F, 0x2F, 0x2F, 0x2F, 0x3F, 0x3F, 0x3F, 0x3F, 0x4F, 0x4F, 0x4F, 0x4F, // OCF sync marker
   519  			},
   520  		},
   521  	}
   522  
   523  	for _, tt := range tests {
   524  		t.Run(tt.name, func(t *testing.T) {
   525  			buf := &bytes.Buffer{}
   526  			opts := []ocf.EncoderFunc{ocf.WithSyncBlock(syncMarker)}
   527  			if tt.encConfig != nil {
   528  				opts = append(opts, ocf.WithEncodingConfig(tt.encConfig))
   529  			}
   530  			enc, err := ocf.NewEncoder(arrSchema, buf, opts...)
   531  			require.NoError(t, err)
   532  
   533  			err = enc.Encode(tt.data)
   534  			require.NoError(t, err)
   535  
   536  			err = enc.Close()
   537  			assert.NoError(t, err)
   538  
   539  			assert.Equal(t, tt.wantPayload, skipOcfHeader(buf.Bytes()))
   540  		})
   541  	}
   542  
   543  }
   544  
   545  func TestEncoder_ExistingOCF(t *testing.T) {
   546  	record := FullRecord{
   547  		Strings: []string{"another", "record"},
   548  		Enum:    "A",
   549  		Record:  &TestRecord{},
   550  	}
   551  
   552  	file := copyToTemp(t, "testdata/full.avro")
   553  	t.Cleanup(func() {
   554  		_ = file.Close()
   555  		_ = os.Remove(file.Name())
   556  	})
   557  
   558  	enc, err := ocf.NewEncoder(schema, file)
   559  	require.NoError(t, err)
   560  
   561  	err = enc.Encode(record)
   562  	require.NoError(t, err)
   563  
   564  	err = enc.Close()
   565  	assert.NoError(t, err)
   566  
   567  	_, err = file.Seek(0, 0)
   568  	require.NoError(t, err)
   569  	got, err := io.ReadAll(file)
   570  	require.NoError(t, err)
   571  
   572  	if *update {
   573  		err = os.WriteFile("testdata/full-appended.avro", got, 0o644)
   574  		require.NoError(t, err)
   575  	}
   576  
   577  	want, err := os.ReadFile("testdata/full-appended.avro")
   578  	require.NoError(t, err)
   579  	assert.Equal(t, want, got)
   580  }
   581  
   582  func TestEncoder_NilWriter(t *testing.T) {
   583  	_, err := ocf.NewEncoder(schema, nil)
   584  
   585  	assert.Error(t, err)
   586  }
   587  
   588  func TestEncoder_Write(t *testing.T) {
   589  	unionStr := "union value"
   590  	record := FullRecord{
   591  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   592  		Longs:   []int64{1, 2, 3, 4, 5},
   593  		Enum:    "C",
   594  		Map: map[string]int{
   595  			"key1": 1,
   596  			"key2": 2,
   597  			"key3": 3,
   598  			"key4": 4,
   599  			"key5": 5,
   600  		},
   601  		Nullable: &unionStr,
   602  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   603  		Record: &TestRecord{
   604  			Long:   1925639126735,
   605  			String: "I am a test record",
   606  			Int:    666,
   607  			Float:  7171.17,
   608  			Double: 916734926348163.01973408746523,
   609  			Bool:   true,
   610  		},
   611  	}
   612  
   613  	buf := &bytes.Buffer{}
   614  	enc, err := ocf.NewEncoder(schema, buf)
   615  	require.NoError(t, err)
   616  
   617  	encodedBytes, err := avro.Marshal(avro.MustParse(schema), record)
   618  	require.NoError(t, err)
   619  
   620  	n, err := enc.Write(encodedBytes)
   621  	require.NoError(t, err)
   622  
   623  	err = enc.Close()
   624  	require.NoError(t, err)
   625  
   626  	require.Equal(t, n, len(encodedBytes))
   627  	require.Equal(t, 957, buf.Len())
   628  }
   629  
   630  func TestEncoder_EncodeCompressesDeflate(t *testing.T) {
   631  	unionStr := "union value"
   632  	record := FullRecord{
   633  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   634  		Longs:   []int64{1, 2, 3, 4, 5},
   635  		Enum:    "C",
   636  		Map: map[string]int{
   637  			"key1": 1,
   638  			"key2": 2,
   639  			"key3": 3,
   640  			"key4": 4,
   641  			"key5": 5,
   642  		},
   643  		Nullable: &unionStr,
   644  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   645  		Record: &TestRecord{
   646  			Long:   1925639126735,
   647  			String: "I am a test record",
   648  			Int:    666,
   649  			Float:  7171.17,
   650  			Double: 916734926348163.01973408746523,
   651  			Bool:   true,
   652  		},
   653  	}
   654  
   655  	buf := &bytes.Buffer{}
   656  	enc, _ := ocf.NewEncoder(schema, buf, ocf.WithCodec(ocf.Deflate))
   657  
   658  	err := enc.Encode(record)
   659  	assert.NoError(t, err)
   660  
   661  	err = enc.Close()
   662  
   663  	require.NoError(t, err)
   664  	assert.Equal(t, 926, buf.Len())
   665  }
   666  
   667  func TestEncoder_EncodeCompressesDeflateWithLevel(t *testing.T) {
   668  	unionStr := "union value"
   669  	record := FullRecord{
   670  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   671  		Longs:   []int64{1, 2, 3, 4, 5},
   672  		Enum:    "C",
   673  		Map: map[string]int{
   674  			"key1": 1,
   675  			"key2": 2,
   676  			"key3": 3,
   677  			"key4": 4,
   678  			"key5": 5,
   679  		},
   680  		Nullable: &unionStr,
   681  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   682  		Record: &TestRecord{
   683  			Long:   1925639126735,
   684  			String: "I am a test record",
   685  			Int:    666,
   686  			Float:  7171.17,
   687  			Double: 916734926348163.01973408746523,
   688  			Bool:   true,
   689  		},
   690  	}
   691  
   692  	buf := &bytes.Buffer{}
   693  	enc, err := ocf.NewEncoder(schema, buf, ocf.WithCompressionLevel(flate.BestCompression))
   694  	require.NoError(t, err)
   695  
   696  	err = enc.Encode(record)
   697  	require.NoError(t, err)
   698  
   699  	err = enc.Close()
   700  
   701  	require.NoError(t, err)
   702  	assert.Equal(t, 926, buf.Len())
   703  }
   704  
   705  func TestEncoder_EncodeCompressesSnappy(t *testing.T) {
   706  	unionStr := "union value"
   707  	record := FullRecord{
   708  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   709  		Longs:   []int64{1, 2, 3, 4, 5},
   710  		Enum:    "C",
   711  		Map: map[string]int{
   712  			"key1": 1,
   713  			"key2": 2,
   714  			"key3": 3,
   715  			"key4": 4,
   716  			"key5": 5,
   717  		},
   718  		Nullable: &unionStr,
   719  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   720  		Record: &TestRecord{
   721  			Long:   1925639126735,
   722  			String: "I am a test record",
   723  			Int:    666,
   724  			Float:  7171.17,
   725  			Double: 916734926348163.01973408746523,
   726  			Bool:   true,
   727  		},
   728  	}
   729  
   730  	buf := &bytes.Buffer{}
   731  	enc, err := ocf.NewEncoder(schema, buf, ocf.WithBlockLength(1), ocf.WithCodec(ocf.Snappy))
   732  	require.NoError(t, err)
   733  
   734  	err = enc.Encode(record)
   735  	require.NoError(t, err)
   736  
   737  	err = enc.Close()
   738  
   739  	require.NoError(t, err)
   740  	assert.Equal(t, 938, buf.Len())
   741  }
   742  
   743  func TestEncoder_EncodeCompressesZStandard(t *testing.T) {
   744  	unionStr := "union value"
   745  	record := FullRecord{
   746  		Strings: []string{"string1", "string2", "string3", "string4", "string5"},
   747  		Longs:   []int64{1, 2, 3, 4, 5},
   748  		Enum:    "C",
   749  		Map: map[string]int{
   750  			"key1": 1,
   751  			"key2": 2,
   752  			"key3": 3,
   753  			"key4": 4,
   754  			"key5": 5,
   755  		},
   756  		Nullable: &unionStr,
   757  		Fixed:    [16]byte{0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04},
   758  		Record: &TestRecord{
   759  			Long:   1925639126735,
   760  			String: "I am a test record",
   761  			Int:    666,
   762  			Float:  7171.17,
   763  			Double: 916734926348163.01973408746523,
   764  			Bool:   true,
   765  		},
   766  	}
   767  
   768  	buf := &bytes.Buffer{}
   769  	enc, _ := ocf.NewEncoder(schema, buf, ocf.WithCodec(ocf.ZStandard))
   770  
   771  	err := enc.Encode(record)
   772  	assert.NoError(t, err)
   773  
   774  	err = enc.Close()
   775  
   776  	require.NoError(t, err)
   777  	assert.Equal(t, 951, buf.Len())
   778  }
   779  
   780  func TestEncoder_EncodeError(t *testing.T) {
   781  	buf := &bytes.Buffer{}
   782  	enc, err := ocf.NewEncoder(`"long"`, buf)
   783  	require.NoError(t, err)
   784  	t.Cleanup(func() { _ = enc.Close() })
   785  
   786  	err = enc.Encode("test")
   787  
   788  	assert.Error(t, err)
   789  }
   790  
   791  func TestEncoder_EncodeWritesBlocks(t *testing.T) {
   792  	buf := &bytes.Buffer{}
   793  	enc, _ := ocf.NewEncoder(`"long"`, buf, ocf.WithBlockLength(1))
   794  	t.Cleanup(func() { _ = enc.Close() })
   795  
   796  	err := enc.Encode(int64(1))
   797  
   798  	require.NoError(t, err)
   799  	assert.Equal(t, 77, buf.Len())
   800  }
   801  
   802  func TestEncoder_EncodeHandlesWriteBlockError(t *testing.T) {
   803  	w := &errorBlockWriter{}
   804  	enc, _ := ocf.NewEncoder(`"long"`, w, ocf.WithBlockLength(1))
   805  	t.Cleanup(func() { _ = enc.Close() })
   806  
   807  	err := enc.Encode(int64(1))
   808  
   809  	assert.Error(t, err)
   810  }
   811  
   812  func TestEncoder_CloseHandlesWriteBlockError(t *testing.T) {
   813  	w := &errorBlockWriter{}
   814  	enc, _ := ocf.NewEncoder(`"long"`, w)
   815  	_ = enc.Encode(int64(1))
   816  
   817  	err := enc.Close()
   818  
   819  	assert.Error(t, err)
   820  }
   821  
   822  func TestEncodeDecodeMetadata(t *testing.T) {
   823  	buf := &bytes.Buffer{}
   824  	enc, _ := ocf.NewEncoder(`"long"`, buf, ocf.WithMetadata(map[string][]byte{
   825  		"test": []byte("foo"),
   826  	}))
   827  
   828  	err := enc.Encode(int64(1))
   829  	require.NoError(t, err)
   830  
   831  	_ = enc.Close()
   832  
   833  	dec, err := ocf.NewDecoder(buf)
   834  
   835  	require.NoError(t, err)
   836  	assert.Equal(t, []byte("foo"), dec.Metadata()["test"])
   837  }
   838  
   839  func TestEncode_WithSyncBlock(t *testing.T) {
   840  	buf := &bytes.Buffer{}
   841  	syncBlock := [16]byte{9, 9, 9, 9, 9, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
   842  	_, err := ocf.NewEncoder(`"long"`, buf, ocf.WithSyncBlock(syncBlock))
   843  	require.NoError(t, err)
   844  
   845  	reader := avro.NewReader(buf, 1024)
   846  
   847  	var h ocf.Header
   848  	reader.ReadVal(ocf.HeaderSchema, &h)
   849  	require.NoError(t, reader.Error)
   850  	assert.Equal(t, syncBlock, h.Sync)
   851  }
   852  
   853  func TestEncoder_NoBlocks(t *testing.T) {
   854  	buf := &bytes.Buffer{}
   855  
   856  	_, err := ocf.NewEncoder(`"long"`, buf)
   857  
   858  	require.NoError(t, err)
   859  	assert.Equal(t, 58, buf.Len())
   860  }
   861  
   862  func TestEncoder_WriteHeaderError(t *testing.T) {
   863  	w := &errorHeaderWriter{}
   864  
   865  	_, err := ocf.NewEncoder(`"long"`, w)
   866  
   867  	assert.Error(t, err)
   868  }
   869  
   870  func copyToTemp(t *testing.T, src string) *os.File {
   871  	t.Helper()
   872  
   873  	file, err := os.CreateTemp(".", "temp-*.avro")
   874  	require.NoError(t, err)
   875  
   876  	b, err := os.ReadFile(src)
   877  	require.NoError(t, err)
   878  
   879  	_, err = io.Copy(file, bytes.NewReader(b))
   880  	require.NoError(t, err)
   881  
   882  	_, err = file.Seek(0, 0)
   883  	require.NoError(t, err)
   884  
   885  	return file
   886  }
   887  
   888  type errorBlockWriter struct {
   889  	headerWritten bool
   890  }
   891  
   892  func (ew *errorBlockWriter) Write(p []byte) (n int, err error) {
   893  	if !ew.headerWritten {
   894  		ew.headerWritten = true
   895  		return len(p), nil
   896  	}
   897  	return 0, errors.New("test")
   898  }
   899  
   900  type errorHeaderWriter struct{}
   901  
   902  func (*errorHeaderWriter) Write(p []byte) (int, error) {
   903  	return 0, errors.New("test")
   904  }