github.com/fraugster/parquet-go@v0.12.0/alloc_test.go

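// alloc_test.go exercises the reader-side allocation tracker: each test
// writes a ~20 MiB parquet file, then reads it back through a reader
// configured with WithMaximumMemorySize, expecting the limit to either
// trip or hold.
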
package goparquet

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"testing"

	"github.com/fraugster/parquet-go/parquet"
	"github.com/fraugster/parquet-go/parquetschema"
	"github.com/stretchr/testify/require"
)

func TestAllocTrackerTriggerError(t *testing.T) {
	var buf bytes.Buffer

	sd, err := parquetschema.ParseSchemaDefinition(`message test {
		required binary foo (STRING);
	}`)
	require.NoError(t, err)

	wr := NewFileWriter(&buf,
		WithSchemaDefinition(sd),
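		// both limits are far above the ~20 MiB of data written below, so the
		// data should end up in a single row group containing a single page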
		WithMaxRowGroupSize(150*1024*1024),
		WithMaxPageSize(150*1024*1024),
		WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
	)
	// write 20*1024 records of 1 KiB of hex-encoded (and thus easily compressible) data each, ~20 MiB in total
	for i := 0; i < 20*1024; i++ {
		err := wr.AddData(map[string]interface{}{
			"foo": func() []byte {
				var data [512]byte
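				// vary the first two bytes with the loop counter so that every record is distinct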
				data[0] = byte(i % 256)
				data[1] = byte(i / 256)
				return []byte(fmt.Sprintf("%x", data[:]))
			}(),
		})
		require.NoError(t, err)
	}
	require.NoError(t, wr.FlushRowGroup())
	require.NoError(t, wr.Close())

	t.Logf("buf size: %d", buf.Len())

	// with a maximum memory size of 10 MiB, fully reading the ~20 MiB file created above should fail.
	r, err := NewFileReaderWithOptions(bytes.NewReader(buf.Bytes()), WithMaximumMemorySize(10*1024*1024))
	require.NoError(t, err)

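	// the very first NextRow presumably has to load the single oversized page, which already trips the limit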
	_, err = r.NextRow()
	require.Error(t, err)
	require.Contains(t, err.Error(), "bytes is greater than configured maximum of 10485760 bytes")
}

func TestAllocTrackerTriggerNoError(t *testing.T) {
	var buf bytes.Buffer

	sd, err := parquetschema.ParseSchemaDefinition(`message test {
		required binary foo (STRING);
	}`)
	require.NoError(t, err)

	wr := NewFileWriter(&buf,
		WithSchemaDefinition(sd),
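		// unlike the test above, there is no row group size override and pages
		// are capped at 1 MiB, so the file is split into many small pages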
		WithMaxPageSize(1024*1024),
		WithCompressionCodec(parquet.CompressionCodec_SNAPPY),
	)
	// write 20*1024 records of 1 KiB of hex-encoded (and thus easily compressible) data each, ~20 MiB in total
	for i := 0; i < 20*1024; i++ {
		err := wr.AddData(map[string]interface{}{
			"foo": func() []byte {
				var data [512]byte
				data[0] = byte(i % 256)
				data[1] = byte(i / 256)
				return []byte(fmt.Sprintf("%x", data[:]))
			}(),
		})
		require.NoError(t, err)
	}
	require.NoError(t, wr.FlushRowGroup())
	require.NoError(t, wr.Close())

	t.Logf("buf size: %d", buf.Len())

	// with a maximum memory size of 100 MiB, fully reading the ~20 MiB file created above should succeed.
	r, err := NewFileReaderWithOptions(bytes.NewReader(buf.Bytes()), WithMaximumMemorySize(100*1024*1024))
	require.NoError(t, err)

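	// drain the file row by row; any error other than io.EOF fails the test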
	for i := 0; ; i++ {
		_, err := r.NextRow()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			t.Fatalf("NextRow %d returned error: %v", i, err)
		}
	}
}