github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/reader_go18_test.go (about)

     1  //go:build go1.18
     2  
     3  package parquet_test
     4  
     5  import (
     6  	"bytes"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"math/rand"
    11  	"reflect"
    12  	"testing"
    13  
    14  	"github.com/vc42/parquet-go"
    15  )
    16  
    17  func TestGenericReader(t *testing.T) {
    18  	testGenericReader[booleanColumn](t)
    19  	testGenericReader[int32Column](t)
    20  	testGenericReader[int64Column](t)
    21  	testGenericReader[int96Column](t)
    22  	testGenericReader[floatColumn](t)
    23  	testGenericReader[doubleColumn](t)
    24  	testGenericReader[byteArrayColumn](t)
    25  	testGenericReader[fixedLenByteArrayColumn](t)
    26  	testGenericReader[stringColumn](t)
    27  	testGenericReader[indexedStringColumn](t)
    28  	testGenericReader[uuidColumn](t)
    29  	testGenericReader[mapColumn](t)
    30  	testGenericReader[decimalColumn](t)
    31  	testGenericReader[addressBook](t)
    32  	testGenericReader[contact](t)
    33  	testGenericReader[listColumn2](t)
    34  	testGenericReader[listColumn1](t)
    35  	testGenericReader[listColumn0](t)
    36  	testGenericReader[nestedListColumn1](t)
    37  	testGenericReader[nestedListColumn](t)
    38  	testGenericReader[*contact](t)
    39  	testGenericReader[paddedBooleanColumn](t)
    40  	testGenericReader[optionalInt32Column](t)
    41  	testGenericReader[repeatedInt32Column](t)
    42  }
    43  
    44  func testGenericReader[Row any](t *testing.T) {
    45  	var model Row
    46  	t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) {
    47  		err := quickCheck(func(rows []Row) bool {
    48  			if len(rows) == 0 {
    49  				return true // TODO: fix support for parquet files with zero rows
    50  			}
    51  			if err := testGenericReaderRows(rows); err != nil {
    52  				t.Error(err)
    53  				return false
    54  			}
    55  			return true
    56  		})
    57  		if err != nil {
    58  			t.Error(err)
    59  		}
    60  	})
    61  }
    62  
    63  func testGenericReaderRows[Row any](rows []Row) error {
    64  	setNullPointers(rows)
    65  	buffer := new(bytes.Buffer)
    66  	writer := parquet.NewGenericWriter[Row](buffer)
    67  	_, err := writer.Write(rows)
    68  	if err != nil {
    69  		return err
    70  	}
    71  	if err := writer.Close(); err != nil {
    72  		return err
    73  	}
    74  	reader := parquet.NewGenericReader[Row](bytes.NewReader(buffer.Bytes()))
    75  	result := make([]Row, len(rows))
    76  	n, err := reader.Read(result)
    77  	if err != nil && !errors.Is(err, io.EOF) {
    78  		return err
    79  	}
    80  	if n < len(rows) {
    81  		return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n)
    82  	}
    83  	if !reflect.DeepEqual(rows, result) {
    84  		return fmt.Errorf("rows mismatch:\nwant: %+v\ngot:  %+v", rows, result)
    85  	}
    86  	return nil
    87  }
    88  
    89  func BenchmarkGenericReader(b *testing.B) {
    90  	benchmarkGenericReader[benchmarkRowType](b)
    91  	benchmarkGenericReader[booleanColumn](b)
    92  	benchmarkGenericReader[int32Column](b)
    93  	benchmarkGenericReader[int64Column](b)
    94  	benchmarkGenericReader[floatColumn](b)
    95  	benchmarkGenericReader[doubleColumn](b)
    96  	benchmarkGenericReader[byteArrayColumn](b)
    97  	benchmarkGenericReader[fixedLenByteArrayColumn](b)
    98  	benchmarkGenericReader[stringColumn](b)
    99  	benchmarkGenericReader[indexedStringColumn](b)
   100  	benchmarkGenericReader[uuidColumn](b)
   101  	benchmarkGenericReader[mapColumn](b)
   102  	benchmarkGenericReader[decimalColumn](b)
   103  	benchmarkGenericReader[contact](b)
   104  	benchmarkGenericReader[paddedBooleanColumn](b)
   105  	benchmarkGenericReader[optionalInt32Column](b)
   106  }
   107  
   108  func benchmarkGenericReader[Row generator[Row]](b *testing.B) {
   109  	var model Row
   110  	b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) {
   111  		prng := rand.New(rand.NewSource(0))
   112  		rows := make([]Row, benchmarkNumRows)
   113  		for i := range rows {
   114  			rows[i] = rows[i].generate(prng)
   115  		}
   116  
   117  		rowbuf := make([]Row, benchmarkRowsPerStep)
   118  		buffer := parquet.NewGenericBuffer[Row]()
   119  		buffer.Write(rows)
   120  
   121  		b.Run("go1.17", func(b *testing.B) {
   122  			reader := parquet.NewRowGroupReader(buffer)
   123  			benchmarkRowsPerSecond(b, func() int {
   124  				for i := range rowbuf {
   125  					if err := reader.Read(&rowbuf[i]); err != nil {
   126  						if err != io.EOF {
   127  							b.Fatal(err)
   128  						} else {
   129  							reader.Reset()
   130  						}
   131  					}
   132  				}
   133  				return len(rowbuf)
   134  			})
   135  		})
   136  
   137  		b.Run("go1.18", func(b *testing.B) {
   138  			reader := parquet.NewGenericRowGroupReader[Row](buffer)
   139  			benchmarkRowsPerSecond(b, func() int {
   140  				n, err := reader.Read(rowbuf)
   141  				if err != nil {
   142  					if err != io.EOF {
   143  						b.Fatal(err)
   144  					} else {
   145  						reader.Reset()
   146  					}
   147  				}
   148  				return n
   149  			})
   150  		})
   151  	})
   152  }