github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/buffer_go18_test.go (about)

     1  //go:build go1.18
     2  
     3  package parquet_test
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"math/rand"
    10  	"reflect"
    11  	"testing"
    12  
    13  	"github.com/vc42/parquet-go"
    14  )
    15  
    16  func TestGenericBuffer(t *testing.T) {
    17  	testGenericBuffer[booleanColumn](t)
    18  	testGenericBuffer[int32Column](t)
    19  	testGenericBuffer[int64Column](t)
    20  	testGenericBuffer[int96Column](t)
    21  	testGenericBuffer[floatColumn](t)
    22  	testGenericBuffer[doubleColumn](t)
    23  	testGenericBuffer[byteArrayColumn](t)
    24  	testGenericBuffer[fixedLenByteArrayColumn](t)
    25  	testGenericBuffer[stringColumn](t)
    26  	testGenericBuffer[indexedStringColumn](t)
    27  	testGenericBuffer[uuidColumn](t)
    28  	testGenericBuffer[mapColumn](t)
    29  	testGenericBuffer[decimalColumn](t)
    30  	testGenericBuffer[addressBook](t)
    31  	testGenericBuffer[contact](t)
    32  	testGenericBuffer[listColumn2](t)
    33  	testGenericBuffer[listColumn1](t)
    34  	testGenericBuffer[listColumn0](t)
    35  	testGenericBuffer[nestedListColumn1](t)
    36  	testGenericBuffer[nestedListColumn](t)
    37  	testGenericBuffer[*contact](t)
    38  	testGenericBuffer[paddedBooleanColumn](t)
    39  	testGenericBuffer[optionalInt32Column](t)
    40  	testGenericBuffer[repeatedInt32Column](t)
    41  }
    42  
    43  func testGenericBuffer[Row any](t *testing.T) {
    44  	var model Row
    45  	t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) {
    46  		err := quickCheck(func(rows []Row) bool {
    47  			if len(rows) == 0 {
    48  				return true // TODO: fix support for parquet files with zero rows
    49  			}
    50  			if err := testGenericBufferRows(rows); err != nil {
    51  				t.Error(err)
    52  				return false
    53  			}
    54  			return true
    55  		})
    56  		if err != nil {
    57  			t.Error(err)
    58  		}
    59  	})
    60  }
    61  
    62  func testGenericBufferRows[Row any](rows []Row) error {
    63  	setNullPointers(rows)
    64  	buffer := parquet.NewGenericBuffer[Row]()
    65  	_, err := buffer.Write(rows)
    66  	if err != nil {
    67  		return err
    68  	}
    69  	reader := parquet.NewGenericRowGroupReader[Row](buffer)
    70  	result := make([]Row, len(rows))
    71  	n, err := reader.Read(result)
    72  	if err != nil && !errors.Is(err, io.EOF) {
    73  		return err
    74  	}
    75  	if n < len(rows) {
    76  		return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n)
    77  	}
    78  	if !reflect.DeepEqual(rows, result) {
    79  		return fmt.Errorf("rows mismatch:\nwant: %#v\ngot:  %#v", rows, result)
    80  	}
    81  	return nil
    82  }
    83  
    84  func setNullPointers[Row any](rows []Row) {
    85  	if len(rows) > 0 && reflect.TypeOf(rows[0]).Kind() == reflect.Pointer {
    86  		for i := range rows {
    87  			v := reflect.ValueOf(&rows[i]).Elem()
    88  			if v.IsNil() {
    89  				v.Set(reflect.New(v.Type().Elem()))
    90  			}
    91  		}
    92  	}
    93  }
    94  
    95  type generator[T any] interface {
    96  	generate(*rand.Rand) T
    97  }
    98  
    99  func BenchmarkGenericBuffer(b *testing.B) {
   100  	benchmarkGenericBuffer[benchmarkRowType](b)
   101  	benchmarkGenericBuffer[booleanColumn](b)
   102  	benchmarkGenericBuffer[int32Column](b)
   103  	benchmarkGenericBuffer[int64Column](b)
   104  	benchmarkGenericBuffer[floatColumn](b)
   105  	benchmarkGenericBuffer[doubleColumn](b)
   106  	benchmarkGenericBuffer[byteArrayColumn](b)
   107  	benchmarkGenericBuffer[fixedLenByteArrayColumn](b)
   108  	benchmarkGenericBuffer[stringColumn](b)
   109  	benchmarkGenericBuffer[indexedStringColumn](b)
   110  	benchmarkGenericBuffer[uuidColumn](b)
   111  	benchmarkGenericBuffer[mapColumn](b)
   112  	benchmarkGenericBuffer[decimalColumn](b)
   113  	benchmarkGenericBuffer[contact](b)
   114  	benchmarkGenericBuffer[paddedBooleanColumn](b)
   115  	benchmarkGenericBuffer[optionalInt32Column](b)
   116  	benchmarkGenericBuffer[repeatedInt32Column](b)
   117  }
   118  
   119  func benchmarkGenericBuffer[Row generator[Row]](b *testing.B) {
   120  	var model Row
   121  	b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) {
   122  		prng := rand.New(rand.NewSource(0))
   123  		rows := make([]Row, benchmarkNumRows)
   124  		for i := range rows {
   125  			rows[i] = rows[i].generate(prng)
   126  		}
   127  
   128  		b.Run("go1.17", func(b *testing.B) {
   129  			buffer := parquet.NewBuffer(parquet.SchemaOf(rows[0]))
   130  			i := 0
   131  			benchmarkRowsPerSecond(b, func() int {
   132  				for j := 0; j < benchmarkRowsPerStep; j++ {
   133  					if err := buffer.Write(&rows[i]); err != nil {
   134  						b.Fatal(err)
   135  					}
   136  				}
   137  
   138  				i += benchmarkRowsPerStep
   139  				i %= benchmarkNumRows
   140  
   141  				if i == 0 {
   142  					buffer.Reset()
   143  				}
   144  				return benchmarkRowsPerStep
   145  			})
   146  		})
   147  
   148  		b.Run("go1.18", func(b *testing.B) {
   149  			buffer := parquet.NewGenericBuffer[Row]()
   150  			i := 0
   151  			benchmarkRowsPerSecond(b, func() int {
   152  				n, err := buffer.Write(rows[i : i+benchmarkRowsPerStep])
   153  				if err != nil {
   154  					b.Fatal(err)
   155  				}
   156  
   157  				i += benchmarkRowsPerStep
   158  				i %= benchmarkNumRows
   159  
   160  				if i == 0 {
   161  					buffer.Reset()
   162  				}
   163  				return n
   164  			})
   165  		})
   166  	})
   167  }