github.com/fraugster/parquet-go@v0.12.0/types_test.go (about)

     1  package goparquet
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"math/rand"
     7  	"reflect"
     8  	"testing"
     9  
    10  	"github.com/fraugster/parquet-go/parquet"
    11  
    12  	"github.com/stretchr/testify/assert"
    13  	"github.com/stretchr/testify/require"
    14  )
    15  
    16  func buildRandArray(count int, fn func() interface{}) []interface{} {
    17  	ret := make([]interface{}, count)
    18  	for i := range ret {
    19  		ret[i] = fn()
    20  	}
    21  
    22  	return ret
    23  }
    24  
    25  type encodingFixtures struct {
    26  	name string
    27  	enc  valuesEncoder
    28  	dec  valuesDecoder
    29  	rand func() interface{}
    30  }
    31  
    32  var (
    33  	encFixtures = []encodingFixtures{
    34  		{
    35  			name: "Int32Plain",
    36  			enc:  &int32PlainEncoder{},
    37  			dec:  &int32PlainDecoder{},
    38  			rand: func() interface{} {
    39  				return int32(rand.Int())
    40  			},
    41  		},
    42  		{
    43  			name: "Int32Delta",
    44  			enc:  &int32DeltaBPEncoder{deltaBitPackEncoder32: deltaBitPackEncoder32{blockSize: 128, miniBlockCount: 4}},
    45  			dec:  &int32DeltaBPDecoder{},
    46  			rand: func() interface{} {
    47  				return int32(rand.Int())
    48  			},
    49  		},
    50  		{
    51  			name: "Int64Plain",
    52  			enc:  &int64PlainEncoder{},
    53  			dec:  &int64PlainDecoder{},
    54  			rand: func() interface{} {
    55  				return rand.Int63()
    56  			},
    57  		},
    58  		{
    59  			name: "Int64Delta",
    60  			enc:  &int64DeltaBPEncoder{deltaBitPackEncoder64: deltaBitPackEncoder64{blockSize: 128, miniBlockCount: 4}},
    61  			dec:  &int64DeltaBPDecoder{},
    62  			rand: func() interface{} {
    63  				return rand.Int63()
    64  			},
    65  		},
    66  		{
    67  			name: "Int96Plain",
    68  			enc:  &int96PlainEncoder{},
    69  			dec:  &int96PlainDecoder{},
    70  			rand: func() interface{} {
    71  				var data [12]byte
    72  				for i := 0; i < 12; i++ {
    73  					data[i] = byte(rand.Intn(256))
    74  				}
    75  
    76  				return data
    77  			},
    78  		},
    79  		{
    80  			name: "DoublePlain",
    81  			enc:  &doublePlainEncoder{},
    82  			dec:  &doublePlainDecoder{},
    83  			rand: func() interface{} {
    84  				return rand.Float64()
    85  			},
    86  		},
    87  		{
    88  			name: "FloatPlain",
    89  			enc:  &floatPlainEncoder{},
    90  			dec:  &floatPlainDecoder{},
    91  			rand: func() interface{} {
    92  				return rand.Float32()
    93  			},
    94  		},
    95  		{
    96  			name: "BooleanRLE",
    97  			enc:  &booleanRLEEncoder{},
    98  			dec:  &booleanRLEDecoder{},
    99  			rand: func() interface{} {
   100  				return rand.Int()%2 == 0
   101  			},
   102  		},
   103  		{
   104  			name: "BooleanPlain",
   105  			enc:  &booleanPlainEncoder{},
   106  			dec:  &booleanPlainDecoder{},
   107  			rand: func() interface{} {
   108  				return rand.Int()%2 == 0
   109  			},
   110  		},
   111  		/*
   112  			{
   113  				name: "DictionaryInt32",
   114  				enc:  &dictEncoder{},
   115  				dec:  &dictDecoder{},
   116  				rand: func() interface{} {
   117  					return rand.Int31n(100)
   118  				},
   119  			},
   120  			{
   121  				name: "DictionaryInt96",
   122  				enc:  &dictEncoder{},
   123  				dec:  &dictDecoder{},
   124  				rand: func() interface{} {
   125  					var data [12]byte
   126  					for i := 0; i < 12; i++ {
   127  						data[i] = byte(rand.Intn(10)) // limit the values
   128  					}
   129  
   130  					return data
   131  				},
   132  			},
   133  		*/
   134  		{
   135  			name: "ByteArrayFixedLen",
   136  			enc:  &byteArrayPlainEncoder{length: 3},
   137  			dec:  &byteArrayPlainDecoder{length: 3},
   138  			rand: func() interface{} {
   139  				return []byte{
   140  					byte(rand.Intn(256)),
   141  					byte(rand.Intn(256)),
   142  					byte(rand.Intn(256)),
   143  				}
   144  			},
   145  		},
   146  		{
   147  			name: "ByteArrayPlain",
   148  			enc:  &byteArrayPlainEncoder{},
   149  			dec:  &byteArrayPlainDecoder{},
   150  			rand: func() interface{} {
   151  				l := rand.Intn(10) + 1 // no zero
   152  				ret := make([]byte, l)
   153  				for i := range ret {
   154  					ret[i] = byte(rand.Intn(256))
   155  				}
   156  				return ret
   157  			},
   158  		},
   159  		{
   160  			name: "ByteArrayDeltaLen",
   161  			enc:  &byteArrayDeltaLengthEncoder{},
   162  			dec:  &byteArrayDeltaLengthDecoder{},
   163  			rand: func() interface{} {
   164  				l := rand.Intn(10) + 1 // no zero
   165  				ret := make([]byte, l)
   166  				for i := range ret {
   167  					ret[i] = byte(rand.Intn(256))
   168  				}
   169  				return ret
   170  			},
   171  		},
   172  		{
   173  			name: "ByteArrayDelta",
   174  			enc:  &byteArrayDeltaEncoder{},
   175  			dec:  &byteArrayDeltaDecoder{},
   176  			rand: func() interface{} {
   177  				l := rand.Intn(10) + 1 // no zero
   178  				ret := make([]byte, l)
   179  				for i := range ret {
   180  					ret[i] = byte(rand.Intn(256))
   181  				}
   182  				return ret
   183  			},
   184  		},
   185  	}
   186  )
   187  
   188  func TestTypes(t *testing.T) {
   189  	bufLen := 1000
   190  
   191  	bufRead := bufLen + bufLen/2
   192  
   193  	for _, data := range encFixtures {
   194  		t.Run(data.name, func(t *testing.T) {
   195  			arr1 := buildRandArray(bufLen, data.rand)
   196  			arr2 := buildRandArray(bufLen, data.rand)
   197  			w := &bytes.Buffer{}
   198  			require.NoError(t, data.enc.init(w))
   199  			require.NoError(t, data.enc.encodeValues(arr1))
   200  			require.NoError(t, data.enc.encodeValues(arr2))
   201  			require.NoError(t, data.enc.Close())
   202  			var v []interface{}
   203  			if d, ok := data.enc.(dictValuesEncoder); ok {
   204  				v = d.getValues()
   205  			}
   206  			ret := make([]interface{}, bufRead)
   207  			r := bytes.NewReader(w.Bytes())
   208  			if d, ok := data.dec.(dictValuesDecoder); ok {
   209  				d.setValues(v)
   210  			}
   211  			require.NoError(t, data.dec.init(r))
   212  			n, err := data.dec.decodeValues(ret)
   213  			require.NoError(t, err)
   214  			require.Equal(t, bufRead, n)
   215  			require.Equal(t, ret[:bufLen], arr1)
   216  			//require.Equal(t, len(ret[bufRead:]), len(arr2[:bufRead-bufLen]))
   217  			require.Equal(t, ret[bufLen:], arr2[:bufRead-bufLen])
   218  			n, err = data.dec.decodeValues(ret)
   219  			require.Equal(t, io.EOF, err)
   220  			require.Equal(t, ret[:n], arr2[bufRead-bufLen:])
   221  		})
   222  	}
   223  }
   224  
   225  func convertToInterface(arr interface{}) []interface{} {
   226  	v := reflect.ValueOf(arr)
   227  	ret := make([]interface{}, v.Len())
   228  
   229  	for i := 0; i < v.Len(); i++ {
   230  		ret[i] = v.Index(i).Interface()
   231  	}
   232  
   233  	return ret
   234  }
   235  
   236  func getOne(arr interface{}) interface{} {
   237  	v := reflect.ValueOf(arr)
   238  	if v.Len() < 1 {
   239  		panic("no item in the array")
   240  	}
   241  
   242  	return v.Index(0).Interface()
   243  }
   244  
   245  type storeFixtures struct {
   246  	name  string
   247  	store *ColumnStore
   248  	rand  func(int) interface{}
   249  }
   250  
   251  var (
   252  	stFixtures = []storeFixtures{
   253  		{
   254  			name:  "Int32Store",
   255  			store: mustColumnStore(NewInt32Store(parquet.Encoding_PLAIN, false, &ColumnParameters{})),
   256  			rand: func(n int) interface{} {
   257  				ret := make([]int32, n)
   258  				for i := range ret {
   259  					ret[i] = rand.Int31()
   260  				}
   261  				return ret
   262  			},
   263  		},
   264  		{
   265  			name:  "Int64Store",
   266  			store: mustColumnStore(NewInt64Store(parquet.Encoding_PLAIN, false, &ColumnParameters{})),
   267  			rand: func(n int) interface{} {
   268  				ret := make([]int64, n)
   269  				for i := range ret {
   270  					ret[i] = rand.Int63()
   271  				}
   272  				return ret
   273  			},
   274  		},
   275  		{
   276  			name:  "Float32Store",
   277  			store: mustColumnStore(NewFloatStore(parquet.Encoding_PLAIN, false, &ColumnParameters{})),
   278  			rand: func(n int) interface{} {
   279  				ret := make([]float32, n)
   280  				for i := range ret {
   281  					ret[i] = rand.Float32()
   282  				}
   283  				return ret
   284  			},
   285  		},
   286  		{
   287  			name:  "Float64Store",
   288  			store: mustColumnStore(NewDoubleStore(parquet.Encoding_PLAIN, false, &ColumnParameters{})),
   289  			rand: func(n int) interface{} {
   290  				ret := make([]float64, n)
   291  				for i := range ret {
   292  					ret[i] = rand.Float64()
   293  				}
   294  				return ret
   295  			},
   296  		},
   297  		{
   298  			name:  "Int96Store",
   299  			store: mustColumnStore(NewInt96Store(parquet.Encoding_PLAIN, false, &ColumnParameters{})),
   300  			rand: func(n int) interface{} {
   301  				var data = make([][12]byte, n)
   302  				for c := 0; c < n; c++ {
   303  					for i := 0; i < 12; i++ {
   304  						data[c][i] = byte(rand.Intn(255))
   305  					}
   306  				}
   307  				return data
   308  			},
   309  		},
   310  		{
   311  			name:  "BooleanStore",
   312  			store: mustColumnStore(NewBooleanStore(parquet.Encoding_PLAIN, &ColumnParameters{})),
   313  			rand: func(n int) interface{} {
   314  				ret := make([]bool, n)
   315  				for i := range ret {
   316  					ret[i] = rand.Int()%2 == 0
   317  				}
   318  				return ret
   319  			},
   320  		},
   321  	}
   322  )
   323  
   324  func mustColumnStore(store *ColumnStore, err error) *ColumnStore {
   325  	if err != nil {
   326  		panic(err)
   327  	}
   328  
   329  	return store
   330  }
   331  
   332  func TestStores(t *testing.T) {
   333  	for _, fix := range stFixtures {
   334  		t.Run(fix.name, func(t *testing.T) {
   335  			st := fix.store
   336  			randArr := fix.rand
   337  
   338  			st.reset(parquet.FieldRepetitionType_REPEATED, 10, 10)
   339  
   340  			data := randArr(3)
   341  			err := st.add(data, 3, 3, 0)
   342  			require.NoError(t, err)
   343  
   344  			assert.Equal(t, convertToInterface(data), st.values.getValues())
   345  			// Field is not Required, so def level should be one more
   346  			assert.Equal(t, []int32{4, 4, 4}, st.dLevels.toArray())
   347  			// Field is repeated so the rep level (except for the first one which is the new record)
   348  			// should be one more
   349  			assert.Equal(t, []int32{0, 4, 4}, st.rLevels.toArray())
   350  
   351  			err = st.add(randArr(0), 3, 3, 0)
   352  			require.NoError(t, err)
   353  			// No Reset
   354  			assert.Equal(t, convertToInterface(data), st.values.getValues())
   355  			// The new field is nil
   356  			assert.Equal(t, []int32{4, 4, 4, 3}, st.dLevels.toArray())
   357  			assert.Equal(t, []int32{0, 4, 4, 0}, st.rLevels.toArray())
   358  
   359  			// One record
   360  			data = randArr(1)
   361  			st.reset(parquet.FieldRepetitionType_REQUIRED, 10, 10)
   362  			err = st.add(getOne(data), 3, 3, 0)
   363  			require.NoError(t, err)
   364  
   365  			assert.Equal(t, convertToInterface(data), st.values.getValues())
   366  			// Field is Required, so def level should be exact
   367  			assert.Equal(t, []int32{3}, st.dLevels.toArray())
   368  			assert.Equal(t, []int32{0}, st.rLevels.toArray())
   369  
   370  			data2 := randArr(1)
   371  			err = st.add(getOne(data2), 3, 3, 10)
   372  			require.NoError(t, err)
   373  			// No reset
   374  			dArr := []interface{}{getOne(data), getOne(data2)}
   375  			assert.Equal(t, dArr, st.values.getValues())
   376  			// Field is Required, so def level should be exact
   377  			assert.Equal(t, []int32{3, 3}, st.dLevels.toArray())
   378  			// rLevel is more than max, so its max now
   379  			assert.Equal(t, []int32{0, 3}, st.rLevels.toArray())
   380  
   381  			// empty array had same effect as nil in repeated, but not in required
   382  			err = st.add(randArr(0), 3, 3, 10)
   383  			assert.Error(t, err)
   384  
   385  			// Just exact type and nil
   386  			err = st.add(struct{}{}, 3, 3, 0)
   387  			assert.Error(t, err)
   388  
   389  			err = st.add(nil, 3, 3, 0)
   390  			assert.NoError(t, err)
   391  
   392  			assert.Equal(t, dArr, st.values.getValues())
   393  
   394  			// Field is Required, so def level should be exact
   395  			assert.Equal(t, []int32{3, 3, 3}, st.dLevels.toArray())
   396  			// rLevel is more than max, so its max now
   397  			assert.Equal(t, []int32{0, 3, 0}, st.rLevels.toArray())
   398  		})
   399  	}
   400  }