github.com/fraugster/parquet-go@v0.12.0/types_test.go (about) 1 package goparquet 2 3 import ( 4 "bytes" 5 "io" 6 "math/rand" 7 "reflect" 8 "testing" 9 10 "github.com/fraugster/parquet-go/parquet" 11 12 "github.com/stretchr/testify/assert" 13 "github.com/stretchr/testify/require" 14 ) 15 16 func buildRandArray(count int, fn func() interface{}) []interface{} { 17 ret := make([]interface{}, count) 18 for i := range ret { 19 ret[i] = fn() 20 } 21 22 return ret 23 } 24 25 type encodingFixtures struct { 26 name string 27 enc valuesEncoder 28 dec valuesDecoder 29 rand func() interface{} 30 } 31 32 var ( 33 encFixtures = []encodingFixtures{ 34 { 35 name: "Int32Plain", 36 enc: &int32PlainEncoder{}, 37 dec: &int32PlainDecoder{}, 38 rand: func() interface{} { 39 return int32(rand.Int()) 40 }, 41 }, 42 { 43 name: "Int32Delta", 44 enc: &int32DeltaBPEncoder{deltaBitPackEncoder32: deltaBitPackEncoder32{blockSize: 128, miniBlockCount: 4}}, 45 dec: &int32DeltaBPDecoder{}, 46 rand: func() interface{} { 47 return int32(rand.Int()) 48 }, 49 }, 50 { 51 name: "Int64Plain", 52 enc: &int64PlainEncoder{}, 53 dec: &int64PlainDecoder{}, 54 rand: func() interface{} { 55 return rand.Int63() 56 }, 57 }, 58 { 59 name: "Int64Delta", 60 enc: &int64DeltaBPEncoder{deltaBitPackEncoder64: deltaBitPackEncoder64{blockSize: 128, miniBlockCount: 4}}, 61 dec: &int64DeltaBPDecoder{}, 62 rand: func() interface{} { 63 return rand.Int63() 64 }, 65 }, 66 { 67 name: "Int96Plain", 68 enc: &int96PlainEncoder{}, 69 dec: &int96PlainDecoder{}, 70 rand: func() interface{} { 71 var data [12]byte 72 for i := 0; i < 12; i++ { 73 data[i] = byte(rand.Intn(256)) 74 } 75 76 return data 77 }, 78 }, 79 { 80 name: "DoublePlain", 81 enc: &doublePlainEncoder{}, 82 dec: &doublePlainDecoder{}, 83 rand: func() interface{} { 84 return rand.Float64() 85 }, 86 }, 87 { 88 name: "FloatPlain", 89 enc: &floatPlainEncoder{}, 90 dec: &floatPlainDecoder{}, 91 rand: func() interface{} { 92 return rand.Float32() 93 }, 94 }, 95 { 96 name: "BooleanRLE", 97 enc: &booleanRLEEncoder{}, 98 dec: &booleanRLEDecoder{}, 99 rand: func() interface{} { 100 return rand.Int()%2 == 0 101 }, 102 }, 103 { 104 name: "BooleanPlain", 105 enc: &booleanPlainEncoder{}, 106 dec: &booleanPlainDecoder{}, 107 rand: func() interface{} { 108 return rand.Int()%2 == 0 109 }, 110 }, 111 /* 112 { 113 name: "DictionaryInt32", 114 enc: &dictEncoder{}, 115 dec: &dictDecoder{}, 116 rand: func() interface{} { 117 return rand.Int31n(100) 118 }, 119 }, 120 { 121 name: "DictionaryInt96", 122 enc: &dictEncoder{}, 123 dec: &dictDecoder{}, 124 rand: func() interface{} { 125 var data [12]byte 126 for i := 0; i < 12; i++ { 127 data[i] = byte(rand.Intn(10)) // limit the values 128 } 129 130 return data 131 }, 132 }, 133 */ 134 { 135 name: "ByteArrayFixedLen", 136 enc: &byteArrayPlainEncoder{length: 3}, 137 dec: &byteArrayPlainDecoder{length: 3}, 138 rand: func() interface{} { 139 return []byte{ 140 byte(rand.Intn(256)), 141 byte(rand.Intn(256)), 142 byte(rand.Intn(256)), 143 } 144 }, 145 }, 146 { 147 name: "ByteArrayPlain", 148 enc: &byteArrayPlainEncoder{}, 149 dec: &byteArrayPlainDecoder{}, 150 rand: func() interface{} { 151 l := rand.Intn(10) + 1 // no zero 152 ret := make([]byte, l) 153 for i := range ret { 154 ret[i] = byte(rand.Intn(256)) 155 } 156 return ret 157 }, 158 }, 159 { 160 name: "ByteArrayDeltaLen", 161 enc: &byteArrayDeltaLengthEncoder{}, 162 dec: &byteArrayDeltaLengthDecoder{}, 163 rand: func() interface{} { 164 l := rand.Intn(10) + 1 // no zero 165 ret := make([]byte, l) 166 for i := range ret { 167 ret[i] = byte(rand.Intn(256)) 168 } 169 return ret 170 }, 171 }, 172 { 173 name: "ByteArrayDelta", 174 enc: &byteArrayDeltaEncoder{}, 175 dec: &byteArrayDeltaDecoder{}, 176 rand: func() interface{} { 177 l := rand.Intn(10) + 1 // no zero 178 ret := make([]byte, l) 179 for i := range ret { 180 ret[i] = byte(rand.Intn(256)) 181 } 182 return ret 183 }, 184 }, 185 } 186 ) 187 188 func TestTypes(t *testing.T) { 189 bufLen := 1000 190 191 bufRead := bufLen + bufLen/2 192 193 for _, data := range encFixtures { 194 t.Run(data.name, func(t *testing.T) { 195 arr1 := buildRandArray(bufLen, data.rand) 196 arr2 := buildRandArray(bufLen, data.rand) 197 w := &bytes.Buffer{} 198 require.NoError(t, data.enc.init(w)) 199 require.NoError(t, data.enc.encodeValues(arr1)) 200 require.NoError(t, data.enc.encodeValues(arr2)) 201 require.NoError(t, data.enc.Close()) 202 var v []interface{} 203 if d, ok := data.enc.(dictValuesEncoder); ok { 204 v = d.getValues() 205 } 206 ret := make([]interface{}, bufRead) 207 r := bytes.NewReader(w.Bytes()) 208 if d, ok := data.dec.(dictValuesDecoder); ok { 209 d.setValues(v) 210 } 211 require.NoError(t, data.dec.init(r)) 212 n, err := data.dec.decodeValues(ret) 213 require.NoError(t, err) 214 require.Equal(t, bufRead, n) 215 require.Equal(t, ret[:bufLen], arr1) 216 //require.Equal(t, len(ret[bufRead:]), len(arr2[:bufRead-bufLen])) 217 require.Equal(t, ret[bufLen:], arr2[:bufRead-bufLen]) 218 n, err = data.dec.decodeValues(ret) 219 require.Equal(t, io.EOF, err) 220 require.Equal(t, ret[:n], arr2[bufRead-bufLen:]) 221 }) 222 } 223 } 224 225 func convertToInterface(arr interface{}) []interface{} { 226 v := reflect.ValueOf(arr) 227 ret := make([]interface{}, v.Len()) 228 229 for i := 0; i < v.Len(); i++ { 230 ret[i] = v.Index(i).Interface() 231 } 232 233 return ret 234 } 235 236 func getOne(arr interface{}) interface{} { 237 v := reflect.ValueOf(arr) 238 if v.Len() < 1 { 239 panic("no item in the array") 240 } 241 242 return v.Index(0).Interface() 243 } 244 245 type storeFixtures struct { 246 name string 247 store *ColumnStore 248 rand func(int) interface{} 249 } 250 251 var ( 252 stFixtures = []storeFixtures{ 253 { 254 name: "Int32Store", 255 store: mustColumnStore(NewInt32Store(parquet.Encoding_PLAIN, false, &ColumnParameters{})), 256 rand: func(n int) interface{} { 257 ret := make([]int32, n) 258 for i := range ret { 259 ret[i] = rand.Int31() 260 } 261 return ret 262 }, 263 }, 264 { 265 name: "Int64Store", 266 store: mustColumnStore(NewInt64Store(parquet.Encoding_PLAIN, false, &ColumnParameters{})), 267 rand: func(n int) interface{} { 268 ret := make([]int64, n) 269 for i := range ret { 270 ret[i] = rand.Int63() 271 } 272 return ret 273 }, 274 }, 275 { 276 name: "Float32Store", 277 store: mustColumnStore(NewFloatStore(parquet.Encoding_PLAIN, false, &ColumnParameters{})), 278 rand: func(n int) interface{} { 279 ret := make([]float32, n) 280 for i := range ret { 281 ret[i] = rand.Float32() 282 } 283 return ret 284 }, 285 }, 286 { 287 name: "Float64Store", 288 store: mustColumnStore(NewDoubleStore(parquet.Encoding_PLAIN, false, &ColumnParameters{})), 289 rand: func(n int) interface{} { 290 ret := make([]float64, n) 291 for i := range ret { 292 ret[i] = rand.Float64() 293 } 294 return ret 295 }, 296 }, 297 { 298 name: "Int96Store", 299 store: mustColumnStore(NewInt96Store(parquet.Encoding_PLAIN, false, &ColumnParameters{})), 300 rand: func(n int) interface{} { 301 var data = make([][12]byte, n) 302 for c := 0; c < n; c++ { 303 for i := 0; i < 12; i++ { 304 data[c][i] = byte(rand.Intn(255)) 305 } 306 } 307 return data 308 }, 309 }, 310 { 311 name: "BooleanStore", 312 store: mustColumnStore(NewBooleanStore(parquet.Encoding_PLAIN, &ColumnParameters{})), 313 rand: func(n int) interface{} { 314 ret := make([]bool, n) 315 for i := range ret { 316 ret[i] = rand.Int()%2 == 0 317 } 318 return ret 319 }, 320 }, 321 } 322 ) 323 324 func mustColumnStore(store *ColumnStore, err error) *ColumnStore { 325 if err != nil { 326 panic(err) 327 } 328 329 return store 330 } 331 332 func TestStores(t *testing.T) { 333 for _, fix := range stFixtures { 334 t.Run(fix.name, func(t *testing.T) { 335 st := fix.store 336 randArr := fix.rand 337 338 st.reset(parquet.FieldRepetitionType_REPEATED, 10, 10) 339 340 data := randArr(3) 341 err := st.add(data, 3, 3, 0) 342 require.NoError(t, err) 343 344 assert.Equal(t, convertToInterface(data), st.values.getValues()) 345 // Field is not Required, so def level should be one more 346 assert.Equal(t, []int32{4, 4, 4}, st.dLevels.toArray()) 347 // Field is repeated so the rep level (except for the first one which is the new record) 348 // should be one more 349 assert.Equal(t, []int32{0, 4, 4}, st.rLevels.toArray()) 350 351 err = st.add(randArr(0), 3, 3, 0) 352 require.NoError(t, err) 353 // No Reset 354 assert.Equal(t, convertToInterface(data), st.values.getValues()) 355 // The new field is nil 356 assert.Equal(t, []int32{4, 4, 4, 3}, st.dLevels.toArray()) 357 assert.Equal(t, []int32{0, 4, 4, 0}, st.rLevels.toArray()) 358 359 // One record 360 data = randArr(1) 361 st.reset(parquet.FieldRepetitionType_REQUIRED, 10, 10) 362 err = st.add(getOne(data), 3, 3, 0) 363 require.NoError(t, err) 364 365 assert.Equal(t, convertToInterface(data), st.values.getValues()) 366 // Field is Required, so def level should be exact 367 assert.Equal(t, []int32{3}, st.dLevels.toArray()) 368 assert.Equal(t, []int32{0}, st.rLevels.toArray()) 369 370 data2 := randArr(1) 371 err = st.add(getOne(data2), 3, 3, 10) 372 require.NoError(t, err) 373 // No reset 374 dArr := []interface{}{getOne(data), getOne(data2)} 375 assert.Equal(t, dArr, st.values.getValues()) 376 // Field is Required, so def level should be exact 377 assert.Equal(t, []int32{3, 3}, st.dLevels.toArray()) 378 // rLevel is more than max, so its max now 379 assert.Equal(t, []int32{0, 3}, st.rLevels.toArray()) 380 381 // empty array had same effect as nil in repeated, but not in required 382 err = st.add(randArr(0), 3, 3, 10) 383 assert.Error(t, err) 384 385 // Just exact type and nil 386 err = st.add(struct{}{}, 3, 3, 0) 387 assert.Error(t, err) 388 389 err = st.add(nil, 3, 3, 0) 390 assert.NoError(t, err) 391 392 assert.Equal(t, dArr, st.values.getValues()) 393 394 // Field is Required, so def level should be exact 395 assert.Equal(t, []int32{3, 3, 3}, st.dLevels.toArray()) 396 // rLevel is more than max, so its max now 397 assert.Equal(t, []int32{0, 3, 0}, st.rLevels.toArray()) 398 }) 399 } 400 }