github.com/fraugster/parquet-go@v0.12.0/parquet_compatibility_test.go (about) 1 package goparquet 2 3 import ( 4 "context" 5 "encoding/csv" 6 "fmt" 7 "io" 8 "os" 9 "path/filepath" 10 "strconv" 11 "testing" 12 13 "github.com/stretchr/testify/assert" 14 15 "github.com/stretchr/testify/require" 16 ) 17 18 func toCustomerMap(t *testing.T, data []string) map[string]interface{} { 19 require.Len(t, data, 8) 20 res := make(map[string]interface{}) 21 var err error 22 cKey, err := strconv.ParseInt(data[0], 10, 0) 23 if err == nil { 24 res["c_custkey"] = cKey 25 } 26 27 res["c_name"] = []byte(data[1]) 28 res["c_address"] = []byte(data[2]) 29 cKey, err = strconv.ParseInt(data[3], 10, 0) 30 if err == nil { 31 res["c_nationkey"] = int32(cKey) 32 } 33 res["c_phone"] = []byte(data[4]) 34 fl, err := strconv.ParseFloat(data[5], 64) 35 if err == nil { 36 res["c_acctbal"] = fl 37 } 38 res["c_mktsegment"] = []byte(data[6]) 39 res["c_comment"] = []byte(data[7]) 40 41 return res 42 } 43 44 func customerMapTest(parquet, csvFl string) func(t *testing.T) { 45 return func(t *testing.T) { 46 f, err := os.Open(parquet) 47 require.NoError(t, err) 48 defer f.Close() 49 50 f2, err := os.Open(csvFl) 51 require.NoError(t, err) 52 defer f2.Close() 53 54 r := csv.NewReader(f2) 55 r.Comma = '|' 56 57 reader, err := NewFileReader(f) 58 require.NoError(t, err) 59 60 for { 61 if err := reader.readRowGroup(context.Background()); err == io.EOF { 62 break 63 } 64 count := reader.schemaReader.rowGroupNumRecords() 65 for i := int64(0); i < count; i++ { 66 rec, err := r.Read() 67 require.NoError(t, err) 68 read, err := reader.schemaReader.getData() 69 require.NoError(t, err) 70 csvData := toCustomerMap(t, rec) 71 assert.Equal(t, csvData, read) 72 } 73 } 74 } 75 } 76 77 func TestCompatibility(t *testing.T) { 78 root := os.Getenv("PARQUET_COMPATIBILITY_REPO_ROOT") 79 if root == "" { 80 t.Skip("The PARQUET_COMPATIBILITY_REPO_ROOT is missing, skip the tests") 81 } 82 83 for _, v := range []string{"NONE", "GZIP", "SNAPPY"} { 84 pq := filepath.Join(root, "parquet-testdata", "impala", fmt.Sprintf("1.1.1-%s", v), "customer.impala.parquet") 85 cs := filepath.Join(root, "parquet-testdata", "tpch", "customer.csv") 86 t.Run(fmt.Sprintf("Customer %s", v), customerMapTest(pq, cs)) 87 } 88 }