github.com/fraugster/parquet-go@v0.12.0/parquet_compatibility_test.go (about)

     1  package goparquet
     2  
     3  import (
     4  	"context"
     5  	"encoding/csv"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"strconv"
    11  	"testing"
    12  
    13  	"github.com/stretchr/testify/assert"
    14  
    15  	"github.com/stretchr/testify/require"
    16  )
    17  
    18  func toCustomerMap(t *testing.T, data []string) map[string]interface{} {
    19  	require.Len(t, data, 8)
    20  	res := make(map[string]interface{})
    21  	var err error
    22  	cKey, err := strconv.ParseInt(data[0], 10, 0)
    23  	if err == nil {
    24  		res["c_custkey"] = cKey
    25  	}
    26  
    27  	res["c_name"] = []byte(data[1])
    28  	res["c_address"] = []byte(data[2])
    29  	cKey, err = strconv.ParseInt(data[3], 10, 0)
    30  	if err == nil {
    31  		res["c_nationkey"] = int32(cKey)
    32  	}
    33  	res["c_phone"] = []byte(data[4])
    34  	fl, err := strconv.ParseFloat(data[5], 64)
    35  	if err == nil {
    36  		res["c_acctbal"] = fl
    37  	}
    38  	res["c_mktsegment"] = []byte(data[6])
    39  	res["c_comment"] = []byte(data[7])
    40  
    41  	return res
    42  }
    43  
    44  func customerMapTest(parquet, csvFl string) func(t *testing.T) {
    45  	return func(t *testing.T) {
    46  		f, err := os.Open(parquet)
    47  		require.NoError(t, err)
    48  		defer f.Close()
    49  
    50  		f2, err := os.Open(csvFl)
    51  		require.NoError(t, err)
    52  		defer f2.Close()
    53  
    54  		r := csv.NewReader(f2)
    55  		r.Comma = '|'
    56  
    57  		reader, err := NewFileReader(f)
    58  		require.NoError(t, err)
    59  
    60  		for {
    61  			if err := reader.readRowGroup(context.Background()); err == io.EOF {
    62  				break
    63  			}
    64  			count := reader.schemaReader.rowGroupNumRecords()
    65  			for i := int64(0); i < count; i++ {
    66  				rec, err := r.Read()
    67  				require.NoError(t, err)
    68  				read, err := reader.schemaReader.getData()
    69  				require.NoError(t, err)
    70  				csvData := toCustomerMap(t, rec)
    71  				assert.Equal(t, csvData, read)
    72  			}
    73  		}
    74  	}
    75  }
    76  
    77  func TestCompatibility(t *testing.T) {
    78  	root := os.Getenv("PARQUET_COMPATIBILITY_REPO_ROOT")
    79  	if root == "" {
    80  		t.Skip("The PARQUET_COMPATIBILITY_REPO_ROOT is missing, skip the tests")
    81  	}
    82  
    83  	for _, v := range []string{"NONE", "GZIP", "SNAPPY"} {
    84  		pq := filepath.Join(root, "parquet-testdata", "impala", fmt.Sprintf("1.1.1-%s", v), "customer.impala.parquet")
    85  		cs := filepath.Join(root, "parquet-testdata", "tpch", "customer.csv")
    86  		t.Run(fmt.Sprintf("Customer %s", v), customerMapTest(pq, cs))
    87  	}
    88  }