github.com/fraugster/parquet-go@v0.12.0/filereader_test.go (about)

     1  package goparquet
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"math/rand"
     7  	"testing"
     8  
     9  	"github.com/fraugster/parquet-go/parquetschema"
    10  	"github.com/stretchr/testify/require"
    11  )
    12  
    13  func buildTestStream(t *testing.T) []byte {
    14  	schema, err := parquetschema.ParseSchemaDefinition(`message msg {
    15    required int64 a;
    16    required int64 b;
    17    optional group x {
    18      required int64 c;
    19      required int64 d;
    20    }
    21    required group y {
    22       required int64 e;
    23    }
    24  }
    25  `)
    26  	require.NoError(t, err)
    27  	buf := &bytes.Buffer{}
    28  	pw := NewFileWriter(buf, WithSchemaDefinition(schema))
    29  	for i := 0; i < 10000; i++ {
    30  		data := map[string]interface{}{
    31  			"a": rand.Int63(),
    32  			"b": rand.Int63(),
    33  			"x": map[string]interface{}{
    34  				"c": rand.Int63(),
    35  				"d": rand.Int63(),
    36  			},
    37  			"y": map[string]interface{}{
    38  				"e": rand.Int63(),
    39  			},
    40  		}
    41  		require.NoError(t, pw.AddData(data))
    42  		if i%100 == 0 {
    43  			require.NoError(t, pw.FlushRowGroup())
    44  		}
    45  	}
    46  	require.NoError(t, pw.Close())
    47  	return buf.Bytes()
    48  }
    49  
    50  func TestByteReaderSelected(t *testing.T) {
    51  	r := buildTestStream(t)
    52  	pr, err := NewFileReader(bytes.NewReader(r), "a")
    53  	require.NoError(t, err)
    54  
    55  	for {
    56  		data, err := pr.NextRow()
    57  		if err == io.EOF {
    58  			break
    59  		}
    60  		require.NoError(t, err)
    61  		require.Equal(t, 2, len(data))
    62  		_, ok := data["a"]
    63  		require.True(t, ok)
    64  		y, ok := data["y"]
    65  		require.True(t, ok)
    66  		require.Empty(t, y)
    67  	}
    68  }
    69  
    70  func TestByteReaderSelectedInner(t *testing.T) {
    71  	r := buildTestStream(t)
    72  	pr, err := NewFileReader(bytes.NewReader(r), "x.c")
    73  	require.NoError(t, err)
    74  
    75  	for {
    76  		data, err := pr.NextRow()
    77  		if err == io.EOF {
    78  			break
    79  		}
    80  		require.NoError(t, err)
    81  		require.Equal(t, 2, len(data))
    82  		x, ok := data["x"].(map[string]interface{})
    83  		require.True(t, ok)
    84  		require.Equal(t, 1, len(x))
    85  		y, ok := data["y"]
    86  		require.True(t, ok)
    87  		require.Empty(t, y)
    88  	}
    89  }
    90  
    91  func TestByteReaderSelectedInnerByColumnPath(t *testing.T) {
    92  	r := buildTestStream(t)
    93  	pr, err := NewFileReaderWithOptions(bytes.NewReader(r), WithColumnPaths(ColumnPath{"x", "c"}))
    94  	require.NoError(t, err)
    95  
    96  	for {
    97  		data, err := pr.NextRow()
    98  		if err == io.EOF {
    99  			break
   100  		}
   101  		require.NoError(t, err)
   102  		require.Equal(t, 2, len(data))
   103  		x, ok := data["x"].(map[string]interface{})
   104  		require.True(t, ok)
   105  		require.Equal(t, 1, len(x))
   106  		y, ok := data["y"]
   107  		require.True(t, ok)
   108  		require.Empty(t, y)
   109  	}
   110  }
   111  
   112  func TestByteReaderSelectedInnerFull(t *testing.T) {
   113  	r := buildTestStream(t)
   114  	pr, err := NewFileReader(bytes.NewReader(r), "x")
   115  	require.NoError(t, err)
   116  
   117  	require.NotNil(t, pr.GetColumnByName("x.c"))
   118  
   119  	for {
   120  		data, err := pr.NextRow()
   121  		if err == io.EOF {
   122  			break
   123  		}
   124  		require.NoError(t, err)
   125  		require.Equal(t, 2, len(data))
   126  		x, ok := data["x"].(map[string]interface{})
   127  		require.True(t, ok)
   128  		require.Equal(t, 2, len(x))
   129  		y, ok := data["y"]
   130  		require.True(t, ok)
   131  		require.Empty(t, y)
   132  	}
   133  }
   134  
   135  func TestByteReaderSelectedInnerFullByColumnPath(t *testing.T) {
   136  	r := buildTestStream(t)
   137  	pr, err := NewFileReaderWithOptions(bytes.NewReader(r), WithColumnPaths(ColumnPath{"x"}))
   138  	require.NoError(t, err)
   139  
   140  	require.NotNil(t, pr.GetColumnByPath(ColumnPath{"x", "c"}))
   141  
   142  	for {
   143  		data, err := pr.NextRow()
   144  		if err == io.EOF {
   145  			break
   146  		}
   147  		require.NoError(t, err)
   148  		require.Equal(t, 2, len(data))
   149  		x, ok := data["x"].(map[string]interface{})
   150  		require.True(t, ok)
   151  		require.Equal(t, 2, len(x))
   152  		y, ok := data["y"]
   153  		require.True(t, ok)
   154  		require.Empty(t, y)
   155  	}
   156  }
   157  
   158  func TestByteReaderSelectedInnerFullSetSelectedColumns(t *testing.T) {
   159  	r := buildTestStream(t)
   160  	pr, err := NewFileReaderWithOptions(bytes.NewReader(r))
   161  	require.NoError(t, err)
   162  
   163  	pr.SetSelectedColumns("x")
   164  
   165  	for {
   166  		data, err := pr.NextRow()
   167  		if err == io.EOF {
   168  			break
   169  		}
   170  		require.NoError(t, err)
   171  		require.Equal(t, 2, len(data))
   172  		x, ok := data["x"].(map[string]interface{})
   173  		require.True(t, ok)
   174  		require.Equal(t, 2, len(x))
   175  		y, ok := data["y"]
   176  		require.True(t, ok)
   177  		require.Empty(t, y)
   178  	}
   179  }
   180  
   181  func TestByteReaderSelectedInnerFullSetSelectedColumnsByPath(t *testing.T) {
   182  	r := buildTestStream(t)
   183  	pr, err := NewFileReaderWithOptions(bytes.NewReader(r))
   184  	require.NoError(t, err)
   185  
   186  	pr.SetSelectedColumnsByPath(ColumnPath{"x"})
   187  
   188  	for {
   189  		data, err := pr.NextRow()
   190  		if err == io.EOF {
   191  			break
   192  		}
   193  		require.NoError(t, err)
   194  		require.Equal(t, 2, len(data))
   195  		x, ok := data["x"].(map[string]interface{})
   196  		require.True(t, ok)
   197  		require.Equal(t, 2, len(x))
   198  		y, ok := data["y"]
   199  		require.True(t, ok)
   200  		require.Empty(t, y)
   201  	}
   202  }
   203  
   204  func TestIssue60(t *testing.T) {
   205  	sd, err := parquetschema.ParseSchemaDefinition(`message test {
   206  		required group population (LIST){
   207  			repeated group list {
   208  				optional int64 element;
   209  			}
   210  		}
   211  	}`)
   212  	require.NoError(t, err)
   213  
   214  	var buf bytes.Buffer
   215  	fw := NewFileWriter(&buf, WithSchemaDefinition(sd))
   216  
   217  	err = fw.AddData(map[string]interface{}{
   218  		"population": map[string]interface{}{
   219  			"list": []map[string]interface{}{
   220  				{"element": int64(23)},
   221  				{"element": nil},
   222  				{"element": int64(42)},
   223  			},
   224  		},
   225  	})
   226  	require.NoError(t, err)
   227  
   228  	require.NoError(t, fw.Close())
   229  
   230  	r, err := NewFileReader(bytes.NewReader(buf.Bytes()))
   231  	require.NoError(t, err)
   232  
   233  	row, err := r.NextRow()
   234  	require.NoError(t, err)
   235  
   236  	require.Equal(t, map[string]interface{}{
   237  		"population": map[string]interface{}{
   238  			"list": []map[string]interface{}{
   239  				{"element": int64(23)},
   240  				{},
   241  				{"element": int64(42)},
   242  			},
   243  		},
   244  	}, row)
   245  
   246  	t.Logf("row = %#v", row)
   247  }