github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/reader_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"math/rand"
    10  	"os"
    11  	"reflect"
    12  	"testing"
    13  
    14  	"github.com/parquet-go/parquet-go"
    15  	"github.com/parquet-go/parquet-go/internal/quick"
    16  )
    17  
    18  func TestGenericReader(t *testing.T) {
    19  	testGenericReader[booleanColumn](t)
    20  	testGenericReader[int32Column](t)
    21  	testGenericReader[int64Column](t)
    22  	testGenericReader[int96Column](t)
    23  	testGenericReader[floatColumn](t)
    24  	testGenericReader[doubleColumn](t)
    25  	testGenericReader[byteArrayColumn](t)
    26  	testGenericReader[fixedLenByteArrayColumn](t)
    27  	testGenericReader[stringColumn](t)
    28  	testGenericReader[indexedStringColumn](t)
    29  	testGenericReader[uuidColumn](t)
    30  	testGenericReader[timeColumn](t)
    31  	testGenericReader[timeInMillisColumn](t)
    32  	testGenericReader[mapColumn](t)
    33  	testGenericReader[decimalColumn](t)
    34  	testGenericReader[addressBook](t)
    35  	testGenericReader[contact](t)
    36  	testGenericReader[listColumn2](t)
    37  	testGenericReader[listColumn1](t)
    38  	testGenericReader[listColumn0](t)
    39  	testGenericReader[nestedListColumn1](t)
    40  	testGenericReader[nestedListColumn](t)
    41  	testGenericReader[*contact](t)
    42  	testGenericReader[paddedBooleanColumn](t)
    43  	testGenericReader[optionalInt32Column](t)
    44  	testGenericReader[repeatedInt32Column](t)
    45  }
    46  
    47  func testGenericReader[Row any](t *testing.T) {
    48  	var model Row
    49  	t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) {
    50  		err := quickCheck(func(rows []Row) bool {
    51  			if len(rows) == 0 {
    52  				return true // TODO: fix support for parquet files with zero rows
    53  			}
    54  			if err := testGenericReaderRows(rows); err != nil {
    55  				t.Error(err)
    56  				return false
    57  			}
    58  			return true
    59  		})
    60  		if err != nil {
    61  			t.Error(err)
    62  		}
    63  	})
    64  }
    65  
    66  func testGenericReaderRows[Row any](rows []Row) error {
    67  	setNullPointers(rows)
    68  	buffer := new(bytes.Buffer)
    69  	writer := parquet.NewGenericWriter[Row](buffer)
    70  	_, err := writer.Write(rows)
    71  	if err != nil {
    72  		return err
    73  	}
    74  	if err := writer.Close(); err != nil {
    75  		return err
    76  	}
    77  	reader := parquet.NewGenericReader[Row](bytes.NewReader(buffer.Bytes()))
    78  	result := make([]Row, len(rows))
    79  	n, err := reader.Read(result)
    80  	if err != nil && !errors.Is(err, io.EOF) {
    81  		return err
    82  	}
    83  	if n < len(rows) {
    84  		return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n)
    85  	}
    86  	if !reflect.DeepEqual(rows, result) {
    87  		return fmt.Errorf("rows mismatch:\nwant: %+v\ngot: %+v", rows, result)
    88  	}
    89  	return nil
    90  }
    91  
    92  func TestIssue400(t *testing.T) {
    93  	type B struct {
    94  		Name string
    95  	}
    96  	type A struct {
    97  		B []B `parquet:",optional"`
    98  	}
    99  
   100  	b := new(bytes.Buffer)
   101  	w := parquet.NewGenericWriter[A](b)
   102  	expect := []A{
   103  		{
   104  			B: []B{
   105  				{
   106  					// 32 bytes random so we can see in the binary parquet if we
   107  					// actually wrote the value
   108  					Name: "9e7eb1f0-bbcc-43ec-bfad-a9fac1bb0feb",
   109  				},
   110  			},
   111  		},
   112  	}
   113  	_, err := w.Write(expect)
   114  	if err != nil {
   115  		t.Fatal(err)
   116  	}
   117  	if err = w.Close(); err != nil {
   118  		t.Fatal(err)
   119  	}
   120  
   121  	r := parquet.NewGenericReader[A](bytes.NewReader(b.Bytes()))
   122  	values := make([]A, 1)
   123  	_, err = r.Read(values)
   124  	if err != nil {
   125  		t.Fatal(err)
   126  	}
   127  	if !reflect.DeepEqual(expect[0], values[0]) {
   128  		t.Errorf("want %q got %q", values[0], expect[0])
   129  	}
   130  }
   131  
   132  func TestReadMinPageSize(t *testing.T) {
   133  	// NOTE: min page size is 307 for MyRow schema
   134  	t.Run("test read less than min page size", func(t *testing.T) { testReadMinPageSize(128, t) })
   135  	t.Run("test read equal to min page size", func(t *testing.T) { testReadMinPageSize(307, t) })
   136  	t.Run("test read more than min page size", func(t *testing.T) { testReadMinPageSize(384, t) })
   137  	// NOTE: num rows is 20,000
   138  	t.Run("test read equal to num rows", func(t *testing.T) { testReadMinPageSize(20_000, t) })
   139  	t.Run("test read more than num rows", func(t *testing.T) { testReadMinPageSize(25_000, t) })
   140  }
   141  
   142  func testReadMinPageSize(readSize int, t *testing.T) {
   143  	type MyRow struct {
   144  		ID    [16]byte `parquet:"id,delta,uuid"`
   145  		File  string   `parquet:"file,dict,zstd"`
   146  		Index int64    `parquet:"index,delta,zstd"`
   147  	}
   148  
   149  	numRows := 20_000
   150  	maxPageBytes := 5000
   151  
   152  	tmp, err := os.CreateTemp("/tmp", "*.parquet")
   153  	if err != nil {
   154  		t.Fatal("os.CreateTemp: ", err)
   155  	}
   156  	path := tmp.Name()
   157  	defer os.Remove(path)
   158  	t.Log("file:", path)
   159  
   160  	// The page buffer size ensures we get multiple pages out of this example.
   161  	w := parquet.NewGenericWriter[MyRow](tmp, parquet.PageBufferSize(maxPageBytes))
   162  	// Need to write 1 row at a time here as writing many at once disregards PageBufferSize option.
   163  	for i := 0; i < numRows; i++ {
   164  		row := MyRow{
   165  			ID:    [16]byte{15: byte(i)},
   166  			File:  "hi" + fmt.Sprint(i),
   167  			Index: int64(i),
   168  		}
   169  		_, err := w.Write([]MyRow{row})
   170  		if err != nil {
   171  			t.Fatal("w.Write: ", err)
   172  		}
   173  		// Flush writes rows as row group. 4 total (20k/5k) in this file.
   174  		if (i+1)%maxPageBytes == 0 {
   175  			err = w.Flush()
   176  			if err != nil {
   177  				t.Fatal("w.Flush: ", err)
   178  			}
   179  		}
   180  	}
   181  	err = w.Close()
   182  	if err != nil {
   183  		t.Fatal("w.Close: ", err)
   184  	}
   185  	err = tmp.Close()
   186  	if err != nil {
   187  		t.Fatal("tmp.Close: ", err)
   188  	}
   189  
   190  	file, err := os.Open(path)
   191  	if err != nil {
   192  		t.Fatal("os.Open", err)
   193  	}
   194  	reader := parquet.NewGenericReader[MyRow](file)
   195  	read := int64(0)
   196  	nRows := reader.NumRows()
   197  	rows := make([]MyRow, 0, nRows)
   198  	buf := make([]MyRow, readSize) // NOTE: min page size is 307 for MyRow schema
   199  
   200  	for read < nRows {
   201  		num, err := reader.Read(buf)
   202  		read += int64(num)
   203  		if err != nil && !errors.Is(err, io.EOF) {
   204  			t.Fatal("Read:", err)
   205  		}
   206  		rows = append(rows, buf...)
   207  	}
   208  
   209  	if err := reader.Close(); err != nil {
   210  		t.Fatal("Close", err)
   211  	}
   212  
   213  	if len(rows) < numRows {
   214  		t.Fatalf("not enough values were read: want=%d got=%d", len(rows), numRows)
   215  	}
   216  	for i, row := range rows[:numRows] {
   217  		id := [16]byte{15: byte(i)}
   218  		file := "hi" + fmt.Sprint(i)
   219  		index := int64(i)
   220  
   221  		if row.ID != id || row.File != file || row.Index != index {
   222  			t.Fatalf("rows mismatch at index: %d got: %+v", i, row)
   223  		}
   224  	}
   225  }
   226  
   227  func BenchmarkGenericReader(b *testing.B) {
   228  	benchmarkGenericReader[benchmarkRowType](b)
   229  	benchmarkGenericReader[booleanColumn](b)
   230  	benchmarkGenericReader[int32Column](b)
   231  	benchmarkGenericReader[int64Column](b)
   232  	benchmarkGenericReader[floatColumn](b)
   233  	benchmarkGenericReader[doubleColumn](b)
   234  	benchmarkGenericReader[byteArrayColumn](b)
   235  	benchmarkGenericReader[fixedLenByteArrayColumn](b)
   236  	benchmarkGenericReader[stringColumn](b)
   237  	benchmarkGenericReader[indexedStringColumn](b)
   238  	benchmarkGenericReader[uuidColumn](b)
   239  	benchmarkGenericReader[timeColumn](b)
   240  	benchmarkGenericReader[timeInMillisColumn](b)
   241  	benchmarkGenericReader[mapColumn](b)
   242  	benchmarkGenericReader[decimalColumn](b)
   243  	benchmarkGenericReader[contact](b)
   244  	benchmarkGenericReader[paddedBooleanColumn](b)
   245  	benchmarkGenericReader[optionalInt32Column](b)
   246  }
   247  
   248  func benchmarkGenericReader[Row generator[Row]](b *testing.B) {
   249  	var model Row
   250  	b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) {
   251  		prng := rand.New(rand.NewSource(0))
   252  		rows := make([]Row, benchmarkNumRows)
   253  		for i := range rows {
   254  			rows[i] = rows[i].generate(prng)
   255  		}
   256  
   257  		rowbuf := make([]Row, benchmarkRowsPerStep)
   258  		buffer := parquet.NewGenericBuffer[Row]()
   259  		buffer.Write(rows)
   260  
   261  		b.Run("go1.17", func(b *testing.B) {
   262  			reader := parquet.NewRowGroupReader(buffer)
   263  			benchmarkRowsPerSecond(b, func() int {
   264  				for i := range rowbuf {
   265  					if err := reader.Read(&rowbuf[i]); err != nil {
   266  						if err != io.EOF {
   267  							b.Fatal(err)
   268  						} else {
   269  							reader.Reset()
   270  						}
   271  					}
   272  				}
   273  				return len(rowbuf)
   274  			})
   275  		})
   276  
   277  		b.Run("go1.18", func(b *testing.B) {
   278  			reader := parquet.NewGenericRowGroupReader[Row](buffer)
   279  			benchmarkRowsPerSecond(b, func() int {
   280  				n, err := reader.Read(rowbuf)
   281  				if err != nil {
   282  					if err != io.EOF {
   283  						b.Fatal(err)
   284  					} else {
   285  						reader.Reset()
   286  					}
   287  				}
   288  				return n
   289  			})
   290  		})
   291  	})
   292  }
   293  
   294  func rowsOf(numRows int, model interface{}) rows {
   295  	prng := rand.New(rand.NewSource(0))
   296  	return randomRowsOf(prng, numRows, model)
   297  }
   298  
   299  func randomRowsOf(prng *rand.Rand, numRows int, model interface{}) rows {
   300  	typ := reflect.TypeOf(model)
   301  	rows := make(rows, numRows)
   302  	makeValue := quick.MakeValueFuncOf(typ)
   303  	for i := range rows {
   304  		v := reflect.New(typ).Elem()
   305  		makeValue(v, prng)
   306  		rows[i] = v.Interface()
   307  	}
   308  	return rows
   309  }
   310  
   311  var readerTests = []struct {
   312  	scenario string
   313  	model    interface{}
   314  }{
   315  	{
   316  		scenario: "BOOLEAN",
   317  		model:    booleanColumn{},
   318  	},
   319  
   320  	{
   321  		scenario: "INT32",
   322  		model:    int32Column{},
   323  	},
   324  
   325  	{
   326  		scenario: "INT64",
   327  		model:    int64Column{},
   328  	},
   329  
   330  	{
   331  		scenario: "INT96",
   332  		model:    int96Column{},
   333  	},
   334  
   335  	{
   336  		scenario: "FLOAT",
   337  		model:    floatColumn{},
   338  	},
   339  
   340  	{
   341  		scenario: "DOUBLE",
   342  		model:    doubleColumn{},
   343  	},
   344  
   345  	{
   346  		scenario: "BYTE_ARRAY",
   347  		model:    byteArrayColumn{},
   348  	},
   349  
   350  	{
   351  		scenario: "FIXED_LEN_BYTE_ARRAY",
   352  		model:    fixedLenByteArrayColumn{},
   353  	},
   354  
   355  	{
   356  		scenario: "STRING",
   357  		model:    stringColumn{},
   358  	},
   359  
   360  	{
   361  		scenario: "STRING (dict)",
   362  		model:    indexedStringColumn{},
   363  	},
   364  
   365  	{
   366  		scenario: "UUID",
   367  		model:    uuidColumn{},
   368  	},
   369  
   370  	{
   371  		scenario: "time.Time",
   372  		model:    timeColumn{},
   373  	},
   374  
   375  	{
   376  		scenario: "time.Time in ms",
   377  		model:    timeInMillisColumn{},
   378  	},
   379  
   380  	{
   381  		scenario: "DECIMAL",
   382  		model:    decimalColumn{},
   383  	},
   384  
   385  	{
   386  		scenario: "AddressBook",
   387  		model:    addressBook{},
   388  	},
   389  
   390  	{
   391  		scenario: "one optional level",
   392  		model:    listColumn2{},
   393  	},
   394  
   395  	{
   396  		scenario: "one repeated level",
   397  		model:    listColumn1{},
   398  	},
   399  
   400  	{
   401  		scenario: "two repeated levels",
   402  		model:    listColumn0{},
   403  	},
   404  
   405  	{
   406  		scenario: "three repeated levels",
   407  		model:    listColumn0{},
   408  	},
   409  
   410  	{
   411  		scenario: "nested lists",
   412  		model:    nestedListColumn{},
   413  	},
   414  
   415  	{
   416  		scenario: "key-value pairs",
   417  		model: struct {
   418  			KeyValuePairs map[utf8string]utf8string
   419  		}{},
   420  	},
   421  
   422  	{
   423  		scenario: "multiple key-value pairs",
   424  		model: struct {
   425  			KeyValuePairs0 map[utf8string]utf8string
   426  			KeyValuePairs1 map[utf8string]utf8string
   427  			KeyValuePairs2 map[utf8string]utf8string
   428  		}{},
   429  	},
   430  
   431  	{
   432  		scenario: "repeated key-value pairs",
   433  		model: struct {
   434  			RepeatedKeyValuePairs []map[utf8string]utf8string
   435  		}{},
   436  	},
   437  
   438  	{
   439  		scenario: "map of repeated values",
   440  		model: struct {
   441  			MapOfRepeated map[utf8string][]utf8string
   442  		}{},
   443  	},
   444  }
   445  
   446  func TestReader(t *testing.T) {
   447  	buf := new(bytes.Buffer)
   448  	file := bytes.NewReader(nil)
   449  
   450  	for _, test := range readerTests {
   451  		t.Run(test.scenario, func(t *testing.T) {
   452  			const N = 42
   453  
   454  			rowType := reflect.TypeOf(test.model)
   455  			rowPtr := reflect.New(rowType)
   456  			rowZero := reflect.Zero(rowType)
   457  			rowValue := rowPtr.Elem()
   458  
   459  			for n := 1; n < N; n++ {
   460  				t.Run(fmt.Sprintf("N=%d", n), func(t *testing.T) {
   461  					defer buf.Reset()
   462  					rows := rowsOf(n, test.model)
   463  
   464  					if err := writeParquetFileWithBuffer(buf, rows); err != nil {
   465  						t.Fatal(err)
   466  					}
   467  
   468  					file.Reset(buf.Bytes())
   469  					r := parquet.NewReader(file, parquet.SchemaOf(test.model))
   470  
   471  					for i, v := range rows {
   472  						if err := r.Read(rowPtr.Interface()); err != nil {
   473  							t.Fatal(err)
   474  						}
   475  						if !reflect.DeepEqual(rowValue.Interface(), v) {
   476  							t.Errorf("row mismatch at index %d\nwant = %+v\ngot  = %+v", i, v, rowValue.Interface())
   477  						}
   478  						rowValue.Set(rowZero)
   479  					}
   480  
   481  					if err := r.Read(rowPtr.Interface()); err != io.EOF {
   482  						t.Errorf("expected EOF after reading all values but got: %v", err)
   483  					}
   484  				})
   485  			}
   486  		})
   487  	}
   488  }
   489  
   490  func BenchmarkReaderReadType(b *testing.B) {
   491  	buf := new(bytes.Buffer)
   492  	file := bytes.NewReader(nil)
   493  
   494  	for _, test := range readerTests {
   495  		b.Run(test.scenario, func(b *testing.B) {
   496  			defer buf.Reset()
   497  			rows := rowsOf(benchmarkNumRows, test.model)
   498  
   499  			if err := writeParquetFile(buf, rows); err != nil {
   500  				b.Fatal(err)
   501  			}
   502  			file.Reset(buf.Bytes())
   503  			f, err := parquet.OpenFile(file, file.Size())
   504  			if err != nil {
   505  				b.Fatal(err)
   506  			}
   507  
   508  			rowType := reflect.TypeOf(test.model)
   509  			rowPtr := reflect.New(rowType)
   510  			rowZero := reflect.Zero(rowType)
   511  			rowValue := rowPtr.Elem()
   512  
   513  			r := parquet.NewReader(f)
   514  			p := rowPtr.Interface()
   515  
   516  			benchmarkRowsPerSecond(b, func() (n int) {
   517  				for i := 0; i < benchmarkRowsPerStep; i++ {
   518  					if err := r.Read(p); err != nil {
   519  						if err == io.EOF {
   520  							r.Reset()
   521  						} else {
   522  							b.Fatal(err)
   523  						}
   524  					}
   525  				}
   526  				rowValue.Set(rowZero)
   527  				return benchmarkRowsPerStep
   528  			})
   529  
   530  			b.SetBytes(int64(math.Ceil(benchmarkRowsPerStep * float64(file.Size()) / benchmarkNumRows)))
   531  		})
   532  	}
   533  }
   534  
   535  func BenchmarkReaderReadRow(b *testing.B) {
   536  	buf := new(bytes.Buffer)
   537  	file := bytes.NewReader(nil)
   538  
   539  	for _, test := range readerTests {
   540  		b.Run(test.scenario, func(b *testing.B) {
   541  			defer buf.Reset()
   542  			rows := rowsOf(benchmarkNumRows, test.model)
   543  
   544  			if err := writeParquetFile(buf, rows); err != nil {
   545  				b.Fatal(err)
   546  			}
   547  			file.Reset(buf.Bytes())
   548  			f, err := parquet.OpenFile(file, file.Size())
   549  			if err != nil {
   550  				b.Fatal(err)
   551  			}
   552  
   553  			r := parquet.NewReader(f)
   554  			rowbuf := make([]parquet.Row, benchmarkRowsPerStep)
   555  
   556  			benchmarkRowsPerSecond(b, func() int {
   557  				n, err := r.ReadRows(rowbuf)
   558  				if err != nil {
   559  					if err == io.EOF {
   560  						r.Reset()
   561  					} else {
   562  						b.Fatal(err)
   563  					}
   564  				}
   565  				return n
   566  			})
   567  
   568  			b.SetBytes(int64(math.Ceil(benchmarkRowsPerStep * float64(file.Size()) / benchmarkNumRows)))
   569  		})
   570  	}
   571  }
   572  
   573  func TestReaderReadSubset(t *testing.T) {
   574  	// In this example we'll write 3 columns to the file - X, Y, and Z, but
   575  	// we'll only read out the X and Y columns. Returns true if all writes
   576  	// and reads were successful, and false otherwise.
   577  	type Point3D struct{ X, Y, Z int64 }
   578  	type Point2D struct{ X, Y int64 }
   579  
   580  	err := quickCheck(func(points3D []Point3D) bool {
   581  		if len(points3D) == 0 {
   582  			return true
   583  		}
   584  		buf := new(bytes.Buffer)
   585  		err := writeParquetFile(buf, makeRows(points3D))
   586  		if err != nil {
   587  			t.Error(err)
   588  			return false
   589  		}
   590  		reader := parquet.NewReader(bytes.NewReader(buf.Bytes()))
   591  		for i := 0; ; i++ {
   592  			row := Point2D{}
   593  			err := reader.Read(&row)
   594  			if err != nil {
   595  				if err == io.EOF && i == len(points3D) {
   596  					break
   597  				}
   598  				t.Error(err)
   599  				return false
   600  			}
   601  			if row != (Point2D{X: points3D[i].X, Y: points3D[i].Y}) {
   602  				t.Errorf("points mismatch at row index %d: want=%v got=%v", i, points3D[i], row)
   603  				return false
   604  			}
   605  		}
   606  		return true
   607  	})
   608  	if err != nil {
   609  		t.Error(err)
   610  	}
   611  }
   612  
   613  func TestReaderSeekToRow(t *testing.T) {
   614  	type rowType struct {
   615  		Name utf8string `parquet:",dict"`
   616  	}
   617  
   618  	rows := rowsOf(10, rowType{})
   619  	buf := new(bytes.Buffer)
   620  	err := writeParquetFile(buf, rows)
   621  	if err != nil {
   622  		t.Fatal(err)
   623  	}
   624  
   625  	reader := parquet.NewReader(bytes.NewReader(buf.Bytes()))
   626  	for i := 0; i < 10; i++ {
   627  		if err := reader.SeekToRow(int64(i)); err != nil {
   628  			t.Fatalf("seek to row %d: %v", i, err)
   629  		}
   630  
   631  		row := new(rowType)
   632  		err := reader.Read(row)
   633  		if err != nil {
   634  			t.Fatalf("reading row %d: %v", i, err)
   635  		}
   636  
   637  		if *row != rows[i] {
   638  			t.Fatalf("row %d mismatch: got=%+v want=%+v", i, *row, rows[i])
   639  		}
   640  	}
   641  }
   642  
   643  func TestSeekToRowNoDict(t *testing.T) {
   644  	type rowType struct {
   645  		Name utf8string `parquet:","` // no dictionary encoding
   646  	}
   647  
   648  	// write samples to in-memory buffer
   649  	buf := new(bytes.Buffer)
   650  	schema := parquet.SchemaOf(new(rowType))
   651  	w := parquet.NewWriter(buf, schema)
   652  	sample := rowType{
   653  		Name: "foo1",
   654  	}
   655  	// write two rows
   656  	w.Write(sample)
   657  	sample.Name = "foo2"
   658  	w.Write(sample)
   659  	w.Close()
   660  
   661  	// create reader
   662  	r := parquet.NewReader(bytes.NewReader(buf.Bytes()))
   663  
   664  	// read second row
   665  	r.SeekToRow(1)
   666  	row := new(rowType)
   667  	err := r.Read(row)
   668  	if err != nil {
   669  		t.Fatalf("reading row: %v", err)
   670  	}
   671  	// fmt.Println(&sample, row)
   672  	if *row != sample {
   673  		t.Fatalf("read != write")
   674  	}
   675  }
   676  
   677  func TestSeekToRowReadAll(t *testing.T) {
   678  	type rowType struct {
   679  		Name utf8string `parquet:",dict"`
   680  	}
   681  
   682  	// write samples to in-memory buffer
   683  	buf := new(bytes.Buffer)
   684  	schema := parquet.SchemaOf(new(rowType))
   685  	w := parquet.NewWriter(buf, schema)
   686  	sample := rowType{
   687  		Name: "foo1",
   688  	}
   689  	// write two rows
   690  	w.Write(sample)
   691  	sample.Name = "foo2"
   692  	w.Write(sample)
   693  	w.Close()
   694  
   695  	// create reader
   696  	r := parquet.NewReader(bytes.NewReader(buf.Bytes()))
   697  
   698  	// read first row
   699  	r.SeekToRow(0)
   700  	row := new(rowType)
   701  	err := r.Read(row)
   702  	if err != nil {
   703  		t.Fatalf("reading row: %v", err)
   704  	}
   705  	// read second row
   706  	r.SeekToRow(1)
   707  	row = new(rowType)
   708  	err = r.Read(row)
   709  	if err != nil {
   710  		t.Fatalf("reading row: %v", err)
   711  	}
   712  	// fmt.Println(&sample, row)
   713  	if *row != sample {
   714  		t.Fatalf("read != write")
   715  	}
   716  }
   717  
   718  func TestSeekToRowDictReadSecond(t *testing.T) {
   719  	type rowType struct {
   720  		Name utf8string `parquet:",dict"`
   721  	}
   722  
   723  	// write samples to in-memory buffer
   724  	buf := new(bytes.Buffer)
   725  	schema := parquet.SchemaOf(new(rowType))
   726  	w := parquet.NewWriter(buf, schema)
   727  	sample := rowType{
   728  		Name: "foo1",
   729  	}
   730  	// write two rows
   731  	w.Write(sample)
   732  	sample.Name = "foo2"
   733  	w.Write(sample)
   734  	w.Close()
   735  
   736  	// create reader
   737  	r := parquet.NewReader(bytes.NewReader(buf.Bytes()))
   738  
   739  	// read second row
   740  	r.SeekToRow(1)
   741  	row := new(rowType)
   742  	err := r.Read(row)
   743  	if err != nil {
   744  		t.Fatalf("reading row: %v", err)
   745  	}
   746  	// fmt.Println(&sample, row)
   747  	if *row != sample {
   748  		t.Fatalf("read != write")
   749  	}
   750  }
   751  
   752  func TestSeekToRowDictReadMultiplePages(t *testing.T) {
   753  	type rowType struct {
   754  		Name utf8string `parquet:",dict"`
   755  	}
   756  
   757  	// write samples to in-memory buffer
   758  	buf := new(bytes.Buffer)
   759  	schema := parquet.SchemaOf(new(rowType))
   760  	w := parquet.NewWriter(buf, schema, &parquet.WriterConfig{
   761  		PageBufferSize: 10,
   762  	})
   763  	sample := rowType{
   764  		Name: "foo1",
   765  	}
   766  
   767  	// write enough rows to spill over a single page
   768  	for i := 0; i < 10; i++ {
   769  		w.Write(sample)
   770  	}
   771  	sample.Name = "foo2"
   772  	w.Write(sample)
   773  	w.Close()
   774  
   775  	// create reader
   776  	r := parquet.NewReader(bytes.NewReader(buf.Bytes()))
   777  
   778  	// read 11th row
   779  	r.SeekToRow(10)
   780  	row := new(rowType)
   781  	err := r.Read(row)
   782  	if err != nil {
   783  		t.Fatalf("reading row: %v", err)
   784  	}
   785  	if *row != sample {
   786  		t.Fatalf("read != write")
   787  	}
   788  }