github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/row_group_test.go (about) 1 package parquet_test 2 3 import ( 4 "bytes" 5 "io" 6 "reflect" 7 "sort" 8 "testing" 9 10 "github.com/parquet-go/parquet-go" 11 ) 12 13 func sortedRowGroup(options []parquet.RowGroupOption, rows ...interface{}) parquet.RowGroup { 14 buf := parquet.NewBuffer(options...) 15 for _, row := range rows { 16 buf.Write(row) 17 } 18 sort.Stable(buf) 19 return buf 20 } 21 22 type Person struct { 23 FirstName utf8string 24 LastName utf8string 25 Age int 26 } 27 28 type LastNameOnly struct { 29 LastName utf8string 30 } 31 32 func newPeopleBuffer(people []Person) parquet.RowGroup { 33 buffer := parquet.NewBuffer() 34 for i := range people { 35 buffer.Write(&people[i]) 36 } 37 return buffer 38 } 39 40 func newPeopleFile(people []Person) parquet.RowGroup { 41 buffer := new(bytes.Buffer) 42 writer := parquet.NewWriter(buffer) 43 for i := range people { 44 writer.Write(&people[i]) 45 } 46 writer.Close() 47 reader := bytes.NewReader(buffer.Bytes()) 48 f, err := parquet.OpenFile(reader, reader.Size()) 49 if err != nil { 50 panic(err) 51 } 52 return f.RowGroups()[0] 53 } 54 55 func TestSeekToRow(t *testing.T) { 56 for _, config := range []struct { 57 name string 58 newRowGroup func([]Person) parquet.RowGroup 59 }{ 60 {name: "buffer", newRowGroup: newPeopleBuffer}, 61 {name: "file", newRowGroup: newPeopleFile}, 62 } { 63 t.Run(config.name, func(t *testing.T) { testSeekToRow(t, config.newRowGroup) }) 64 } 65 } 66 67 func testSeekToRow(t *testing.T, newRowGroup func([]Person) parquet.RowGroup) { 68 err := quickCheck(func(people []Person) bool { 69 if len(people) == 0 { // TODO: fix creation of empty parquet files 70 return true 71 } 72 rowGroup := newRowGroup(people) 73 rows := rowGroup.Rows() 74 rbuf := make([]parquet.Row, 1) 75 pers := Person{} 76 schema := parquet.SchemaOf(&pers) 77 defer rows.Close() 78 79 for i := range people { 80 if err := rows.SeekToRow(int64(i)); err != nil { 81 t.Errorf("seeking to row %d: %+v", i, err) 82 return false 83 } 84 if _, err := rows.ReadRows(rbuf); err != nil { 85 t.Errorf("reading row %d: %+v", i, err) 86 return false 87 } 88 if err := schema.Reconstruct(&pers, rbuf[0]); err != nil { 89 t.Errorf("deconstructing row %d: %+v", i, err) 90 return false 91 } 92 if !reflect.DeepEqual(&pers, &people[i]) { 93 t.Errorf("row %d mismatch", i) 94 return false 95 } 96 } 97 98 return true 99 }) 100 if err != nil { 101 t.Error(err) 102 } 103 } 104 105 func selfRowGroup(rowGroup parquet.RowGroup) parquet.RowGroup { 106 return rowGroup 107 } 108 109 func fileRowGroup(rowGroup parquet.RowGroup) parquet.RowGroup { 110 buffer := new(bytes.Buffer) 111 writer := parquet.NewWriter(buffer) 112 if _, err := writer.WriteRowGroup(rowGroup); err != nil { 113 panic(err) 114 } 115 if err := writer.Close(); err != nil { 116 panic(err) 117 } 118 reader := bytes.NewReader(buffer.Bytes()) 119 f, err := parquet.OpenFile(reader, reader.Size()) 120 if err != nil { 121 panic(err) 122 } 123 g := f.RowGroups() 124 if len(g) > 0 { 125 return g[0] 126 } 127 // There is a test checking for a panic when merging empty row groups. One of 128 // the input is an empty row group which leads to this path. 129 // 130 // It is unnecessary to also return an empty row group here because the 131 // behavior is triggered by custom row group implementation. 132 // 133 // buffer scenario check should be sufficient to cover for the issue. 134 return nil 135 } 136 137 func TestWriteRowGroupClosesRows(t *testing.T) { 138 var rows []*wrappedRows 139 rg := wrappedRowGroup{ 140 RowGroup: newPeopleFile([]Person{{}}), 141 rowsCallback: func(r parquet.Rows) parquet.Rows { 142 wrapped := &wrappedRows{Rows: r} 143 rows = append(rows, wrapped) 144 return wrapped 145 }, 146 } 147 writer := parquet.NewWriter(io.Discard) 148 if _, err := writer.WriteRowGroup(rg); err != nil { 149 t.Fatal(err) 150 } 151 if err := writer.Close(); err != nil { 152 t.Fatal(err) 153 } 154 for _, r := range rows { 155 if !r.closed { 156 t.Fatal("rows not closed") 157 } 158 } 159 }