github.com/apache/arrow/go/v16@v16.1.0/parquet/pqarrow/file_writer_test.go

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pqarrow_test

import (
	"bytes"
	"strings"
	"testing"

	"github.com/apache/arrow/go/v16/arrow"
	"github.com/apache/arrow/go/v16/arrow/array"
	"github.com/apache/arrow/go/v16/arrow/memory"
	"github.com/apache/arrow/go/v16/parquet"
	"github.com/apache/arrow/go/v16/parquet/pqarrow"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestFileWriterRowGroupNumRows verifies that RowGroupNumRows reports the
// number of rows written to the current row group.
func TestFileWriterRowGroupNumRows(t *testing.T) {
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
		{Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
	}, nil)

	data := `[
		{"one": 1, "two": 2},
		{"one": 1, "two": null},
		{"one": null, "two": 2},
		{"one": null, "two": null}
	]`
	record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data))
	require.NoError(t, err)

	output := &bytes.Buffer{}
	writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(100))
	writer, err := pqarrow.NewFileWriter(schema, output, writerProps, pqarrow.DefaultWriterProps())
	require.NoError(t, err)

	require.NoError(t, writer.Write(record))
	numRows, err := writer.RowGroupNumRows()
	require.NoError(t, err)
	assert.Equal(t, 4, numRows)
	require.NoError(t, writer.Close())
}

// TestFileWriterNumRows verifies that NumRows reports the total number of rows
// in the file after Close, even when WithMaxRowGroupLength splits the written
// record across multiple row groups.
func TestFileWriterNumRows(t *testing.T) {
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
		{Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64},
	}, nil)

	data := `[
		{"one": 1, "two": 2},
		{"one": 1, "two": null},
		{"one": null, "two": 2},
		{"one": null, "two": null}
	]`
	record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data))
	require.NoError(t, err)

	maxRowGroupLength := 2

	output := &bytes.Buffer{}
	writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(int64(maxRowGroupLength)))
	writer, err := pqarrow.NewFileWriter(schema, output, writerProps, pqarrow.DefaultWriterProps())
	require.NoError(t, err)

	require.NoError(t, writer.Write(record))
	rowGroupNumRows, err := writer.RowGroupNumRows()
	require.NoError(t, err)
	assert.Equal(t, maxRowGroupLength, rowGroupNumRows)

	require.NoError(t, writer.Close())
	assert.Equal(t, 4, writer.NumRows())
}