github.com/apache/arrow/go/v14@v14.0.1/parquet/pqarrow/schema_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package pqarrow_test
    18  
    19  import (
    20  	"encoding/base64"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v14/arrow"
    24  	"github.com/apache/arrow/go/v14/arrow/flight"
    25  	"github.com/apache/arrow/go/v14/arrow/ipc"
    26  	"github.com/apache/arrow/go/v14/arrow/memory"
    27  	"github.com/apache/arrow/go/v14/internal/types"
    28  	"github.com/apache/arrow/go/v14/parquet"
    29  	"github.com/apache/arrow/go/v14/parquet/metadata"
    30  	"github.com/apache/arrow/go/v14/parquet/pqarrow"
    31  	"github.com/apache/arrow/go/v14/parquet/schema"
    32  	"github.com/stretchr/testify/assert"
    33  	"github.com/stretchr/testify/require"
    34  )
    35  
    36  func TestGetOriginSchemaBase64(t *testing.T) {
    37  	uuidType := types.NewUUIDType()
    38  	md := arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})
    39  	extMd := arrow.NewMetadata([]string{ipc.ExtensionMetadataKeyName, ipc.ExtensionTypeKeyName, "PARQUET:field_id"}, []string{uuidType.Serialize(), uuidType.ExtensionName(), "-1"})
    40  	origArrSc := arrow.NewSchema([]arrow.Field{
    41  		{Name: "f1", Type: arrow.BinaryTypes.String, Metadata: md},
    42  		{Name: "f2", Type: arrow.PrimitiveTypes.Int64, Metadata: md},
    43  		{Name: "uuid", Type: uuidType, Metadata: extMd},
    44  	}, nil)
    45  
    46  	arrSerializedSc := flight.SerializeSchema(origArrSc, memory.DefaultAllocator)
    47  	if err := arrow.RegisterExtensionType(uuidType); err != nil {
    48  		t.Fatal(err)
    49  	}
    50  	defer arrow.UnregisterExtensionType(uuidType.ExtensionName())
    51  	pqschema, err := pqarrow.ToParquet(origArrSc, nil, pqarrow.DefaultWriterProps())
    52  	require.NoError(t, err)
    53  
    54  	tests := []struct {
    55  		name string
    56  		enc  *base64.Encoding
    57  	}{
    58  		{"raw", base64.RawStdEncoding},
    59  		{"std", base64.StdEncoding},
    60  	}
    61  
    62  	for _, tt := range tests {
    63  		t.Run(tt.name, func(t *testing.T) {
    64  			kv := metadata.NewKeyValueMetadata()
    65  			kv.Append("ARROW:schema", tt.enc.EncodeToString(arrSerializedSc))
    66  			arrsc, err := pqarrow.FromParquet(pqschema, nil, kv)
    67  			assert.NoError(t, err)
    68  			assert.True(t, origArrSc.Equal(arrsc))
    69  		})
    70  	}
    71  }
    72  
    73  func TestGetOriginSchemaUnregisteredExtension(t *testing.T) {
    74  	uuidType := types.NewUUIDType()
    75  	if err := arrow.RegisterExtensionType(uuidType); err != nil {
    76  		t.Fatal(err)
    77  	}
    78  
    79  	md := arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})
    80  	origArrSc := arrow.NewSchema([]arrow.Field{
    81  		{Name: "f1", Type: arrow.BinaryTypes.String, Metadata: md},
    82  		{Name: "f2", Type: arrow.PrimitiveTypes.Int64, Metadata: md},
    83  		{Name: "uuid", Type: uuidType, Metadata: md},
    84  	}, nil)
    85  	pqschema, err := pqarrow.ToParquet(origArrSc, nil, pqarrow.DefaultWriterProps())
    86  	require.NoError(t, err)
    87  
    88  	arrSerializedSc := flight.SerializeSchema(origArrSc, memory.DefaultAllocator)
    89  	kv := metadata.NewKeyValueMetadata()
    90  	kv.Append("ARROW:schema", base64.StdEncoding.EncodeToString(arrSerializedSc))
    91  
    92  	arrow.UnregisterExtensionType(uuidType.ExtensionName())
    93  	arrsc, err := pqarrow.FromParquet(pqschema, nil, kv)
    94  	require.NoError(t, err)
    95  
    96  	extMd := arrow.NewMetadata([]string{ipc.ExtensionMetadataKeyName, ipc.ExtensionTypeKeyName, "PARQUET:field_id"},
    97  		[]string{uuidType.Serialize(), uuidType.ExtensionName(), "-1"})
    98  	expArrSc := arrow.NewSchema([]arrow.Field{
    99  		{Name: "f1", Type: arrow.BinaryTypes.String, Metadata: md},
   100  		{Name: "f2", Type: arrow.PrimitiveTypes.Int64, Metadata: md},
   101  		{Name: "uuid", Type: uuidType.StorageType(), Metadata: extMd},
   102  	}, nil)
   103  
   104  	assert.Truef(t, expArrSc.Equal(arrsc), "expected: %s\ngot: %s", expArrSc, arrsc)
   105  }
   106  
   107  func TestToParquetWriterConfig(t *testing.T) {
   108  	origSc := arrow.NewSchema([]arrow.Field{
   109  		{Name: "f1", Type: arrow.BinaryTypes.String},
   110  		{Name: "f2", Type: arrow.PrimitiveTypes.Int64},
   111  	}, nil)
   112  
   113  	tests := []struct {
   114  		name           string
   115  		rootRepetition parquet.Repetition
   116  	}{
   117  		{"test1", parquet.Repetitions.Required},
   118  		{"test2", parquet.Repetitions.Repeated},
   119  	}
   120  
   121  	for _, tt := range tests {
   122  		t.Run(tt.name, func(t *testing.T) {
   123  
   124  			pqschema, err := pqarrow.ToParquet(origSc,
   125  				parquet.NewWriterProperties(
   126  					parquet.WithRootName(tt.name),
   127  					parquet.WithRootRepetition(tt.rootRepetition),
   128  				),
   129  				pqarrow.DefaultWriterProps())
   130  			require.NoError(t, err)
   131  
   132  			assert.Equal(t, tt.name, pqschema.Root().Name())
   133  			assert.Equal(t, tt.rootRepetition, pqschema.Root().RepetitionType())
   134  		})
   135  	}
   136  }
   137  
   138  func TestConvertArrowFlatPrimitives(t *testing.T) {
   139  	parquetFields := make(schema.FieldList, 0)
   140  	arrowFields := make([]arrow.Field, 0)
   141  
   142  	parquetFields = append(parquetFields, schema.NewBooleanNode("boolean", parquet.Repetitions.Required, -1))
   143  	arrowFields = append(arrowFields, arrow.Field{Name: "boolean", Type: arrow.FixedWidthTypes.Boolean, Nullable: false})
   144  
   145  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("int8", parquet.Repetitions.Required,
   146  		schema.NewIntLogicalType(8, true), parquet.Types.Int32, 0, -1)))
   147  	arrowFields = append(arrowFields, arrow.Field{Name: "int8", Type: arrow.PrimitiveTypes.Int8, Nullable: false})
   148  
   149  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("uint8", parquet.Repetitions.Required,
   150  		schema.NewIntLogicalType(8, false), parquet.Types.Int32, 0, -1)))
   151  	arrowFields = append(arrowFields, arrow.Field{Name: "uint8", Type: arrow.PrimitiveTypes.Uint8, Nullable: false})
   152  
   153  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("int16", parquet.Repetitions.Required,
   154  		schema.NewIntLogicalType(16, true), parquet.Types.Int32, 0, -1)))
   155  	arrowFields = append(arrowFields, arrow.Field{Name: "int16", Type: arrow.PrimitiveTypes.Int16, Nullable: false})
   156  
   157  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("uint16", parquet.Repetitions.Required,
   158  		schema.NewIntLogicalType(16, false), parquet.Types.Int32, 0, -1)))
   159  	arrowFields = append(arrowFields, arrow.Field{Name: "uint16", Type: arrow.PrimitiveTypes.Uint16, Nullable: false})
   160  
   161  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("int32", parquet.Repetitions.Required,
   162  		schema.NewIntLogicalType(32, true), parquet.Types.Int32, 0, -1)))
   163  	arrowFields = append(arrowFields, arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: false})
   164  
   165  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("uint32", parquet.Repetitions.Required,
   166  		schema.NewIntLogicalType(32, false), parquet.Types.Int32, 0, -1)))
   167  	arrowFields = append(arrowFields, arrow.Field{Name: "uint32", Type: arrow.PrimitiveTypes.Uint32, Nullable: false})
   168  
   169  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("int64", parquet.Repetitions.Required,
   170  		schema.NewIntLogicalType(64, true), parquet.Types.Int64, 0, -1)))
   171  	arrowFields = append(arrowFields, arrow.Field{Name: "int64", Type: arrow.PrimitiveTypes.Int64, Nullable: false})
   172  
   173  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("uint64", parquet.Repetitions.Required,
   174  		schema.NewIntLogicalType(64, false), parquet.Types.Int64, 0, -1)))
   175  	arrowFields = append(arrowFields, arrow.Field{Name: "uint64", Type: arrow.PrimitiveTypes.Uint64, Nullable: false})
   176  
   177  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeConverted("timestamp", parquet.Repetitions.Required,
   178  		parquet.Types.Int64, schema.ConvertedTypes.TimestampMillis, 0, 0, 0, -1)))
   179  	arrowFields = append(arrowFields, arrow.Field{Name: "timestamp", Type: arrow.FixedWidthTypes.Timestamp_ms, Nullable: false})
   180  
   181  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeConverted("timestamp[us]", parquet.Repetitions.Required,
   182  		parquet.Types.Int64, schema.ConvertedTypes.TimestampMicros, 0, 0, 0, -1)))
   183  	arrowFields = append(arrowFields, arrow.Field{Name: "timestamp[us]", Type: arrow.FixedWidthTypes.Timestamp_us, Nullable: false})
   184  
   185  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("date", parquet.Repetitions.Required,
   186  		schema.DateLogicalType{}, parquet.Types.Int32, 0, -1)))
   187  	arrowFields = append(arrowFields, arrow.Field{Name: "date", Type: arrow.FixedWidthTypes.Date32, Nullable: false})
   188  
   189  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("date64", parquet.Repetitions.Required,
   190  		schema.NewTimestampLogicalType(true, schema.TimeUnitMillis), parquet.Types.Int64, 0, -1)))
   191  	arrowFields = append(arrowFields, arrow.Field{Name: "date64", Type: arrow.FixedWidthTypes.Date64, Nullable: false})
   192  
   193  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("time32", parquet.Repetitions.Required,
   194  		schema.NewTimeLogicalType(true, schema.TimeUnitMillis), parquet.Types.Int32, 0, -1)))
   195  	arrowFields = append(arrowFields, arrow.Field{Name: "time32", Type: arrow.FixedWidthTypes.Time32ms, Nullable: false})
   196  
   197  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("time64", parquet.Repetitions.Required,
   198  		schema.NewTimeLogicalType(true, schema.TimeUnitMicros), parquet.Types.Int64, 0, -1)))
   199  	arrowFields = append(arrowFields, arrow.Field{Name: "time64", Type: arrow.FixedWidthTypes.Time64us, Nullable: false})
   200  
   201  	parquetFields = append(parquetFields, schema.NewInt96Node("timestamp96", parquet.Repetitions.Required, -1))
   202  	arrowFields = append(arrowFields, arrow.Field{Name: "timestamp96", Type: arrow.FixedWidthTypes.Timestamp_ns, Nullable: false})
   203  
   204  	parquetFields = append(parquetFields, schema.NewFloat32Node("float", parquet.Repetitions.Optional, -1))
   205  	arrowFields = append(arrowFields, arrow.Field{Name: "float", Type: arrow.PrimitiveTypes.Float32, Nullable: true})
   206  
   207  	parquetFields = append(parquetFields, schema.NewFloat64Node("double", parquet.Repetitions.Optional, -1))
   208  	arrowFields = append(arrowFields, arrow.Field{Name: "double", Type: arrow.PrimitiveTypes.Float64, Nullable: true})
   209  
   210  	parquetFields = append(parquetFields, schema.NewByteArrayNode("binary", parquet.Repetitions.Optional, -1))
   211  	arrowFields = append(arrowFields, arrow.Field{Name: "binary", Type: arrow.BinaryTypes.Binary, Nullable: true})
   212  
   213  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("string", parquet.Repetitions.Optional,
   214  		schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)))
   215  	arrowFields = append(arrowFields, arrow.Field{Name: "string", Type: arrow.BinaryTypes.String, Nullable: true})
   216  
   217  	parquetFields = append(parquetFields, schema.NewFixedLenByteArrayNode("flba-binary", parquet.Repetitions.Optional, 12, -1))
   218  	arrowFields = append(arrowFields, arrow.Field{Name: "flba-binary", Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, Nullable: true})
   219  
   220  	arrowSchema := arrow.NewSchema(arrowFields, nil)
   221  	parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))
   222  
   223  	result, err := pqarrow.ToParquet(arrowSchema, nil, pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true)))
   224  	assert.NoError(t, err)
   225  	assert.True(t, parquetSchema.Equals(result))
   226  	for i := 0; i < parquetSchema.NumColumns(); i++ {
   227  		assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
   228  	}
   229  }
   230  
   231  func TestConvertArrowParquetLists(t *testing.T) {
   232  	parquetFields := make(schema.FieldList, 0)
   233  	arrowFields := make([]arrow.Field, 0)
   234  
   235  	parquetFields = append(parquetFields, schema.MustGroup(schema.ListOf(schema.Must(schema.NewPrimitiveNodeLogical("my_list",
   236  		parquet.Repetitions.Optional, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)), parquet.Repetitions.Required, -1)))
   237  
   238  	arrowFields = append(arrowFields, arrow.Field{Name: "my_list", Type: arrow.ListOf(arrow.BinaryTypes.String)})
   239  
   240  	parquetFields = append(parquetFields, schema.MustGroup(schema.ListOf(schema.Must(schema.NewPrimitiveNodeLogical("my_list",
   241  		parquet.Repetitions.Optional, schema.StringLogicalType{}, parquet.Types.ByteArray, 0, -1)), parquet.Repetitions.Optional, -1)))
   242  
   243  	arrowFields = append(arrowFields, arrow.Field{Name: "my_list", Type: arrow.ListOf(arrow.BinaryTypes.String), Nullable: true})
   244  
   245  	arrowSchema := arrow.NewSchema(arrowFields, nil)
   246  	parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))
   247  
   248  	result, err := pqarrow.ToParquet(arrowSchema, nil, pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true)))
   249  	assert.NoError(t, err)
   250  	assert.True(t, parquetSchema.Equals(result), parquetSchema.String(), result.String())
   251  	for i := 0; i < parquetSchema.NumColumns(); i++ {
   252  		assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
   253  	}
   254  }
   255  
   256  func TestConvertArrowDecimals(t *testing.T) {
   257  	parquetFields := make(schema.FieldList, 0)
   258  	arrowFields := make([]arrow.Field, 0)
   259  
   260  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("decimal_8_4", parquet.Repetitions.Required,
   261  		schema.NewDecimalLogicalType(8, 4), parquet.Types.FixedLenByteArray, 4, -1)))
   262  	arrowFields = append(arrowFields, arrow.Field{Name: "decimal_8_4", Type: &arrow.Decimal128Type{Precision: 8, Scale: 4}})
   263  
   264  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("decimal_20_4", parquet.Repetitions.Required,
   265  		schema.NewDecimalLogicalType(20, 4), parquet.Types.FixedLenByteArray, 9, -1)))
   266  	arrowFields = append(arrowFields, arrow.Field{Name: "decimal_20_4", Type: &arrow.Decimal128Type{Precision: 20, Scale: 4}})
   267  
   268  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("decimal_77_4", parquet.Repetitions.Required,
   269  		schema.NewDecimalLogicalType(77, 4), parquet.Types.FixedLenByteArray, 34, -1)))
   270  	arrowFields = append(arrowFields, arrow.Field{Name: "decimal_77_4", Type: &arrow.Decimal128Type{Precision: 77, Scale: 4}})
   271  
   272  	arrowSchema := arrow.NewSchema(arrowFields, nil)
   273  	parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))
   274  
   275  	result, err := pqarrow.ToParquet(arrowSchema, nil, pqarrow.NewArrowWriterProperties(pqarrow.WithDeprecatedInt96Timestamps(true)))
   276  	assert.NoError(t, err)
   277  	assert.True(t, parquetSchema.Equals(result))
   278  	for i := 0; i < parquetSchema.NumColumns(); i++ {
   279  		assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
   280  	}
   281  }
   282  
   283  func TestCoerceTImestampV1(t *testing.T) {
   284  	parquetFields := make(schema.FieldList, 0)
   285  	arrowFields := make([]arrow.Field, 0)
   286  
   287  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("timestamp", parquet.Repetitions.Required,
   288  		schema.NewTimestampLogicalTypeForce(false, schema.TimeUnitMicros), parquet.Types.Int64, 0, -1)))
   289  	arrowFields = append(arrowFields, arrow.Field{Name: "timestamp", Type: &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "EST"}})
   290  
   291  	arrowSchema := arrow.NewSchema(arrowFields, nil)
   292  	parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))
   293  
   294  	result, err := pqarrow.ToParquet(arrowSchema, parquet.NewWriterProperties(parquet.WithVersion(parquet.V1_0)), pqarrow.NewArrowWriterProperties(pqarrow.WithCoerceTimestamps(arrow.Microsecond)))
   295  	assert.NoError(t, err)
   296  	assert.True(t, parquetSchema.Equals(result))
   297  	for i := 0; i < parquetSchema.NumColumns(); i++ {
   298  		assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
   299  	}
   300  }
   301  
   302  func TestAutoCoerceTImestampV1(t *testing.T) {
   303  	parquetFields := make(schema.FieldList, 0)
   304  	arrowFields := make([]arrow.Field, 0)
   305  
   306  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("timestamp", parquet.Repetitions.Required,
   307  		schema.NewTimestampLogicalTypeForce(false, schema.TimeUnitMicros), parquet.Types.Int64, 0, -1)))
   308  	arrowFields = append(arrowFields, arrow.Field{Name: "timestamp", Type: &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "EST"}})
   309  
   310  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("timestamp[ms]", parquet.Repetitions.Required,
   311  		schema.NewTimestampLogicalTypeForce(true, schema.TimeUnitMillis), parquet.Types.Int64, 0, -1)))
   312  	arrowFields = append(arrowFields, arrow.Field{Name: "timestamp[ms]", Type: &arrow.TimestampType{Unit: arrow.Second}})
   313  
   314  	arrowSchema := arrow.NewSchema(arrowFields, nil)
   315  	parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))
   316  
   317  	result, err := pqarrow.ToParquet(arrowSchema, parquet.NewWriterProperties(parquet.WithVersion(parquet.V1_0)), pqarrow.NewArrowWriterProperties())
   318  	assert.NoError(t, err)
   319  	assert.True(t, parquetSchema.Equals(result))
   320  	for i := 0; i < parquetSchema.NumColumns(); i++ {
   321  		assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
   322  	}
   323  }
   324  
   325  func TestConvertArrowStruct(t *testing.T) {
   326  	parquetFields := make(schema.FieldList, 0)
   327  	arrowFields := make([]arrow.Field, 0)
   328  
   329  	parquetFields = append(parquetFields, schema.Must(schema.NewPrimitiveNodeLogical("leaf1", parquet.Repetitions.Optional, schema.NewIntLogicalType(32, true), parquet.Types.Int32, 0, -1)))
   330  	parquetFields = append(parquetFields, schema.Must(schema.NewGroupNode("outerGroup", parquet.Repetitions.Required, schema.FieldList{
   331  		schema.Must(schema.NewPrimitiveNodeLogical("leaf2", parquet.Repetitions.Optional, schema.NewIntLogicalType(32, true), parquet.Types.Int32, 0, -1)),
   332  		schema.Must(schema.NewGroupNode("innerGroup", parquet.Repetitions.Required, schema.FieldList{
   333  			schema.Must(schema.NewPrimitiveNodeLogical("leaf3", parquet.Repetitions.Optional, schema.NewIntLogicalType(32, true), parquet.Types.Int32, 0, -1)),
   334  		}, -1)),
   335  	}, -1)))
   336  
   337  	arrowFields = append(arrowFields, arrow.Field{Name: "leaf1", Type: arrow.PrimitiveTypes.Int32, Nullable: true})
   338  	arrowFields = append(arrowFields, arrow.Field{Name: "outerGroup", Type: arrow.StructOf(
   339  		arrow.Field{Name: "leaf2", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
   340  		arrow.Field{Name: "innerGroup", Type: arrow.StructOf(
   341  			arrow.Field{Name: "leaf3", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
   342  		)},
   343  	)})
   344  
   345  	arrowSchema := arrow.NewSchema(arrowFields, nil)
   346  	parquetSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema", parquet.Repetitions.Repeated, parquetFields, -1)))
   347  
   348  	result, err := pqarrow.ToParquet(arrowSchema, nil, pqarrow.NewArrowWriterProperties())
   349  	assert.NoError(t, err)
   350  	assert.True(t, parquetSchema.Equals(result))
   351  	for i := 0; i < parquetSchema.NumColumns(); i++ {
   352  		assert.Truef(t, parquetSchema.Column(i).Equals(result.Column(i)), "Column %d didn't match: %s", i, parquetSchema.Column(i).Name())
   353  	}
   354  }
   355  
   356  func TestListStructBackwardCompatible(t *testing.T) {
   357  	// Set up old construction for list of struct, not using
   358  	// the 3-level encoding. Schema looks like:
   359  	//
   360  	//     required group field_id=-1 root {
   361  	//       optional group field_id=-1 answers (List) {
   362  	//		   repeated group field_id=-1 array {
   363  	//           optional byte_array field_id=-1 type (String);
   364  	//           optional byte_array field_id=-1 rdata (String);
   365  	//           optional byte_array field_id=-1 class (String);
   366  	//         }
   367  	//       }
   368  	//     }
   369  	//
   370  	// Instaed of the proper 3-level encoding which would be:
   371  	//
   372  	//     repeated group field_id=-1 schema {
   373  	//       optional group field_id=-1 answers (List) {
   374  	//         repeated group field_id=-1 list {
   375  	//           optional group field_id=-1 element {
   376  	//             optional byte_array field_id=-1 type (String);
   377  	//             optional byte_array field_id=-1 rdata (String);
   378  	//             optional byte_array field_id=-1 class (String);
   379  	//           }
   380  	//         }
   381  	//       }
   382  	//     }
   383  	//
   384  	pqSchema := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("root", parquet.Repetitions.Required, schema.FieldList{
   385  		schema.Must(schema.NewGroupNodeLogical("answers", parquet.Repetitions.Optional, schema.FieldList{
   386  			schema.Must(schema.NewGroupNode("array", parquet.Repetitions.Repeated, schema.FieldList{
   387  				schema.MustPrimitive(schema.NewPrimitiveNodeLogical("type", parquet.Repetitions.Optional,
   388  					schema.StringLogicalType{}, parquet.Types.ByteArray, -1, -1)),
   389  				schema.MustPrimitive(schema.NewPrimitiveNodeLogical("rdata", parquet.Repetitions.Optional,
   390  					schema.StringLogicalType{}, parquet.Types.ByteArray, -1, -1)),
   391  				schema.MustPrimitive(schema.NewPrimitiveNodeLogical("class", parquet.Repetitions.Optional,
   392  					schema.StringLogicalType{}, parquet.Types.ByteArray, -1, -1)),
   393  			}, -1)),
   394  		}, schema.NewListLogicalType(), -1)),
   395  	}, -1)))
   396  
   397  	meta := arrow.NewMetadata([]string{"PARQUET:field_id"}, []string{"-1"})
   398  	// desired equivalent arrow schema would be list<item: struct<type: utf8, rdata: utf8, class: utf8>>
   399  	arrowSchema := arrow.NewSchema(
   400  		[]arrow.Field{
   401  			{Name: "answers", Type: arrow.ListOfField(arrow.Field{
   402  				Name: "array", Type: arrow.StructOf(
   403  					arrow.Field{Name: "type", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: meta},
   404  					arrow.Field{Name: "rdata", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: meta},
   405  					arrow.Field{Name: "class", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: meta},
   406  				), Nullable: true}), Nullable: true, Metadata: meta},
   407  		}, nil)
   408  
   409  	arrsc, err := pqarrow.FromParquet(pqSchema, nil, metadata.KeyValueMetadata{})
   410  	assert.NoError(t, err)
   411  	assert.True(t, arrowSchema.Equal(arrsc))
   412  }
   413  
   414  // TestUnsupportedTypes tests the error message for unsupported types. This test should be updated
   415  // when support for these types is added.
   416  func TestUnsupportedTypes(t *testing.T) {
   417  	unsupportedTypes := []struct {
   418  		typ arrow.DataType
   419  	}{
   420  		// Non-exhaustive list of unsupported types
   421  		{typ: &arrow.Float16Type{}},
   422  		{typ: &arrow.DurationType{}},
   423  		{typ: &arrow.DayTimeIntervalType{}},
   424  		{typ: &arrow.MonthIntervalType{}},
   425  		{typ: &arrow.MonthDayNanoIntervalType{}},
   426  		{typ: &arrow.DenseUnionType{}},
   427  		{typ: &arrow.SparseUnionType{}},
   428  	}
   429  	for _, tc := range unsupportedTypes {
   430  		t.Run(tc.typ.ID().String(), func(t *testing.T) {
   431  			arrowFields := make([]arrow.Field, 0)
   432  			arrowFields = append(arrowFields, arrow.Field{Name: "unsupported", Type: tc.typ, Nullable: true})
   433  			arrowSchema := arrow.NewSchema(arrowFields, nil)
   434  			_, err := pqarrow.ToParquet(arrowSchema, nil, pqarrow.NewArrowWriterProperties())
   435  			assert.ErrorIs(t, err, arrow.ErrNotImplemented)
   436  			assert.ErrorContains(t, err, "support for "+tc.typ.ID().String())
   437  		})
   438  	}
   439  }