github.com/apache/arrow/go/v14@v14.0.1/parquet/schema/schema_test.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package schema_test
    18  
    19  import (
    20  	"os"
    21  	"testing"
    22  
    23  	"github.com/apache/arrow/go/v14/parquet"
    24  	format "github.com/apache/arrow/go/v14/parquet/internal/gen-go/parquet"
    25  	"github.com/apache/arrow/go/v14/parquet/schema"
    26  	"github.com/apache/thrift/lib/go/thrift"
    27  	"github.com/stretchr/testify/assert"
    28  	"github.com/stretchr/testify/suite"
    29  )
    30  
    31  func TestColumnPath(t *testing.T) {
    32  	p := parquet.ColumnPath([]string{"toplevel", "leaf"})
    33  	assert.Equal(t, "toplevel.leaf", p.String())
    34  
    35  	p2 := parquet.ColumnPathFromString("toplevel.leaf")
    36  	assert.Equal(t, "toplevel.leaf", p2.String())
    37  
    38  	extend := p2.Extend("anotherlevel")
    39  	assert.Equal(t, "toplevel.leaf.anotherlevel", extend.String())
    40  }
    41  
    42  func NewPrimitive(name string, repetition format.FieldRepetitionType, typ format.Type, fieldID int32) *format.SchemaElement {
    43  	ret := &format.SchemaElement{
    44  		Name:           name,
    45  		RepetitionType: format.FieldRepetitionTypePtr(repetition),
    46  		Type:           format.TypePtr(typ),
    47  	}
    48  	if fieldID >= 0 {
    49  		ret.FieldID = &fieldID
    50  	}
    51  	return ret
    52  }
    53  
    54  func NewGroup(name string, repetition format.FieldRepetitionType, numChildren, fieldID int32) *format.SchemaElement {
    55  	ret := &format.SchemaElement{
    56  		Name:           name,
    57  		RepetitionType: format.FieldRepetitionTypePtr(repetition),
    58  		NumChildren:    &numChildren,
    59  	}
    60  	if fieldID >= 0 {
    61  		ret.FieldID = &fieldID
    62  	}
    63  	return ret
    64  }
    65  
    66  func TestSchemaNodes(t *testing.T) {
    67  	suite.Run(t, new(PrimitiveNodeTestSuite))
    68  	suite.Run(t, new(GroupNodeTestSuite))
    69  	suite.Run(t, new(SchemaConverterSuite))
    70  }
    71  
    72  type PrimitiveNodeTestSuite struct {
    73  	suite.Suite
    74  
    75  	name    string
    76  	fieldID int32
    77  	node    schema.Node
    78  }
    79  
    80  func (p *PrimitiveNodeTestSuite) SetupTest() {
    81  	p.name = "name"
    82  	p.fieldID = 5
    83  }
    84  
    85  func (p *PrimitiveNodeTestSuite) convert(elt *format.SchemaElement) {
    86  	p.node = schema.MustPrimitive(schema.PrimitiveNodeFromThrift(elt))
    87  	p.IsType(&schema.PrimitiveNode{}, p.node)
    88  }
    89  
    90  func (p *PrimitiveNodeTestSuite) TestAttrs() {
    91  	node1 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
    92  	node2 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("bar" /* name */, parquet.Repetitions.Optional, parquet.Types.ByteArray,
    93  		schema.ConvertedTypes.UTF8, 0 /* type len */, 0 /* precision */, 0 /* scale */, -1 /* fieldID */))
    94  
    95  	p.Equal("foo", node1.Name())
    96  	p.Equal(schema.Primitive, node1.Type())
    97  	p.Equal(schema.Primitive, node2.Type())
    98  
    99  	p.Equal(parquet.Repetitions.Repeated, node1.RepetitionType())
   100  	p.Equal(parquet.Repetitions.Optional, node2.RepetitionType())
   101  
   102  	p.Equal(parquet.Types.Int32, node1.PhysicalType())
   103  	p.Equal(parquet.Types.ByteArray, node2.PhysicalType())
   104  
   105  	p.Equal(schema.ConvertedTypes.None, node1.ConvertedType())
   106  	p.Equal(schema.ConvertedTypes.UTF8, node2.ConvertedType())
   107  }
   108  
   109  func (p *PrimitiveNodeTestSuite) TestFromParquet() {
   110  	p.Run("Optional Int32", func() {
   111  		elt := NewPrimitive(p.name, format.FieldRepetitionType_OPTIONAL, format.Type_INT32, p.fieldID)
   112  		p.convert(elt)
   113  
   114  		p.Equal(p.name, p.node.Name())
   115  		p.Equal(p.fieldID, p.node.FieldID())
   116  		p.Equal(parquet.Repetitions.Optional, p.node.RepetitionType())
   117  		p.Equal(parquet.Types.Int32, p.node.(*schema.PrimitiveNode).PhysicalType())
   118  		p.Equal(schema.ConvertedTypes.None, p.node.ConvertedType())
   119  	})
   120  
   121  	p.Run("LogicalType", func() {
   122  		elt := NewPrimitive(p.name, format.FieldRepetitionType_REQUIRED, format.Type_BYTE_ARRAY, p.fieldID)
   123  		elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_UTF8)
   124  		p.convert(elt)
   125  
   126  		p.Equal(parquet.Repetitions.Required, p.node.RepetitionType())
   127  		p.Equal(parquet.Types.ByteArray, p.node.(*schema.PrimitiveNode).PhysicalType())
   128  		p.Equal(schema.ConvertedTypes.UTF8, p.node.ConvertedType())
   129  	})
   130  
   131  	p.Run("FixedLenByteArray", func() {
   132  		elt := NewPrimitive(p.name, format.FieldRepetitionType_OPTIONAL, format.Type_FIXED_LEN_BYTE_ARRAY, p.fieldID)
   133  		elt.TypeLength = thrift.Int32Ptr(16)
   134  		p.convert(elt)
   135  
   136  		p.Equal(p.name, p.node.Name())
   137  		p.Equal(p.fieldID, p.node.FieldID())
   138  		p.Equal(parquet.Repetitions.Optional, p.node.RepetitionType())
   139  		p.Equal(parquet.Types.FixedLenByteArray, p.node.(*schema.PrimitiveNode).PhysicalType())
   140  		p.Equal(16, p.node.(*schema.PrimitiveNode).TypeLength())
   141  	})
   142  
   143  	p.Run("convertedtype::decimal", func() {
   144  		elt := NewPrimitive(p.name, format.FieldRepetitionType_OPTIONAL, format.Type_FIXED_LEN_BYTE_ARRAY, p.fieldID)
   145  		elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_DECIMAL)
   146  		elt.TypeLength = thrift.Int32Ptr(6)
   147  		elt.Scale = thrift.Int32Ptr(2)
   148  		elt.Precision = thrift.Int32Ptr(12)
   149  
   150  		p.convert(elt)
   151  		p.Equal(parquet.Types.FixedLenByteArray, p.node.(*schema.PrimitiveNode).PhysicalType())
   152  		p.Equal(schema.ConvertedTypes.Decimal, p.node.ConvertedType())
   153  		p.Equal(6, p.node.(*schema.PrimitiveNode).TypeLength())
   154  		p.EqualValues(2, p.node.(*schema.PrimitiveNode).DecimalMetadata().Scale)
   155  		p.EqualValues(12, p.node.(*schema.PrimitiveNode).DecimalMetadata().Precision)
   156  	})
   157  }
   158  
   159  func (p *PrimitiveNodeTestSuite) TestEquals() {
   160  	const fieldID = -1
   161  	node1 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Required, fieldID)
   162  	node2 := schema.NewInt64Node("foo" /* name */, parquet.Repetitions.Required, fieldID)
   163  	node3 := schema.NewInt32Node("bar" /* name */, parquet.Repetitions.Required, fieldID)
   164  	node4 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Optional, fieldID)
   165  	node5 := schema.NewInt32Node("foo" /* name */, parquet.Repetitions.Required, fieldID)
   166  
   167  	p.True(node1.Equals(node1))
   168  	p.False(node1.Equals(node2))
   169  	p.False(node1.Equals(node3))
   170  	p.False(node1.Equals(node4))
   171  	p.True(node1.Equals(node5))
   172  
   173  	flba1 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
   174  		schema.ConvertedTypes.Decimal, 12 /* type len */, 4 /* precision */, 2 /* scale */, fieldID))
   175  	flba2 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
   176  		schema.ConvertedTypes.Decimal, 1 /* type len */, 4 /* precision */, 2 /* scale */, fieldID))
   177  	flba2.SetTypeLength(12)
   178  
   179  	flba3 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
   180  		schema.ConvertedTypes.Decimal, 1 /* type len */, 4 /* precision */, 2 /* scale */, fieldID))
   181  	flba3.SetTypeLength(16)
   182  
   183  	flba4 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
   184  		schema.ConvertedTypes.Decimal, 12 /* type len */, 4 /* precision */, 0 /* scale */, fieldID))
   185  	flba5 := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, parquet.Types.FixedLenByteArray,
   186  		schema.ConvertedTypes.None, 12 /* type len */, 4 /* precision */, 0 /* scale */, fieldID))
   187  
   188  	p.True(flba1.Equals(flba2))
   189  	p.False(flba1.Equals(flba3))
   190  	p.False(flba1.Equals(flba4))
   191  	p.False(flba1.Equals(flba5))
   192  }
   193  
   194  func (p *PrimitiveNodeTestSuite) TestPhysicalLogicalMapping() {
   195  	tests := []struct {
   196  		typ       parquet.Type
   197  		cnv       schema.ConvertedType
   198  		typLen    int
   199  		precision int
   200  		scale     int
   201  		shouldErr bool
   202  	}{
   203  		{parquet.Types.Int32, schema.ConvertedTypes.Int32, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
   204  		{parquet.Types.ByteArray, schema.ConvertedTypes.JSON, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
   205  		{parquet.Types.Int32, schema.ConvertedTypes.JSON, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
   206  		{parquet.Types.Int64, schema.ConvertedTypes.TimestampMillis, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
   207  		{parquet.Types.Int32, schema.ConvertedTypes.Int64, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
   208  		{parquet.Types.ByteArray, schema.ConvertedTypes.Int8, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
   209  		{parquet.Types.ByteArray, schema.ConvertedTypes.Interval, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
   210  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Enum, 0 /* type len */, 0 /* precision */, 0 /* scale */, true},
   211  		{parquet.Types.ByteArray, schema.ConvertedTypes.Enum, 0 /* type len */, 0 /* precision */, 0 /* scale */, false},
   212  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 0 /* type len */, 2 /* precision */, 4 /* scale */, true},
   213  		{parquet.Types.Float, schema.ConvertedTypes.Decimal, 0 /* type len */, 2 /* precision */, 4 /* scale */, true},
   214  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 0 /* type len */, 4 /* precision */, 0 /* scale */, true},
   215  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 10 /* type len */, 4 /* precision */, -1 /* scale */, true},
   216  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 10 /* type len */, 2 /* precision */, 4 /* scale */, true},
   217  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 10 /* type len */, 6 /* precision */, 4 /* scale */, false},
   218  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Interval, 12 /* type len */, 0 /* precision */, 0 /* scale */, false},
   219  		{parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Interval, 10 /* type len */, 0 /* precision */, 0 /* scale */, true},
   220  	}
   221  	for _, tt := range tests {
   222  		p.Run(tt.typ.String(), func() {
   223  			_, err := schema.NewPrimitiveNodeConverted("foo" /* name */, parquet.Repetitions.Required, tt.typ, tt.cnv, tt.typLen, tt.precision, tt.scale, -1 /* fieldID */)
   224  			if tt.shouldErr {
   225  				p.Error(err)
   226  			} else {
   227  				p.NoError(err)
   228  			}
   229  		})
   230  	}
   231  }
   232  
   233  type GroupNodeTestSuite struct {
   234  	suite.Suite
   235  }
   236  
   237  func (g *GroupNodeTestSuite) fields1() []schema.Node {
   238  	return schema.FieldList{
   239  		schema.NewInt32Node("one" /* name */, parquet.Repetitions.Required, -1 /* fieldID */),
   240  		schema.NewInt64Node("two" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
   241  		schema.NewFloat64Node("three" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
   242  	}
   243  }
   244  
   245  func (g *GroupNodeTestSuite) fields2() []schema.Node {
   246  	return schema.FieldList{
   247  		schema.NewInt32Node("duplicate" /* name */, parquet.Repetitions.Required, -1 /* fieldID */),
   248  		schema.NewInt64Node("unique" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
   249  		schema.NewFloat64Node("duplicate" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
   250  	}
   251  }
   252  
   253  func (g *GroupNodeTestSuite) TestAttrs() {
   254  	fields := g.fields1()
   255  
   256  	node1 := schema.MustGroup(schema.NewGroupNode("foo" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
   257  	node2 := schema.MustGroup(schema.NewGroupNodeConverted("bar" /* name */, parquet.Repetitions.Optional, fields, schema.ConvertedTypes.List, -1 /* fieldID */))
   258  
   259  	g.Equal("foo", node1.Name())
   260  	g.Equal(schema.Group, node1.Type())
   261  	g.Equal(len(fields), node1.NumFields())
   262  	g.Equal(parquet.Repetitions.Repeated, node1.RepetitionType())
   263  	g.Equal(parquet.Repetitions.Optional, node2.RepetitionType())
   264  
   265  	g.Equal(schema.ConvertedTypes.None, node1.ConvertedType())
   266  	g.Equal(schema.ConvertedTypes.List, node2.ConvertedType())
   267  }
   268  
   269  func (g *GroupNodeTestSuite) TestEquals() {
   270  	f1 := g.fields1()
   271  	f2 := g.fields1()
   272  
   273  	group1 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, f1, -1 /* fieldID */))
   274  	group2 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, f2, -1 /* fieldID */))
   275  	group3 := schema.Must(schema.NewGroupNode("group2" /* name */, parquet.Repetitions.Repeated, f2, -1 /* fieldID */))
   276  
   277  	f2 = append(f2, schema.NewFloat32Node("four" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */))
   278  	group4 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, f2, -1 /* fieldID */))
   279  	group5 := schema.Must(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Repeated, g.fields1(), -1 /* fieldID */))
   280  
   281  	g.True(group1.Equals(group1))
   282  	g.True(group1.Equals(group2))
   283  	g.False(group1.Equals(group3))
   284  	g.False(group1.Equals(group4))
   285  	g.False(group5.Equals(group4))
   286  }
   287  
   288  func (g *GroupNodeTestSuite) TestFieldIndex() {
   289  	fields := g.fields1()
   290  	group := schema.MustGroup(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Required, fields, -1 /* fieldID */))
   291  	for idx, field := range fields {
   292  		f := group.Field(idx)
   293  		g.Same(field, f)
   294  		g.Equal(idx, group.FieldIndexByField(f))
   295  		g.Equal(idx, group.FieldIndexByName(field.Name()))
   296  	}
   297  
   298  	// Non field nodes
   299  	nonFieldAlien := schema.NewInt32Node("alien" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   300  	nonFieldFamiliar := schema.NewInt32Node("one" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
   301  	g.Less(group.FieldIndexByField(nonFieldAlien), 0)
   302  	g.Less(group.FieldIndexByField(nonFieldFamiliar), 0)
   303  }
   304  
   305  func (g *GroupNodeTestSuite) TestFieldIndexDuplicateName() {
   306  	fields := g.fields2()
   307  	group := schema.MustGroup(schema.NewGroupNode("group" /* name */, parquet.Repetitions.Required, fields, -1 /* fieldID */))
   308  	for idx, field := range fields {
   309  		f := group.Field(idx)
   310  		g.Same(f, field)
   311  		g.Equal(idx, group.FieldIndexByField(f))
   312  	}
   313  }
   314  
   315  type SchemaConverterSuite struct {
   316  	suite.Suite
   317  
   318  	name string
   319  	node schema.Node
   320  }
   321  
   322  func (s *SchemaConverterSuite) SetupSuite() {
   323  	s.name = "parquet_schema"
   324  }
   325  
   326  func (s *SchemaConverterSuite) convert(elems []*format.SchemaElement) {
   327  	s.node = schema.Must(schema.FromParquet(elems))
   328  	s.Equal(schema.Group, s.node.Type())
   329  }
   330  
   331  func (s *SchemaConverterSuite) checkParentConsistency(groupRoot *schema.GroupNode) bool {
   332  	// each node should have the group as parent
   333  	for i := 0; i < groupRoot.NumFields(); i++ {
   334  		field := groupRoot.Field(i)
   335  		if field.Parent() != groupRoot {
   336  			return false
   337  		}
   338  		if field.Type() == schema.Group {
   339  			if !s.checkParentConsistency(field.(*schema.GroupNode)) {
   340  				return false
   341  			}
   342  		}
   343  	}
   344  	return true
   345  }
   346  
   347  func (s *SchemaConverterSuite) TestNestedExample() {
   348  	elements := make([]*format.SchemaElement, 0)
   349  	elements = append(elements,
   350  		NewGroup(s.name, format.FieldRepetitionType_REPEATED, 2 /* numChildren */, 0 /* fieldID */),
   351  		NewPrimitive("a" /* name */, format.FieldRepetitionType_REQUIRED, format.Type_INT32, 1 /* fieldID */),
   352  		NewGroup("bag" /* name */, format.FieldRepetitionType_OPTIONAL, 1 /* numChildren */, 2 /* fieldID */))
   353  	elt := NewGroup("b" /* name */, format.FieldRepetitionType_REPEATED, 1 /* numChildren */, 3 /* fieldID */)
   354  	elt.ConvertedType = format.ConvertedTypePtr(format.ConvertedType_LIST)
   355  	elements = append(elements, elt, NewPrimitive("item" /* name */, format.FieldRepetitionType_OPTIONAL, format.Type_INT64, 4 /* fieldID */))
   356  
   357  	s.convert(elements)
   358  
   359  	// construct the expected schema
   360  	fields := make([]schema.Node, 0)
   361  	fields = append(fields, schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, 1 /* fieldID */))
   362  
   363  	// 3-level list encoding
   364  	item := schema.NewInt64Node("item" /* name */, parquet.Repetitions.Optional, 4 /* fieldID */)
   365  	list := schema.MustGroup(schema.NewGroupNodeConverted("b" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item}, schema.ConvertedTypes.List, 3 /* fieldID */))
   366  	bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, 2 /* fieldID */))
   367  	fields = append(fields, bag)
   368  
   369  	sc := schema.MustGroup(schema.NewGroupNode(s.name, parquet.Repetitions.Repeated, fields, 0 /* fieldID */))
   370  	s.True(sc.Equals(s.node))
   371  	s.Nil(s.node.Parent())
   372  	s.True(s.checkParentConsistency(s.node.(*schema.GroupNode)))
   373  }
   374  
   375  func (s *SchemaConverterSuite) TestZeroColumns() {
   376  	elements := []*format.SchemaElement{NewGroup("schema" /* name */, format.FieldRepetitionType_REPEATED, 0 /* numChildren */, 0 /* fieldID */)}
   377  	s.NotPanics(func() { s.convert(elements) })
   378  }
   379  
   380  func (s *SchemaConverterSuite) TestInvalidRoot() {
   381  	// According to the Parquet spec, the first element in the list<SchemaElement>
   382  	// is a group whose children (and their descendants) contain all of the rest of
   383  	// the flattened schema elments. If the first element is not a group, it is malformed
   384  	elements := []*format.SchemaElement{NewPrimitive("not-a-group" /* name */, format.FieldRepetitionType_REQUIRED,
   385  		format.Type_INT32, 0 /* fieldID */), format.NewSchemaElement()}
   386  	s.Panics(func() { s.convert(elements) })
   387  
   388  	// While the parquet spec indicates that the root group should have REPEATED
   389  	// repetition type, some implementations may return REQUIRED or OPTIONAL
   390  	// groups as the first element. These tests check that this is okay as a
   391  	// practicality matter
   392  	elements = []*format.SchemaElement{
   393  		NewGroup("not-repeated" /* name */, format.FieldRepetitionType_REQUIRED, 1 /* numChildren */, 0 /* fieldID */),
   394  		NewPrimitive("a" /* name */, format.FieldRepetitionType_REQUIRED, format.Type_INT32, 1 /* fieldID */)}
   395  	s.NotPanics(func() { s.convert(elements) })
   396  
   397  	elements[0] = NewGroup("not-repeated" /* name */, format.FieldRepetitionType_OPTIONAL, 1 /* numChildren */, 0 /* fieldID */)
   398  	s.NotPanics(func() { s.convert(elements) })
   399  }
   400  
   401  func (s *SchemaConverterSuite) TestNotEnoughChildren() {
   402  	s.Panics(func() {
   403  		s.convert([]*format.SchemaElement{NewGroup(s.name, format.FieldRepetitionType_REPEATED, 2 /* numChildren */, 0 /* fieldID */)})
   404  	})
   405  }
   406  
   407  func TestColumnDesc(t *testing.T) {
   408  	n := schema.MustPrimitive(schema.NewPrimitiveNodeConverted("name" /* name */, parquet.Repetitions.Optional, parquet.Types.ByteArray,
   409  		schema.ConvertedTypes.UTF8, 0 /* type len */, 0 /* precision */, 0 /* scale */, -1 /* fieldID */))
   410  	descr := schema.NewColumn(n, 4, 1)
   411  
   412  	assert.Equal(t, "name", descr.Name())
   413  	assert.EqualValues(t, 4, descr.MaxDefinitionLevel())
   414  	assert.EqualValues(t, 1, descr.MaxRepetitionLevel())
   415  	assert.Equal(t, parquet.Types.ByteArray, descr.PhysicalType())
   416  	assert.Equal(t, -1, descr.TypeLength())
   417  
   418  	expectedDesc := `column descriptor = {
   419    name: name,
   420    path: ,
   421    physical_type: BYTE_ARRAY,
   422    converted_type: UTF8,
   423    logical_type: String,
   424    max_definition_level: 4,
   425    max_repetition_level: 1,
   426  }`
   427  	assert.Equal(t, expectedDesc, descr.String())
   428  
   429  	n = schema.MustPrimitive(schema.NewPrimitiveNodeConverted("name" /* name */, parquet.Repetitions.Optional, parquet.Types.FixedLenByteArray, schema.ConvertedTypes.Decimal, 12 /* type len */, 10 /* precision */, 4 /* scale */, -1 /* fieldID */))
   430  	descr2 := schema.NewColumn(n, 4, 1)
   431  
   432  	assert.Equal(t, parquet.Types.FixedLenByteArray, descr2.PhysicalType())
   433  	assert.Equal(t, 12, descr2.TypeLength())
   434  
   435  	expectedDesc = `column descriptor = {
   436    name: name,
   437    path: ,
   438    physical_type: FIXED_LEN_BYTE_ARRAY,
   439    converted_type: DECIMAL,
   440    logical_type: Decimal(precision=10, scale=4),
   441    max_definition_level: 4,
   442    max_repetition_level: 1,
   443    length: 12,
   444    precision: 10,
   445    scale: 4,
   446  }`
   447  	assert.Equal(t, expectedDesc, descr2.String())
   448  }
   449  
   450  func TestSchemaDescriptor(t *testing.T) {
   451  	t.Run("Equals", func(t *testing.T) {
   452  		inta := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   453  		intb := schema.NewInt64Node("b" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
   454  		intb2 := schema.NewInt64Node("b2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
   455  		intc := schema.NewByteArrayNode("c" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
   456  
   457  		item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   458  		item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
   459  		item3 := schema.NewInt32Node("item3" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
   460  		list := schema.MustGroup(schema.NewGroupNodeConverted("records" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2, item3}, schema.ConvertedTypes.List, -1 /* fieldID */))
   461  
   462  		bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, -1 /* fieldID */))
   463  		bag2 := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Required, schema.FieldList{list}, -1 /* fieldID */))
   464  
   465  		descr1 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag}, -1 /* fieldID */)))
   466  		assert.True(t, descr1.Equals(descr1))
   467  
   468  		descr2 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag2}, -1 /* fieldID */)))
   469  		assert.False(t, descr1.Equals(descr2))
   470  
   471  		descr3 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb2, intc, bag}, -1 /* fieldID */)))
   472  		assert.False(t, descr1.Equals(descr3))
   473  
   474  		descr4 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("SCHEMA" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag}, -1 /* fieldID */)))
   475  		assert.True(t, descr1.Equals(descr4))
   476  
   477  		descr5 := schema.NewSchema(schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, schema.FieldList{inta, intb, intc, bag, intb2}, -1 /* fieldID */)))
   478  		assert.False(t, descr1.Equals(descr5))
   479  
   480  		col1 := schema.NewColumn(inta, 5 /* maxDefLvl */, 1 /* maxRepLvl */)
   481  		col2 := schema.NewColumn(inta, 6 /* maxDefLvl */, 1 /* maxRepLvl */)
   482  		col3 := schema.NewColumn(inta, 5 /* maxDefLvl */, 2 /* maxRepLvl */)
   483  
   484  		assert.True(t, col1.Equals(col1))
   485  		assert.False(t, col1.Equals(col2))
   486  		assert.False(t, col2.Equals(col3))
   487  	})
   488  
   489  	t.Run("BuildTree", func(t *testing.T) {
   490  		inta := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   491  		fields := schema.FieldList{inta}
   492  		fields = append(fields,
   493  			schema.NewInt64Node("b" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
   494  			schema.NewByteArrayNode("c" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */))
   495  
   496  		item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   497  		item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
   498  		item3 := schema.NewInt32Node("item3" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
   499  		list := schema.MustGroup(schema.NewGroupNodeConverted("records" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2, item3}, schema.ConvertedTypes.List, -1 /* fieldID */))
   500  		bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, -1 /* fieldID */))
   501  		fields = append(fields, bag)
   502  
   503  		sc := schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
   504  		descr := schema.NewSchema(sc)
   505  
   506  		const nleaves = 6
   507  		assert.Equal(t, nleaves, descr.NumColumns())
   508  
   509  		//                             mdef mrep
   510  		// required int32 a            0    0
   511  		// optional int64 b            1    0
   512  		// repeated byte_array c       1    1
   513  		// optional group bag          1    0
   514  		//   repeated group records    2    1
   515  		//     required int64 item1    2    1
   516  		//     optional boolean item2  3    1
   517  		//     repeated int32 item3    3    2
   518  		var (
   519  			exMaxDefLevels = [...]int16{0, 1, 1, 2, 3, 3}
   520  			exMaxRepLevels = [...]int16{0, 0, 1, 1, 1, 2}
   521  		)
   522  
   523  		for i := 0; i < nleaves; i++ {
   524  			col := descr.Column(i)
   525  			assert.Equal(t, exMaxDefLevels[i], col.MaxDefinitionLevel())
   526  			assert.Equal(t, exMaxRepLevels[i], col.MaxRepetitionLevel())
   527  		}
   528  
   529  		assert.Equal(t, "a", descr.Column(0).Path())
   530  		assert.Equal(t, "b", descr.Column(1).Path())
   531  		assert.Equal(t, "c", descr.Column(2).Path())
   532  		assert.Equal(t, "bag.records.item1", descr.Column(3).Path())
   533  		assert.Equal(t, "bag.records.item2", descr.Column(4).Path())
   534  		assert.Equal(t, "bag.records.item3", descr.Column(5).Path())
   535  
   536  		for i := 0; i < nleaves; i++ {
   537  			col := descr.Column(i)
   538  			assert.Equal(t, i, descr.ColumnIndexByNode(col.SchemaNode()))
   539  		}
   540  
   541  		nonColumnAlien := schema.NewInt32Node("alien" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   542  		nonColumnFamiliar := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
   543  		assert.Less(t, descr.ColumnIndexByNode(nonColumnAlien), 0)
   544  		assert.Less(t, descr.ColumnIndexByNode(nonColumnFamiliar), 0)
   545  
   546  		assert.Same(t, inta, descr.ColumnRoot(0))
   547  		assert.Same(t, bag, descr.ColumnRoot(3))
   548  		assert.Same(t, bag, descr.ColumnRoot(4))
   549  		assert.Same(t, bag, descr.ColumnRoot(5))
   550  
   551  		assert.Same(t, sc, descr.Root())
   552  	})
   553  
   554  	t.Run("HasRepeatedFields", func(t *testing.T) {
   555  		inta := schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   556  		fields := schema.FieldList{inta}
   557  		fields = append(fields,
   558  			schema.NewInt64Node("b" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */),
   559  			schema.NewByteArrayNode("c" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */))
   560  
   561  		sc := schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
   562  		descr := schema.NewSchema(sc)
   563  		assert.True(t, descr.HasRepeatedFields())
   564  
   565  		item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   566  		item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
   567  		item3 := schema.NewInt32Node("item3" /* name */, parquet.Repetitions.Repeated, -1 /* fieldID */)
   568  		list := schema.MustGroup(schema.NewGroupNodeConverted("records" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2, item3}, schema.ConvertedTypes.List, -1 /* fieldID */))
   569  		bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, -1 /* fieldID */))
   570  		fields = append(fields, bag)
   571  
   572  		sc = schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, -1 /* fieldID */))
   573  		descr = schema.NewSchema(sc)
   574  		assert.True(t, descr.HasRepeatedFields())
   575  
   576  		itemKey := schema.NewInt64Node("key" /* name */, parquet.Repetitions.Required, -1 /* fieldID */)
   577  		itemValue := schema.NewBooleanNode("value" /* name */, parquet.Repetitions.Optional, -1 /* fieldID */)
   578  		sc = schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, append(fields, schema.FieldList{
   579  			schema.MustGroup(schema.NewGroupNode("my_map" /* name */, parquet.Repetitions.Optional, schema.FieldList{
   580  				schema.MustGroup(schema.NewGroupNodeConverted("map" /* name */, parquet.Repetitions.Repeated, schema.FieldList{itemKey, itemValue}, schema.ConvertedTypes.Map, -1 /* fieldID */)),
   581  			}, -1 /* fieldID */)),
   582  		}...), -1 /* fieldID */))
   583  		descr = schema.NewSchema(sc)
   584  		assert.True(t, descr.HasRepeatedFields())
   585  	})
   586  }
   587  
   588  func ExamplePrintSchema() {
   589  	fields := schema.FieldList{schema.NewInt32Node("a" /* name */, parquet.Repetitions.Required, 1 /* fieldID */)}
   590  	item1 := schema.NewInt64Node("item1" /* name */, parquet.Repetitions.Optional, 4 /* fieldID */)
   591  	item2 := schema.NewBooleanNode("item2" /* name */, parquet.Repetitions.Required, 5 /* fieldID */)
   592  	list := schema.MustGroup(schema.NewGroupNodeConverted("b" /* name */, parquet.Repetitions.Repeated, schema.FieldList{item1, item2}, schema.ConvertedTypes.List, 3 /* fieldID */))
   593  	bag := schema.MustGroup(schema.NewGroupNode("bag" /* name */, parquet.Repetitions.Optional, schema.FieldList{list}, 2 /* fieldID */))
   594  	fields = append(fields, bag)
   595  
   596  	fields = append(fields,
   597  		schema.MustPrimitive(schema.NewPrimitiveNodeConverted("c" /* name */, parquet.Repetitions.Required, parquet.Types.Int32, schema.ConvertedTypes.Decimal, 0 /* type len */, 3 /* precision */, 2 /* scale */, 6 /* fieldID */)),
   598  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("d" /* name */, parquet.Repetitions.Required, schema.NewDecimalLogicalType(10 /* precision */, 5 /* scale */), parquet.Types.Int64, -1 /* type len */, 7 /* fieldID */)))
   599  
   600  	sc := schema.MustGroup(schema.NewGroupNode("schema" /* name */, parquet.Repetitions.Repeated, fields, 0 /* fieldID */))
   601  	schema.PrintSchema(sc, os.Stdout, 2)
   602  
   603  	// Output:
   604  	// repeated group field_id=0 schema {
   605  	//   required int32 field_id=1 a;
   606  	//   optional group field_id=2 bag {
   607  	//     repeated group field_id=3 b (List) {
   608  	//       optional int64 field_id=4 item1;
   609  	//       required boolean field_id=5 item2;
   610  	//     }
   611  	//   }
   612  	//   required int32 field_id=6 c (Decimal(precision=3, scale=2));
   613  	//   required int64 field_id=7 d (Decimal(precision=10, scale=5));
   614  	// }
   615  }
   616  
   617  func TestPanicSchemaNodeCreation(t *testing.T) {
   618  	assert.Panics(t, func() {
   619  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("map" /* name */, parquet.Repetitions.Required, schema.MapLogicalType{}, parquet.Types.Int64, -1 /* type len */, -1 /* fieldID */))
   620  	}, "nested logical type on non-group node")
   621  
   622  	assert.Panics(t, func() {
   623  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("string" /* name */, parquet.Repetitions.Required, schema.StringLogicalType{}, parquet.Types.Boolean, -1 /* type len */, -1 /* fieldID */))
   624  	}, "incompatible primitive type")
   625  
   626  	assert.Panics(t, func() {
   627  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("interval" /* name */, parquet.Repetitions.Required, schema.IntervalLogicalType{}, parquet.Types.FixedLenByteArray, 11 /* type len */, -1 /* fieldID */))
   628  	}, "incompatible primitive length")
   629  
   630  	assert.Panics(t, func() {
   631  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("decimal" /* name */, parquet.Repetitions.Required, schema.NewDecimalLogicalType(16, 6), parquet.Types.Int32, -1 /* type len */, -1 /* fieldID */))
   632  	}, "primitive too small for given precision")
   633  
   634  	assert.Panics(t, func() {
   635  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("uuid" /* name */, parquet.Repetitions.Required, schema.UUIDLogicalType{}, parquet.Types.FixedLenByteArray, 64 /* type len */, -1 /* fieldID */))
   636  	}, "incompatible primitive length")
   637  
   638  	assert.Panics(t, func() {
   639  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("negative_len" /* name */, parquet.Repetitions.Required, schema.NoLogicalType{}, parquet.Types.FixedLenByteArray, -16 /* type len */, -1 /* fieldID */))
   640  	}, "non-positive length for fixed length binary")
   641  
   642  	assert.Panics(t, func() {
   643  		schema.MustPrimitive(schema.NewPrimitiveNodeLogical("zero_len" /* name */, parquet.Repetitions.Required, schema.NoLogicalType{}, parquet.Types.FixedLenByteArray, 0 /* type len */, -1 /* fieldID */))
   644  	}, "non-positive length for fixed length binary")
   645  
   646  	assert.Panics(t, func() {
   647  		schema.MustGroup(schema.NewGroupNodeLogical("list" /* name */, parquet.Repetitions.Repeated, schema.FieldList{}, schema.JSONLogicalType{}, -1 /* fieldID */))
   648  	}, "non-nested logical type on group node")
   649  }
   650  
   651  func TestNullLogicalConvertsToNone(t *testing.T) {
   652  	var (
   653  		empty schema.LogicalType
   654  		n     schema.Node
   655  	)
   656  	assert.NotPanics(t, func() {
   657  		n = schema.MustPrimitive(schema.NewPrimitiveNodeLogical("value" /* name */, parquet.Repetitions.Required, empty, parquet.Types.Double, -1 /* type len */, -1 /* fieldID */))
   658  	})
   659  	assert.True(t, n.LogicalType().IsNone())
   660  	assert.Equal(t, schema.ConvertedTypes.None, n.ConvertedType())
   661  	assert.NotPanics(t, func() {
   662  		n = schema.MustGroup(schema.NewGroupNodeLogical("items" /* name */, parquet.Repetitions.Repeated, schema.FieldList{}, empty, -1 /* fieldID */))
   663  	})
   664  	assert.True(t, n.LogicalType().IsNone())
   665  	assert.Equal(t, schema.ConvertedTypes.None, n.ConvertedType())
   666  }