github.com/apache/arrow/go/v14@v14.0.2/parquet/schema/column.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package schema 18 19 import ( 20 "fmt" 21 "strings" 22 23 "github.com/apache/arrow/go/v14/parquet" 24 format "github.com/apache/arrow/go/v14/parquet/internal/gen-go/parquet" 25 ) 26 27 // Column encapsulates the information necessary to interpret primitive 28 // column data in the context of a particular schema. We have to examine 29 // the node structure of a column's path to the root in the schema tree 30 // to be able to reassemble the nested structure from the repetition and 31 // definition levels. 32 type Column struct { 33 pnode *PrimitiveNode 34 // the maximum definition level in this column 35 // if this is > 0 then either this column or a parent column must be optional. 36 maxDefLvl int16 37 // the maximum repetition level in this column 38 // if this is > 0, then either this column or a parent column must be repeated. 39 // when the repetition level in the column data equals this value, it indicates 40 // additional elements in the innermost list. 41 maxRepLvl int16 42 } 43 44 // NewColumn returns a new column object for the given node with the provided 45 // maximum definition and repetition levels. 46 func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column { 47 return &Column{n, maxDefinitionLvl, maxRepetitionLvl} 48 } 49 50 // Name is the column's name 51 func (c *Column) Name() string { return c.pnode.Name() } 52 53 // ColumnPath returns the full path to this column from the root of the schema 54 func (c *Column) ColumnPath() parquet.ColumnPath { return c.pnode.columnPath() } 55 56 // Path is equivalent to ColumnPath().String() returning the dot-string version of the path 57 func (c *Column) Path() string { return c.pnode.Path() } 58 59 // TypeLength is -1 if not a FixedLenByteArray, otherwise it is the length of elements in the column 60 func (c *Column) TypeLength() int { return c.pnode.TypeLength() } 61 62 func (c *Column) MaxDefinitionLevel() int16 { return c.maxDefLvl } 63 func (c *Column) MaxRepetitionLevel() int16 { return c.maxRepLvl } 64 func (c *Column) PhysicalType() parquet.Type { return c.pnode.PhysicalType() } 65 func (c *Column) ConvertedType() ConvertedType { return c.pnode.convertedType } 66 func (c *Column) LogicalType() LogicalType { return c.pnode.logicalType } 67 func (c *Column) ColumnOrder() parquet.ColumnOrder { return c.pnode.ColumnOrder } 68 func (c *Column) String() string { 69 var bld strings.Builder 70 bld.WriteString("column descriptor = {\n") 71 fmt.Fprintf(&bld, " name: %s,\n", c.Name()) 72 fmt.Fprintf(&bld, " path: %s,\n", c.Path()) 73 fmt.Fprintf(&bld, " physical_type: %s,\n", c.PhysicalType()) 74 fmt.Fprintf(&bld, " converted_type: %s,\n", c.ConvertedType()) 75 fmt.Fprintf(&bld, " logical_type: %s,\n", c.LogicalType()) 76 fmt.Fprintf(&bld, " max_definition_level: %d,\n", c.MaxDefinitionLevel()) 77 fmt.Fprintf(&bld, " max_repetition_level: %d,\n", c.MaxRepetitionLevel()) 78 if c.PhysicalType() == parquet.Types.FixedLenByteArray { 79 fmt.Fprintf(&bld, " length: %d,\n", c.TypeLength()) 80 } 81 if c.ConvertedType() == ConvertedTypes.Decimal { 82 fmt.Fprintf(&bld, " precision: %d,\n scale: %d,\n", c.pnode.decimalMetaData.Precision, c.pnode.decimalMetaData.Scale) 83 } 84 bld.WriteString("}") 85 return bld.String() 86 } 87 88 // Equals will return true if the rhs Column has the same Max Repetition and Definition levels 89 // along with having the same node definition. 90 func (c *Column) Equals(rhs *Column) bool { 91 return c.pnode.Equals(rhs.pnode) && 92 c.MaxRepetitionLevel() == rhs.MaxRepetitionLevel() && 93 c.MaxDefinitionLevel() == rhs.MaxDefinitionLevel() 94 } 95 96 // SchemaNode returns the underlying Node in the schema tree for this column. 97 func (c *Column) SchemaNode() Node { 98 return c.pnode 99 } 100 101 // SortOrder returns the sort order of this column's statistics based on the 102 // Logical and Converted types. 103 func (c *Column) SortOrder() SortOrder { 104 if c.LogicalType() != nil { 105 return GetLogicalSortOrder(c.LogicalType(), format.Type(c.pnode.PhysicalType())) 106 } 107 return GetSortOrder(c.ConvertedType(), format.Type(c.pnode.PhysicalType())) 108 }