github.com/apache/arrow/go/v14@v14.0.1/parquet/schema/column.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package schema
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  
    23  	"github.com/apache/arrow/go/v14/parquet"
    24  	format "github.com/apache/arrow/go/v14/parquet/internal/gen-go/parquet"
    25  )
    26  
// Column encapsulates the information necessary to interpret primitive
// column data in the context of a particular schema. We have to examine
// the node structure of a column's path to the root in the schema tree
// to be able to reassemble the nested structure from the repetition and
// definition levels.
//
// A Column pairs a primitive schema node with the maximum definition and
// repetition levels that apply to it; name, path and type queries are
// answered by delegating to the underlying node.
type Column struct {
	// the primitive schema node this column describes; it is what
	// SchemaNode returns and what the name/path/type accessors delegate to.
	pnode *PrimitiveNode
	// the maximum definition level in this column
	// if this is > 0 then either this column or a parent column must be optional.
	maxDefLvl int16
	// the maximum repetition level in this column
	// if this is > 0, then either this column or a parent column must be repeated.
	// when the repetition level in the column data equals this value, it indicates
	// additional elements in the innermost list.
	maxRepLvl int16
}
    43  
    44  // NewColumn returns a new column object for the given node with the provided
    45  // maximum definition and repetition levels.
    46  func NewColumn(n *PrimitiveNode, maxDefinitionLvl, maxRepetitionLvl int16) *Column {
    47  	return &Column{n, maxDefinitionLvl, maxRepetitionLvl}
    48  }
    49  
    50  // Name is the column's name
    51  func (c *Column) Name() string { return c.pnode.Name() }
    52  
    53  // ColumnPath returns the full path to this column from the root of the schema
    54  func (c *Column) ColumnPath() parquet.ColumnPath { return c.pnode.columnPath() }
    55  
    56  // Path is equivalent to ColumnPath().String() returning the dot-string version of the path
    57  func (c *Column) Path() string { return c.pnode.Path() }
    58  
    59  // TypeLength is -1 if not a FixedLenByteArray, otherwise it is the length of elements in the column
    60  func (c *Column) TypeLength() int { return c.pnode.TypeLength() }
    61  
    62  func (c *Column) MaxDefinitionLevel() int16        { return c.maxDefLvl }
    63  func (c *Column) MaxRepetitionLevel() int16        { return c.maxRepLvl }
    64  func (c *Column) PhysicalType() parquet.Type       { return c.pnode.PhysicalType() }
    65  func (c *Column) ConvertedType() ConvertedType     { return c.pnode.convertedType }
    66  func (c *Column) LogicalType() LogicalType         { return c.pnode.logicalType }
    67  func (c *Column) ColumnOrder() parquet.ColumnOrder { return c.pnode.ColumnOrder }
    68  func (c *Column) String() string {
    69  	var bld strings.Builder
    70  	bld.WriteString("column descriptor = {\n")
    71  	fmt.Fprintf(&bld, "  name: %s,\n", c.Name())
    72  	fmt.Fprintf(&bld, "  path: %s,\n", c.Path())
    73  	fmt.Fprintf(&bld, "  physical_type: %s,\n", c.PhysicalType())
    74  	fmt.Fprintf(&bld, "  converted_type: %s,\n", c.ConvertedType())
    75  	fmt.Fprintf(&bld, "  logical_type: %s,\n", c.LogicalType())
    76  	fmt.Fprintf(&bld, "  max_definition_level: %d,\n", c.MaxDefinitionLevel())
    77  	fmt.Fprintf(&bld, "  max_repetition_level: %d,\n", c.MaxRepetitionLevel())
    78  	if c.PhysicalType() == parquet.Types.FixedLenByteArray {
    79  		fmt.Fprintf(&bld, "  length: %d,\n", c.TypeLength())
    80  	}
    81  	if c.ConvertedType() == ConvertedTypes.Decimal {
    82  		fmt.Fprintf(&bld, "  precision: %d,\n  scale: %d,\n", c.pnode.decimalMetaData.Precision, c.pnode.decimalMetaData.Scale)
    83  	}
    84  	bld.WriteString("}")
    85  	return bld.String()
    86  }
    87  
    88  // Equals will return true if the rhs Column has the same Max Repetition and Definition levels
    89  // along with having the same node definition.
    90  func (c *Column) Equals(rhs *Column) bool {
    91  	return c.pnode.Equals(rhs.pnode) &&
    92  		c.MaxRepetitionLevel() == rhs.MaxRepetitionLevel() &&
    93  		c.MaxDefinitionLevel() == rhs.MaxDefinitionLevel()
    94  }
    95  
    96  // SchemaNode returns the underlying Node in the schema tree for this column.
    97  func (c *Column) SchemaNode() Node {
    98  	return c.pnode
    99  }
   100  
   101  // SortOrder returns the sort order of this column's statistics based on the
   102  // Logical and Converted types.
   103  func (c *Column) SortOrder() SortOrder {
   104  	if c.LogicalType() != nil {
   105  		return GetLogicalSortOrder(c.LogicalType(), format.Type(c.pnode.PhysicalType()))
   106  	}
   107  	return GetSortOrder(c.ConvertedType(), format.Type(c.pnode.PhysicalType()))
   108  }