github.com/apache/arrow/go/v15@v15.0.1/parquet/schema/logical_types.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package schema
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  
    23  	"github.com/apache/arrow/go/v15/internal/json"
    24  	"github.com/apache/arrow/go/v15/parquet"
    25  	"github.com/apache/arrow/go/v15/parquet/internal/debug"
    26  	format "github.com/apache/arrow/go/v15/parquet/internal/gen-go/parquet"
    27  )
    28  
    29  // DecimalMetadata is a struct for managing scale and precision information between
    30  // converted and logical types.
    31  type DecimalMetadata struct {
    32  	IsSet     bool
    33  	Scale     int32
    34  	Precision int32
    35  }
    36  
    37  func getLogicalType(l *format.LogicalType) LogicalType {
    38  	switch {
    39  	case l.IsSetSTRING():
    40  		return StringLogicalType{}
    41  	case l.IsSetMAP():
    42  		return MapLogicalType{}
    43  	case l.IsSetLIST():
    44  		return ListLogicalType{}
    45  	case l.IsSetENUM():
    46  		return EnumLogicalType{}
    47  	case l.IsSetDECIMAL():
    48  		return &DecimalLogicalType{typ: l.DECIMAL}
    49  	case l.IsSetDATE():
    50  		return DateLogicalType{}
    51  	case l.IsSetTIME():
    52  		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
    53  			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
    54  		}
    55  		return &TimeLogicalType{typ: l.TIME}
    56  	case l.IsSetTIMESTAMP():
    57  		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
    58  			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
    59  		}
    60  		return &TimestampLogicalType{typ: l.TIMESTAMP}
    61  	case l.IsSetINTEGER():
    62  		return &IntLogicalType{typ: l.INTEGER}
    63  	case l.IsSetUNKNOWN():
    64  		return NullLogicalType{}
    65  	case l.IsSetJSON():
    66  		return JSONLogicalType{}
    67  	case l.IsSetBSON():
    68  		return BSONLogicalType{}
    69  	case l.IsSetUUID():
    70  		return UUIDLogicalType{}
    71  	case l.IsSetFLOAT16():
    72  		return Float16LogicalType{}
    73  	case l == nil:
    74  		return NoLogicalType{}
    75  	default:
    76  		panic("invalid logical type")
    77  	}
    78  }
    79  
    80  // TimeUnitType is an enum for denoting whether a time based logical type
    81  // is using milliseconds, microseconds or nanoseconds.
    82  type TimeUnitType int
    83  
    84  // Constants for the TimeUnitType
    85  const (
    86  	TimeUnitMillis TimeUnitType = iota
    87  	TimeUnitMicros
    88  	TimeUnitNanos
    89  	TimeUnitUnknown
    90  )
    91  
    92  // LogicalType is the descriptor that defines the usage of a physical primitive
    93  // type in the schema, such as an Interval, Date, etc.
    94  type LogicalType interface {
    95  	// Returns true if a nested type like List or Map
    96  	IsNested() bool
    97  	// Returns true if this type can be serialized, ie: not Unknown/NoType/Interval
    98  	IsSerialized() bool
    99  	// Returns true if not NoLogicalType
   100  	IsValid() bool
   101  	// Returns true if it is NoType
   102  	IsNone() bool
   103  	// returns a string representation of the Logical Type
   104  	String() string
   105  	toThrift() *format.LogicalType
   106  	// Return the equivalent ConvertedType for legacy Parquet systems
   107  	ToConvertedType() (ConvertedType, DecimalMetadata)
   108  	// Returns true if the specified ConvertedType is compatible with this
   109  	// logical type
   110  	IsCompatible(ConvertedType, DecimalMetadata) bool
   111  	// Returns true if this logical type can be used with the provided physical type
   112  	IsApplicable(t parquet.Type, tlen int32) bool
   113  	// Returns true if the logical types are the same
   114  	Equals(LogicalType) bool
   115  	// Returns the default stat sort order for this logical type
   116  	SortOrder() SortOrder
   117  }
   118  
   119  // TemporalLogicalType is a smaller interface for Time based logical types
   120  // like Time / Timestamp
   121  type TemporalLogicalType interface {
   122  	LogicalType
   123  	IsAdjustedToUTC() bool
   124  	TimeUnit() TimeUnitType
   125  }
   126  
   127  // SortOrder mirrors the parquet.thrift sort order type
   128  type SortOrder int8
   129  
   130  // Constants for the Stat sort order definitions
   131  const (
   132  	SortSIGNED SortOrder = iota
   133  	SortUNSIGNED
   134  	SortUNKNOWN
   135  )
   136  
   137  // DefaultSortOrder returns the default stat sort order for the given physical type
   138  func DefaultSortOrder(primitive format.Type) SortOrder {
   139  	switch primitive {
   140  	case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE:
   141  		return SortSIGNED
   142  	case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY:
   143  		return SortUNSIGNED
   144  	case format.Type_INT96:
   145  		fallthrough
   146  	default:
   147  		return SortUNKNOWN
   148  	}
   149  }
   150  
   151  // GetLogicalSortOrder returns the default sort order for this logical type
   152  // or falls back to the default sort order for the physical type if not valid
   153  func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder {
   154  	switch {
   155  	case logical == nil || !logical.IsValid():
   156  		return SortUNKNOWN
   157  	case logical.Equals(NoLogicalType{}):
   158  		return DefaultSortOrder(primitive)
   159  	default:
   160  		return logical.SortOrder()
   161  	}
   162  }
   163  
   164  type baseLogicalType struct{}
   165  
   166  func (baseLogicalType) IsSerialized() bool {
   167  	return true
   168  }
   169  
   170  func (baseLogicalType) IsValid() bool {
   171  	return true
   172  }
   173  
   174  func (baseLogicalType) IsNested() bool {
   175  	return false
   176  }
   177  
   178  func (baseLogicalType) IsNone() bool { return false }
   179  
   180  // StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray
   181  type StringLogicalType struct{ baseLogicalType }
   182  
   183  func (StringLogicalType) SortOrder() SortOrder {
   184  	return SortUNSIGNED
   185  }
   186  
   187  func (StringLogicalType) MarshalJSON() ([]byte, error) {
   188  	return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()})
   189  }
   190  
   191  func (StringLogicalType) String() string {
   192  	return "String"
   193  }
   194  
   195  func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   196  	return ConvertedTypes.UTF8, DecimalMetadata{}
   197  }
   198  
   199  func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   200  	return t == ConvertedTypes.UTF8 && !dec.IsSet
   201  }
   202  
   203  func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   204  	return t == parquet.Types.ByteArray
   205  }
   206  
   207  func (StringLogicalType) toThrift() *format.LogicalType {
   208  	return &format.LogicalType{STRING: format.NewStringType()}
   209  }
   210  
   211  func (StringLogicalType) Equals(rhs LogicalType) bool {
   212  	_, ok := rhs.(StringLogicalType)
   213  	return ok
   214  }
   215  
   216  // MapLogicalType represents a mapped type
   217  type MapLogicalType struct{ baseLogicalType }
   218  
   219  func (MapLogicalType) SortOrder() SortOrder {
   220  	return SortUNKNOWN
   221  }
   222  
   223  func (MapLogicalType) MarshalJSON() ([]byte, error) {
   224  	return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()})
   225  }
   226  
   227  func (MapLogicalType) String() string {
   228  	return "Map"
   229  }
   230  
   231  func (MapLogicalType) IsNested() bool {
   232  	return true
   233  }
   234  
   235  func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   236  	return ConvertedTypes.Map, DecimalMetadata{}
   237  }
   238  
   239  func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   240  	return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet
   241  }
   242  
   243  func (MapLogicalType) IsApplicable(parquet.Type, int32) bool {
   244  	return false
   245  }
   246  
   247  func (MapLogicalType) toThrift() *format.LogicalType {
   248  	return &format.LogicalType{MAP: format.NewMapType()}
   249  }
   250  
   251  func (MapLogicalType) Equals(rhs LogicalType) bool {
   252  	_, ok := rhs.(MapLogicalType)
   253  	return ok
   254  }
   255  
   256  func NewListLogicalType() LogicalType {
   257  	return ListLogicalType{}
   258  }
   259  
   260  // ListLogicalType is used for columns which are themselves nested lists
   261  type ListLogicalType struct{ baseLogicalType }
   262  
   263  func (ListLogicalType) SortOrder() SortOrder {
   264  	return SortUNKNOWN
   265  }
   266  
   267  func (ListLogicalType) MarshalJSON() ([]byte, error) {
   268  	return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()})
   269  }
   270  
   271  func (ListLogicalType) String() string {
   272  	return "List"
   273  }
   274  
   275  func (ListLogicalType) IsNested() bool {
   276  	return true
   277  }
   278  
   279  func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   280  	return ConvertedTypes.List, DecimalMetadata{}
   281  }
   282  
   283  func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   284  	return t == ConvertedTypes.List && !dec.IsSet
   285  }
   286  
   287  func (ListLogicalType) IsApplicable(parquet.Type, int32) bool {
   288  	return false
   289  }
   290  
   291  func (ListLogicalType) toThrift() *format.LogicalType {
   292  	return &format.LogicalType{LIST: format.NewListType()}
   293  }
   294  
   295  func (ListLogicalType) Equals(rhs LogicalType) bool {
   296  	_, ok := rhs.(ListLogicalType)
   297  	return ok
   298  }
   299  
   300  // EnumLogicalType is for representing an enum, which should be a byte array type
   301  type EnumLogicalType struct{ baseLogicalType }
   302  
   303  func (EnumLogicalType) SortOrder() SortOrder {
   304  	return SortUNSIGNED
   305  }
   306  
   307  func (EnumLogicalType) MarshalJSON() ([]byte, error) {
   308  	return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()})
   309  }
   310  
   311  func (EnumLogicalType) String() string {
   312  	return "Enum"
   313  }
   314  
   315  func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   316  	return ConvertedTypes.Enum, DecimalMetadata{}
   317  }
   318  
   319  func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   320  	return t == ConvertedTypes.Enum && !dec.IsSet
   321  }
   322  
   323  func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   324  	return t == parquet.Types.ByteArray
   325  }
   326  
   327  func (EnumLogicalType) toThrift() *format.LogicalType {
   328  	return &format.LogicalType{ENUM: format.NewEnumType()}
   329  }
   330  
   331  func (EnumLogicalType) Equals(rhs LogicalType) bool {
   332  	_, ok := rhs.(EnumLogicalType)
   333  	return ok
   334  }
   335  
   336  // NewDecimalLogicalType returns a Decimal logical type with the given
   337  // precision and scale.
   338  //
   339  // Panics if precision < 1 or scale is not in the range (0, precision)
   340  func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
   341  	if precision < 1 {
   342  		panic("parquet: precision must be greater than or equal to 1 for decimal logical type")
   343  	}
   344  	if scale < 0 || scale > precision {
   345  		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
   346  	}
   347  	return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
   348  }
   349  
   350  // DecimalLogicalType is used to represent a decimal value of a given
   351  // precision and scale
   352  type DecimalLogicalType struct {
   353  	baseLogicalType
   354  	typ *format.DecimalType
   355  }
   356  
   357  func (t DecimalLogicalType) Precision() int32 {
   358  	return t.typ.Precision
   359  }
   360  
   361  func (t DecimalLogicalType) Scale() int32 {
   362  	return t.typ.Scale
   363  }
   364  
   365  func (DecimalLogicalType) SortOrder() SortOrder {
   366  	return SortSIGNED
   367  }
   368  
   369  func (t DecimalLogicalType) MarshalJSON() ([]byte, error) {
   370  	return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale})
   371  }
   372  
   373  func (t DecimalLogicalType) String() string {
   374  	return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale)
   375  }
   376  
   377  func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   378  	return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()}
   379  }
   380  
   381  func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   382  	return c == ConvertedTypes.Decimal &&
   383  		dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision
   384  }
   385  
   386  func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool {
   387  	switch typ {
   388  	case parquet.Types.Int32:
   389  		return 1 <= t.typ.Precision && t.typ.Precision <= 9
   390  	case parquet.Types.Int64:
   391  		if t.typ.Precision < 10 {
   392  			debug.Log("int64 used for decimal logical, precision is small enough to use int32")
   393  		}
   394  		return 1 <= t.typ.Precision && t.typ.Precision <= 18
   395  	case parquet.Types.FixedLenByteArray:
   396  		return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0)))))
   397  	case parquet.Types.ByteArray:
   398  		return true
   399  	}
   400  	return false
   401  }
   402  
   403  func (t DecimalLogicalType) toThrift() *format.LogicalType {
   404  	return &format.LogicalType{DECIMAL: t.typ}
   405  }
   406  
   407  func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
   408  	other, ok := rhs.(*DecimalLogicalType)
   409  	if !ok {
   410  		return false
   411  	}
   412  	return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale
   413  }
   414  
   415  // DateLogicalType is an int32 representing the number of days since the Unix Epoch
   416  // 1 January 1970
   417  type DateLogicalType struct{ baseLogicalType }
   418  
   419  func (DateLogicalType) SortOrder() SortOrder {
   420  	return SortSIGNED
   421  }
   422  
   423  func (DateLogicalType) MarshalJSON() ([]byte, error) {
   424  	return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()})
   425  }
   426  
   427  func (DateLogicalType) String() string {
   428  	return "Date"
   429  }
   430  
   431  func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   432  	return ConvertedTypes.Date, DecimalMetadata{}
   433  }
   434  
   435  func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   436  	return t == ConvertedTypes.Date && !dec.IsSet
   437  }
   438  
   439  func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   440  	return t == parquet.Types.Int32
   441  }
   442  
   443  func (DateLogicalType) toThrift() *format.LogicalType {
   444  	return &format.LogicalType{DATE: format.NewDateType()}
   445  }
   446  
   447  func (DateLogicalType) Equals(rhs LogicalType) bool {
   448  	_, ok := rhs.(DateLogicalType)
   449  	return ok
   450  }
   451  
   452  func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType {
   453  	switch {
   454  	case unit == nil:
   455  		return TimeUnitUnknown
   456  	case unit.IsSetMILLIS():
   457  		return TimeUnitMillis
   458  	case unit.IsSetMICROS():
   459  		return TimeUnitMicros
   460  	case unit.IsSetNANOS():
   461  		return TimeUnitNanos
   462  	default:
   463  		return TimeUnitUnknown
   464  	}
   465  }
   466  
   467  func timeUnitToString(unit *format.TimeUnit) string {
   468  	switch {
   469  	case unit == nil:
   470  		return "unknown"
   471  	case unit.IsSetMILLIS():
   472  		return "milliseconds"
   473  	case unit.IsSetMICROS():
   474  		return "microseconds"
   475  	case unit.IsSetNANOS():
   476  		return "nanoseconds"
   477  	default:
   478  		return "unknown"
   479  	}
   480  }
   481  
   482  func timeUnitFromString(v string) TimeUnitType {
   483  	switch v {
   484  	case "millis":
   485  		return TimeUnitMillis
   486  	case "micros":
   487  		return TimeUnitMicros
   488  	case "nanos":
   489  		return TimeUnitNanos
   490  	default:
   491  		return TimeUnitUnknown
   492  	}
   493  }
   494  
   495  func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
   496  	tunit := format.NewTimeUnit()
   497  	switch unit {
   498  	case TimeUnitMicros:
   499  		tunit.MICROS = format.NewMicroSeconds()
   500  	case TimeUnitMillis:
   501  		tunit.MILLIS = format.NewMilliSeconds()
   502  	case TimeUnitNanos:
   503  		tunit.NANOS = format.NewNanoSeconds()
   504  	default:
   505  		panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type")
   506  	}
   507  	return tunit
   508  }
   509  
   510  // NewTimeLogicalType returns a time type of the given unit.
   511  func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   512  	return &TimeLogicalType{typ: &format.TimeType{
   513  		IsAdjustedToUTC: isAdjustedToUTC,
   514  		Unit:            createTimeUnit(unit),
   515  	}}
   516  }
   517  
   518  // TimeLogicalType is a time type without a date and must be an
   519  // int32 for milliseconds, or an int64 for micro or nano seconds.
   520  type TimeLogicalType struct {
   521  	baseLogicalType
   522  	typ *format.TimeType
   523  }
   524  
   525  func (t TimeLogicalType) IsAdjustedToUTC() bool {
   526  	return t.typ.IsAdjustedToUTC
   527  }
   528  
   529  func (t TimeLogicalType) TimeUnit() TimeUnitType {
   530  	return timeUnitFromThrift(t.typ.Unit)
   531  }
   532  
   533  func (TimeLogicalType) SortOrder() SortOrder {
   534  	return SortSIGNED
   535  }
   536  
   537  func (t TimeLogicalType) MarshalJSON() ([]byte, error) {
   538  	return json.Marshal(map[string]interface{}{
   539  		"Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())})
   540  }
   541  
   542  func (t TimeLogicalType) String() string {
   543  	return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()))
   544  }
   545  
   546  func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   547  	unit := timeUnitFromThrift(t.typ.Unit)
   548  	if t.typ.IsAdjustedToUTC {
   549  		switch unit {
   550  		case TimeUnitMillis:
   551  			return ConvertedTypes.TimeMillis, DecimalMetadata{}
   552  		case TimeUnitMicros:
   553  			return ConvertedTypes.TimeMicros, DecimalMetadata{}
   554  		}
   555  	}
   556  	return ConvertedTypes.None, DecimalMetadata{}
   557  }
   558  
   559  func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   560  	if dec.IsSet {
   561  		return false
   562  	}
   563  	unit := timeUnitFromThrift(t.typ.Unit)
   564  	if t.typ.IsAdjustedToUTC {
   565  		switch unit {
   566  		case TimeUnitMillis:
   567  			return c == ConvertedTypes.TimeMillis
   568  		case TimeUnitMicros:
   569  			return c == ConvertedTypes.TimeMicros
   570  		}
   571  	}
   572  
   573  	return c == ConvertedTypes.None || c == ConvertedTypes.NA
   574  }
   575  
   576  func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
   577  	return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) ||
   578  		(typ == parquet.Types.Int64 &&
   579  			(t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS()))
   580  }
   581  
   582  func (t TimeLogicalType) toThrift() *format.LogicalType {
   583  	return &format.LogicalType{TIME: t.typ}
   584  }
   585  
   586  func (t TimeLogicalType) Equals(rhs LogicalType) bool {
   587  	other, ok := rhs.(*TimeLogicalType)
   588  	if !ok {
   589  		return false
   590  	}
   591  	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
   592  		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
   593  }
   594  
   595  // NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
   596  // set to false
   597  func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   598  	return &TimestampLogicalType{
   599  		typ: &format.TimestampType{
   600  			IsAdjustedToUTC: isAdjustedToUTC,
   601  			Unit:            createTimeUnit(unit),
   602  		},
   603  		forceConverted: false,
   604  		fromConverted:  false,
   605  	}
   606  }
   607  
   608  // NewTimestampLogicalTypeForce returns a timestamp logical type with
   609  // "forceConverted" set to true
   610  func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   611  	return &TimestampLogicalType{
   612  		typ: &format.TimestampType{
   613  			IsAdjustedToUTC: isAdjustedToUTC,
   614  			Unit:            createTimeUnit(unit),
   615  		},
   616  		forceConverted: true,
   617  		fromConverted:  false,
   618  	}
   619  }
   620  
   621  // TimestampOpt options used with New Timestamp Logical Type
   622  type TimestampOpt func(*TimestampLogicalType)
   623  
   624  // WithTSIsAdjustedToUTC sets the IsAdjustedToUTC field of the timestamp type.
   625  func WithTSIsAdjustedToUTC() TimestampOpt {
   626  	return func(t *TimestampLogicalType) {
   627  		t.typ.IsAdjustedToUTC = true
   628  	}
   629  }
   630  
   631  // WithTSTimeUnitType sets the time unit for the timestamp type
   632  func WithTSTimeUnitType(unit TimeUnitType) TimestampOpt {
   633  	return func(t *TimestampLogicalType) {
   634  		t.typ.Unit = createTimeUnit(unit)
   635  	}
   636  }
   637  
   638  // WithTSForceConverted enable force converted mode
   639  func WithTSForceConverted() TimestampOpt {
   640  	return func(t *TimestampLogicalType) {
   641  		t.forceConverted = true
   642  	}
   643  }
   644  
   645  // WithTSFromConverted enable the timestamp logical type to be
   646  // constructed from a converted type.
   647  func WithTSFromConverted() TimestampOpt {
   648  	return func(t *TimestampLogicalType) {
   649  		t.fromConverted = true
   650  	}
   651  }
   652  
   653  // NewTimestampLogicalTypeWithOpts creates a new TimestampLogicalType with the provided options.
   654  //
   655  // TimestampType Unit defaults to milliseconds (TimeUnitMillis)
   656  func NewTimestampLogicalTypeWithOpts(opts ...TimestampOpt) LogicalType {
   657  	ts := &TimestampLogicalType{
   658  		typ: &format.TimestampType{
   659  			Unit: createTimeUnit(TimeUnitMillis), // default to milliseconds
   660  		},
   661  	}
   662  
   663  	for _, o := range opts {
   664  		o(ts)
   665  	}
   666  
   667  	return ts
   668  }
   669  
   670  // TimestampLogicalType represents an int64 number that can be decoded
   671  // into a year, month, day, hour, minute, second, and subsecond
   672  type TimestampLogicalType struct {
   673  	baseLogicalType
   674  	typ *format.TimestampType
   675  	// forceConverted denotes whether or not the resulting serialized
   676  	// type when writing to parquet will be written as the legacy
   677  	// ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true)
   678  	// or if it will write the proper current Logical Types (false, default)
   679  	forceConverted bool
   680  	// fromConverted denotes if the timestamp type was created by
   681  	// translating a legacy converted type of TIMESTAMP_MILLIS or
   682  	// TIMESTAMP_MICROS rather than by using the current logical
   683  	// types. Default is false.
   684  	fromConverted bool
   685  }
   686  
   687  func (t TimestampLogicalType) IsFromConvertedType() bool {
   688  	return t.fromConverted
   689  }
   690  
   691  func (t TimestampLogicalType) IsAdjustedToUTC() bool {
   692  	return t.typ.IsAdjustedToUTC
   693  }
   694  
   695  func (t TimestampLogicalType) TimeUnit() TimeUnitType {
   696  	return timeUnitFromThrift(t.typ.Unit)
   697  }
   698  
   699  func (TimestampLogicalType) SortOrder() SortOrder {
   700  	return SortSIGNED
   701  }
   702  
   703  func (t TimestampLogicalType) MarshalJSON() ([]byte, error) {
   704  	return json.Marshal(map[string]interface{}{
   705  		"Type":                     "Timestamp",
   706  		"isAdjustedToUTC":          t.typ.IsAdjustedToUTC,
   707  		"timeUnit":                 timeUnitToString(t.typ.GetUnit()),
   708  		"is_from_converted_type":   t.fromConverted,
   709  		"force_set_converted_type": t.forceConverted,
   710  	})
   711  }
   712  
   713  func (t TimestampLogicalType) IsSerialized() bool {
   714  	return !t.fromConverted
   715  }
   716  
   717  func (t TimestampLogicalType) String() string {
   718  	return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)",
   719  		t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted)
   720  }
   721  
   722  func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   723  	unit := timeUnitFromThrift(t.typ.Unit)
   724  	if t.typ.IsAdjustedToUTC || t.forceConverted {
   725  		switch unit {
   726  		case TimeUnitMillis:
   727  			return ConvertedTypes.TimestampMillis, DecimalMetadata{}
   728  		case TimeUnitMicros:
   729  			return ConvertedTypes.TimestampMicros, DecimalMetadata{}
   730  		}
   731  	}
   732  	return ConvertedTypes.None, DecimalMetadata{}
   733  }
   734  
   735  func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   736  	if dec.IsSet {
   737  		return false
   738  	}
   739  
   740  	switch timeUnitFromThrift(t.typ.Unit) {
   741  	case TimeUnitMillis:
   742  		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
   743  			return c == ConvertedTypes.TimestampMillis
   744  		}
   745  	case TimeUnitMicros:
   746  		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
   747  			return c == ConvertedTypes.TimestampMicros
   748  		}
   749  	}
   750  
   751  	return c == ConvertedTypes.None || c == ConvertedTypes.NA
   752  }
   753  
   754  func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   755  	return t == parquet.Types.Int64
   756  }
   757  
   758  func (t TimestampLogicalType) toThrift() *format.LogicalType {
   759  	return &format.LogicalType{TIMESTAMP: t.typ}
   760  }
   761  
   762  func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
   763  	other, ok := rhs.(*TimestampLogicalType)
   764  	if !ok {
   765  		return false
   766  	}
   767  	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
   768  		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
   769  }
   770  
   771  // NewIntLogicalType creates an integer logical type of the desired bitwidth
   772  // and whether it is signed or not.
   773  //
   774  // Bit width must be exactly 8, 16, 32 or 64 for an integer logical type
   775  func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
   776  	switch bitWidth {
   777  	case 8, 16, 32, 64:
   778  	default:
   779  		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
   780  	}
   781  	return &IntLogicalType{
   782  		typ: &format.IntType{
   783  			BitWidth: bitWidth,
   784  			IsSigned: signed,
   785  		},
   786  	}
   787  }
   788  
   789  // IntLogicalType represents an integer type of a specific bit width and
   790  // is either signed or unsigned.
   791  type IntLogicalType struct {
   792  	baseLogicalType
   793  	typ *format.IntType
   794  }
   795  
   796  func (t IntLogicalType) BitWidth() int8 {
   797  	return t.typ.BitWidth
   798  }
   799  
   800  func (t IntLogicalType) IsSigned() bool {
   801  	return t.typ.IsSigned
   802  }
   803  
   804  func (t IntLogicalType) SortOrder() SortOrder {
   805  	if t.typ.IsSigned {
   806  		return SortSIGNED
   807  	}
   808  	return SortUNSIGNED
   809  }
   810  
   811  func (t IntLogicalType) MarshalJSON() ([]byte, error) {
   812  	return json.Marshal(map[string]interface{}{
   813  		"Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned,
   814  	})
   815  }
   816  
   817  func (t IntLogicalType) String() string {
   818  	return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned())
   819  }
   820  
   821  func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   822  	var d DecimalMetadata
   823  	if t.typ.IsSigned {
   824  		switch t.typ.BitWidth {
   825  		case 8:
   826  			return ConvertedTypes.Int8, d
   827  		case 16:
   828  			return ConvertedTypes.Int16, d
   829  		case 32:
   830  			return ConvertedTypes.Int32, d
   831  		case 64:
   832  			return ConvertedTypes.Int64, d
   833  		}
   834  	} else {
   835  		switch t.typ.BitWidth {
   836  		case 8:
   837  			return ConvertedTypes.Uint8, d
   838  		case 16:
   839  			return ConvertedTypes.Uint16, d
   840  		case 32:
   841  			return ConvertedTypes.Uint32, d
   842  		case 64:
   843  			return ConvertedTypes.Uint64, d
   844  		}
   845  	}
   846  	return ConvertedTypes.None, d
   847  }
   848  
   849  func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   850  	if dec.IsSet {
   851  		return false
   852  	}
   853  	v, _ := t.ToConvertedType()
   854  	return c == v
   855  }
   856  
   857  func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
   858  	return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) ||
   859  		(typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64)
   860  }
   861  
   862  func (t IntLogicalType) toThrift() *format.LogicalType {
   863  	return &format.LogicalType{INTEGER: t.typ}
   864  }
   865  
   866  func (t IntLogicalType) Equals(rhs LogicalType) bool {
   867  	other, ok := rhs.(*IntLogicalType)
   868  	if !ok {
   869  		return false
   870  	}
   871  
   872  	return t.typ.GetIsSigned() == other.typ.GetIsSigned() &&
   873  		t.typ.GetBitWidth() == other.typ.GetBitWidth()
   874  }
   875  
   876  // UnknownLogicalType is a type that is essentially a placeholder for when
   877  // we don't know the type.
   878  type UnknownLogicalType struct{ baseLogicalType }
   879  
   880  func (UnknownLogicalType) SortOrder() SortOrder {
   881  	return SortUNKNOWN
   882  }
   883  
   884  func (UnknownLogicalType) MarshalJSON() ([]byte, error) {
   885  	return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()})
   886  }
   887  
   888  func (UnknownLogicalType) IsValid() bool { return false }
   889  
   890  func (UnknownLogicalType) IsSerialized() bool { return false }
   891  
   892  func (UnknownLogicalType) String() string {
   893  	return "Unknown"
   894  }
   895  
   896  func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   897  	return ConvertedTypes.NA, DecimalMetadata{}
   898  }
   899  
   900  func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   901  	return c == ConvertedTypes.NA && !dec.IsSet
   902  }
   903  
   904  func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true }
   905  
   906  func (UnknownLogicalType) toThrift() *format.LogicalType {
   907  	return &format.LogicalType{UNKNOWN: format.NewNullType()}
   908  }
   909  
   910  func (UnknownLogicalType) Equals(rhs LogicalType) bool {
   911  	_, ok := rhs.(UnknownLogicalType)
   912  	return ok
   913  }
   914  
   915  // JSONLogicalType represents a byte array column which is to be interpreted
   916  // as a JSON string.
   917  type JSONLogicalType struct{ baseLogicalType }
   918  
   919  func (JSONLogicalType) SortOrder() SortOrder {
   920  	return SortUNSIGNED
   921  }
   922  
   923  func (JSONLogicalType) MarshalJSON() ([]byte, error) {
   924  	return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()})
   925  }
   926  
   927  func (JSONLogicalType) String() string {
   928  	return "JSON"
   929  }
   930  
   931  func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   932  	return ConvertedTypes.JSON, DecimalMetadata{}
   933  }
   934  
   935  func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   936  	return c == ConvertedTypes.JSON && !dec.IsSet
   937  }
   938  
   939  func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   940  	return t == parquet.Types.ByteArray
   941  }
   942  
   943  func (JSONLogicalType) toThrift() *format.LogicalType {
   944  	return &format.LogicalType{JSON: format.NewJsonType()}
   945  }
   946  
   947  func (JSONLogicalType) Equals(rhs LogicalType) bool {
   948  	_, ok := rhs.(JSONLogicalType)
   949  	return ok
   950  }
   951  
   952  // BSONLogicalType represents a binary JSON string in the byte array
   953  type BSONLogicalType struct{ baseLogicalType }
   954  
   955  func (BSONLogicalType) SortOrder() SortOrder {
   956  	return SortUNSIGNED
   957  }
   958  
   959  func (BSONLogicalType) MarshalJSON() ([]byte, error) {
   960  	return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()})
   961  }
   962  
   963  func (BSONLogicalType) String() string {
   964  	return "BSON"
   965  }
   966  
   967  func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   968  	return ConvertedTypes.BSON, DecimalMetadata{}
   969  }
   970  
   971  func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   972  	return c == ConvertedTypes.BSON && !dec.IsSet
   973  }
   974  
   975  func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   976  	return t == parquet.Types.ByteArray
   977  }
   978  
   979  func (BSONLogicalType) toThrift() *format.LogicalType {
   980  	return &format.LogicalType{BSON: format.NewBsonType()}
   981  }
   982  
   983  func (BSONLogicalType) Equals(rhs LogicalType) bool {
   984  	_, ok := rhs.(BSONLogicalType)
   985  	return ok
   986  }
   987  
   988  // UUIDLogicalType can only be used with a FixedLength byte array column
   989  // that is exactly 16 bytes long
   990  type UUIDLogicalType struct{ baseLogicalType }
   991  
   992  func (UUIDLogicalType) SortOrder() SortOrder {
   993  	return SortUNSIGNED
   994  }
   995  
   996  func (UUIDLogicalType) MarshalJSON() ([]byte, error) {
   997  	return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()})
   998  }
   999  
  1000  func (UUIDLogicalType) String() string {
  1001  	return "UUID"
  1002  }
  1003  
  1004  func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1005  	return ConvertedTypes.None, DecimalMetadata{}
  1006  }
  1007  
  1008  func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1009  	if dec.IsSet {
  1010  		return false
  1011  	}
  1012  	switch c {
  1013  	case ConvertedTypes.None, ConvertedTypes.NA:
  1014  		return true
  1015  	}
  1016  	return false
  1017  }
  1018  
  1019  func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
  1020  	return t == parquet.Types.FixedLenByteArray && tlen == 16
  1021  }
  1022  
  1023  func (UUIDLogicalType) toThrift() *format.LogicalType {
  1024  	return &format.LogicalType{UUID: format.NewUUIDType()}
  1025  }
  1026  
  1027  func (UUIDLogicalType) Equals(rhs LogicalType) bool {
  1028  	_, ok := rhs.(UUIDLogicalType)
  1029  	return ok
  1030  }
  1031  
  1032  // IntervalLogicalType is not yet in the thrift spec, but represents
  1033  // an interval time and needs to be a fixed length byte array of 12 bytes
  1034  type IntervalLogicalType struct{ baseLogicalType }
  1035  
  1036  func (IntervalLogicalType) SortOrder() SortOrder {
  1037  	return SortUNKNOWN
  1038  }
  1039  
  1040  func (IntervalLogicalType) MarshalJSON() ([]byte, error) {
  1041  	return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()})
  1042  }
  1043  
  1044  func (IntervalLogicalType) String() string {
  1045  	return "Interval"
  1046  }
  1047  
  1048  func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1049  	return ConvertedTypes.Interval, DecimalMetadata{}
  1050  }
  1051  
  1052  func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1053  	return c == ConvertedTypes.Interval && !dec.IsSet
  1054  }
  1055  
  1056  func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
  1057  	return t == parquet.Types.FixedLenByteArray && tlen == 12
  1058  }
  1059  
  1060  func (IntervalLogicalType) toThrift() *format.LogicalType {
  1061  	panic("no parquet IntervalLogicalType yet implemented")
  1062  }
  1063  
  1064  func (IntervalLogicalType) Equals(rhs LogicalType) bool {
  1065  	_, ok := rhs.(IntervalLogicalType)
  1066  	return ok
  1067  }
  1068  
  1069  // Float16LogicalType can only be used with a FixedLength byte array column
  1070  // that is exactly 2 bytes long
  1071  type Float16LogicalType struct{ baseLogicalType }
  1072  
  1073  func (Float16LogicalType) SortOrder() SortOrder {
  1074  	return SortSIGNED
  1075  }
  1076  
  1077  func (Float16LogicalType) MarshalJSON() ([]byte, error) {
  1078  	return json.Marshal(map[string]string{"Type": Float16LogicalType{}.String()})
  1079  }
  1080  
  1081  func (Float16LogicalType) String() string {
  1082  	return "Float16"
  1083  }
  1084  
  1085  func (Float16LogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1086  	return ConvertedTypes.None, DecimalMetadata{}
  1087  }
  1088  
  1089  func (Float16LogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1090  	if dec.IsSet {
  1091  		return false
  1092  	}
  1093  	switch c {
  1094  	case ConvertedTypes.None, ConvertedTypes.NA:
  1095  		return true
  1096  	}
  1097  	return false
  1098  }
  1099  
  1100  func (Float16LogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
  1101  	return t == parquet.Types.FixedLenByteArray && tlen == 2
  1102  }
  1103  
  1104  func (Float16LogicalType) toThrift() *format.LogicalType {
  1105  	return &format.LogicalType{FLOAT16: format.NewFloat16Type()}
  1106  }
  1107  
  1108  func (Float16LogicalType) Equals(rhs LogicalType) bool {
  1109  	_, ok := rhs.(Float16LogicalType)
  1110  	return ok
  1111  }
  1112  
  1113  type NullLogicalType struct{ baseLogicalType }
  1114  
  1115  func (NullLogicalType) SortOrder() SortOrder {
  1116  	return SortUNKNOWN
  1117  }
  1118  
  1119  func (NullLogicalType) MarshalJSON() ([]byte, error) {
  1120  	return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()})
  1121  }
  1122  
  1123  func (NullLogicalType) String() string {
  1124  	return "Null"
  1125  }
  1126  
  1127  func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1128  	return ConvertedTypes.None, DecimalMetadata{}
  1129  }
  1130  
  1131  func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1132  	if dec.IsSet {
  1133  		return false
  1134  	}
  1135  	switch c {
  1136  	case ConvertedTypes.None, ConvertedTypes.NA:
  1137  		return true
  1138  	}
  1139  	return false
  1140  }
  1141  
  1142  func (NullLogicalType) IsApplicable(parquet.Type, int32) bool {
  1143  	return true
  1144  }
  1145  
  1146  func (NullLogicalType) toThrift() *format.LogicalType {
  1147  	return &format.LogicalType{UNKNOWN: format.NewNullType()}
  1148  }
  1149  
  1150  func (NullLogicalType) Equals(rhs LogicalType) bool {
  1151  	_, ok := rhs.(NullLogicalType)
  1152  	return ok
  1153  }
  1154  
  1155  type NoLogicalType struct{ baseLogicalType }
  1156  
  1157  func (NoLogicalType) SortOrder() SortOrder {
  1158  	return SortUNKNOWN
  1159  }
  1160  
  1161  func (NoLogicalType) MarshalJSON() ([]byte, error) {
  1162  	return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()})
  1163  }
  1164  
  1165  func (NoLogicalType) IsSerialized() bool { return false }
  1166  
  1167  func (NoLogicalType) String() string {
  1168  	return "None"
  1169  }
  1170  
  1171  func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1172  	return ConvertedTypes.None, DecimalMetadata{}
  1173  }
  1174  
  1175  func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1176  	return c == ConvertedTypes.None && !dec.IsSet
  1177  }
  1178  
  1179  func (NoLogicalType) IsApplicable(parquet.Type, int32) bool {
  1180  	return true
  1181  }
  1182  
  1183  func (NoLogicalType) toThrift() *format.LogicalType {
  1184  	panic("cannot convert NoLogicalType to thrift")
  1185  }
  1186  
  1187  func (NoLogicalType) Equals(rhs LogicalType) bool {
  1188  	_, ok := rhs.(NoLogicalType)
  1189  	return ok
  1190  }
  1191  
  1192  func (NoLogicalType) IsNone() bool { return true }