github.com/apache/arrow/go/v14@v14.0.2/parquet/schema/logical_types.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package schema
    18  
    19  import (
    20  	"fmt"
    21  	"math"
    22  
    23  	"github.com/apache/arrow/go/v14/internal/json"
    24  	"github.com/apache/arrow/go/v14/parquet"
    25  	"github.com/apache/arrow/go/v14/parquet/internal/debug"
    26  	format "github.com/apache/arrow/go/v14/parquet/internal/gen-go/parquet"
    27  )
    28  
// DecimalMetadata is a struct for managing scale and precision information between
// converted and logical types.
//
// It is the second value returned by LogicalType.ToConvertedType and the
// second argument accepted by LogicalType.IsCompatible.
type DecimalMetadata struct {
	// IsSet is true when Scale and Precision carry meaningful values. In this
	// file only the decimal logical type produces metadata with IsSet == true.
	IsSet     bool
	// Scale is the decimal scale; only meaningful when IsSet is true.
	Scale     int32
	// Precision is the decimal precision; only meaningful when IsSet is true.
	Precision int32
}
    36  
    37  func getLogicalType(l *format.LogicalType) LogicalType {
    38  	switch {
    39  	case l.IsSetSTRING():
    40  		return StringLogicalType{}
    41  	case l.IsSetMAP():
    42  		return MapLogicalType{}
    43  	case l.IsSetLIST():
    44  		return ListLogicalType{}
    45  	case l.IsSetENUM():
    46  		return EnumLogicalType{}
    47  	case l.IsSetDECIMAL():
    48  		return &DecimalLogicalType{typ: l.DECIMAL}
    49  	case l.IsSetDATE():
    50  		return DateLogicalType{}
    51  	case l.IsSetTIME():
    52  		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
    53  			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
    54  		}
    55  		return &TimeLogicalType{typ: l.TIME}
    56  	case l.IsSetTIMESTAMP():
    57  		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
    58  			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
    59  		}
    60  		return &TimestampLogicalType{typ: l.TIMESTAMP}
    61  	case l.IsSetINTEGER():
    62  		return &IntLogicalType{typ: l.INTEGER}
    63  	case l.IsSetUNKNOWN():
    64  		return NullLogicalType{}
    65  	case l.IsSetJSON():
    66  		return JSONLogicalType{}
    67  	case l.IsSetBSON():
    68  		return BSONLogicalType{}
    69  	case l.IsSetUUID():
    70  		return UUIDLogicalType{}
    71  	case l == nil:
    72  		return NoLogicalType{}
    73  	default:
    74  		panic("invalid logical type")
    75  	}
    76  }
    77  
// TimeUnitType is an enum for denoting whether a time based logical type
// is using milliseconds, microseconds or nanoseconds.
type TimeUnitType int

// Constants for the TimeUnitType
const (
	// TimeUnitMillis denotes millisecond resolution.
	TimeUnitMillis TimeUnitType = iota
	// TimeUnitMicros denotes microsecond resolution.
	TimeUnitMicros
	// TimeUnitNanos denotes nanosecond resolution.
	TimeUnitNanos
	// TimeUnitUnknown is returned by the conversion helpers in this file when
	// a unit is missing or not recognized.
	TimeUnitUnknown
)
    89  
// LogicalType is the descriptor that defines the usage of a physical primitive
// type in the schema, such as an Interval, Date, etc.
type LogicalType interface {
	// IsNested returns true for a nested type like List or Map.
	IsNested() bool
	// IsSerialized returns true if this type can be serialized,
	// ie: not Unknown/NoType/Interval.
	IsSerialized() bool
	// IsValid returns true if the logical type is valid; the Unknown logical
	// type reports false.
	IsValid() bool
	// IsNone returns true if it is NoType.
	IsNone() bool
	// String returns a string representation of the logical type.
	String() string
	// toThrift returns the thrift representation of the logical type.
	toThrift() *format.LogicalType
	// ToConvertedType returns the equivalent ConvertedType for legacy Parquet
	// systems, together with decimal metadata where it applies.
	ToConvertedType() (ConvertedType, DecimalMetadata)
	// IsCompatible returns true if the specified ConvertedType (and decimal
	// metadata) is compatible with this logical type.
	IsCompatible(ConvertedType, DecimalMetadata) bool
	// IsApplicable returns true if this logical type can be used with the
	// provided physical type; tlen is the type length and is only consulted
	// by implementations that need it.
	IsApplicable(t parquet.Type, tlen int32) bool
	// Equals returns true if the logical types are the same.
	Equals(LogicalType) bool
	// SortOrder returns the default stat sort order for this logical type.
	SortOrder() SortOrder
}
   116  
// TemporalLogicalType is a smaller interface for Time based logical types
// like Time / Timestamp
type TemporalLogicalType interface {
	LogicalType
	// IsAdjustedToUTC returns the isAdjustedToUTC flag of the underlying type.
	IsAdjustedToUTC() bool
	// TimeUnit returns the resolution of the stored values.
	TimeUnit() TimeUnitType
}
   124  
// SortOrder mirrors the parquet.thrift sort order type
type SortOrder int8

// Constants for the Stat sort order definitions
const (
	// SortSIGNED denotes signed comparison of values.
	SortSIGNED SortOrder = iota
	// SortUNSIGNED denotes unsigned comparison of values.
	SortUNSIGNED
	// SortUNKNOWN denotes that no sort order is defined.
	SortUNKNOWN
)
   134  
   135  // DefaultSortOrder returns the default stat sort order for the given physical type
   136  func DefaultSortOrder(primitive format.Type) SortOrder {
   137  	switch primitive {
   138  	case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE:
   139  		return SortSIGNED
   140  	case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY:
   141  		return SortUNSIGNED
   142  	case format.Type_INT96:
   143  		fallthrough
   144  	default:
   145  		return SortUNKNOWN
   146  	}
   147  }
   148  
   149  // GetLogicalSortOrder returns the default sort order for this logical type
   150  // or falls back to the default sort order for the physical type if not valid
   151  func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder {
   152  	switch {
   153  	case logical == nil || !logical.IsValid():
   154  		return SortUNKNOWN
   155  	case logical.Equals(NoLogicalType{}):
   156  		return DefaultSortOrder(primitive)
   157  	default:
   158  		return logical.SortOrder()
   159  	}
   160  }
   161  
   162  type baseLogicalType struct{}
   163  
   164  func (baseLogicalType) IsSerialized() bool {
   165  	return true
   166  }
   167  
   168  func (baseLogicalType) IsValid() bool {
   169  	return true
   170  }
   171  
   172  func (baseLogicalType) IsNested() bool {
   173  	return false
   174  }
   175  
   176  func (baseLogicalType) IsNone() bool { return false }
   177  
   178  // StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray
   179  type StringLogicalType struct{ baseLogicalType }
   180  
   181  func (StringLogicalType) SortOrder() SortOrder {
   182  	return SortUNSIGNED
   183  }
   184  
   185  func (StringLogicalType) MarshalJSON() ([]byte, error) {
   186  	return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()})
   187  }
   188  
   189  func (StringLogicalType) String() string {
   190  	return "String"
   191  }
   192  
   193  func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   194  	return ConvertedTypes.UTF8, DecimalMetadata{}
   195  }
   196  
   197  func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   198  	return t == ConvertedTypes.UTF8 && !dec.IsSet
   199  }
   200  
   201  func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   202  	return t == parquet.Types.ByteArray
   203  }
   204  
   205  func (StringLogicalType) toThrift() *format.LogicalType {
   206  	return &format.LogicalType{STRING: format.NewStringType()}
   207  }
   208  
   209  func (StringLogicalType) Equals(rhs LogicalType) bool {
   210  	_, ok := rhs.(StringLogicalType)
   211  	return ok
   212  }
   213  
   214  // MapLogicalType represents a mapped type
   215  type MapLogicalType struct{ baseLogicalType }
   216  
   217  func (MapLogicalType) SortOrder() SortOrder {
   218  	return SortUNKNOWN
   219  }
   220  
   221  func (MapLogicalType) MarshalJSON() ([]byte, error) {
   222  	return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()})
   223  }
   224  
   225  func (MapLogicalType) String() string {
   226  	return "Map"
   227  }
   228  
   229  func (MapLogicalType) IsNested() bool {
   230  	return true
   231  }
   232  
   233  func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   234  	return ConvertedTypes.Map, DecimalMetadata{}
   235  }
   236  
   237  func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   238  	return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet
   239  }
   240  
   241  func (MapLogicalType) IsApplicable(parquet.Type, int32) bool {
   242  	return false
   243  }
   244  
   245  func (MapLogicalType) toThrift() *format.LogicalType {
   246  	return &format.LogicalType{MAP: format.NewMapType()}
   247  }
   248  
   249  func (MapLogicalType) Equals(rhs LogicalType) bool {
   250  	_, ok := rhs.(MapLogicalType)
   251  	return ok
   252  }
   253  
   254  func NewListLogicalType() LogicalType {
   255  	return ListLogicalType{}
   256  }
   257  
   258  // ListLogicalType is used for columns which are themselves nested lists
   259  type ListLogicalType struct{ baseLogicalType }
   260  
   261  func (ListLogicalType) SortOrder() SortOrder {
   262  	return SortUNKNOWN
   263  }
   264  
   265  func (ListLogicalType) MarshalJSON() ([]byte, error) {
   266  	return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()})
   267  }
   268  
   269  func (ListLogicalType) String() string {
   270  	return "List"
   271  }
   272  
   273  func (ListLogicalType) IsNested() bool {
   274  	return true
   275  }
   276  
   277  func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   278  	return ConvertedTypes.List, DecimalMetadata{}
   279  }
   280  
   281  func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   282  	return t == ConvertedTypes.List && !dec.IsSet
   283  }
   284  
   285  func (ListLogicalType) IsApplicable(parquet.Type, int32) bool {
   286  	return false
   287  }
   288  
   289  func (ListLogicalType) toThrift() *format.LogicalType {
   290  	return &format.LogicalType{LIST: format.NewListType()}
   291  }
   292  
   293  func (ListLogicalType) Equals(rhs LogicalType) bool {
   294  	_, ok := rhs.(ListLogicalType)
   295  	return ok
   296  }
   297  
   298  // EnumLogicalType is for representing an enum, which should be a byte array type
   299  type EnumLogicalType struct{ baseLogicalType }
   300  
   301  func (EnumLogicalType) SortOrder() SortOrder {
   302  	return SortUNSIGNED
   303  }
   304  
   305  func (EnumLogicalType) MarshalJSON() ([]byte, error) {
   306  	return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()})
   307  }
   308  
   309  func (EnumLogicalType) String() string {
   310  	return "Enum"
   311  }
   312  
   313  func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   314  	return ConvertedTypes.Enum, DecimalMetadata{}
   315  }
   316  
   317  func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   318  	return t == ConvertedTypes.Enum && !dec.IsSet
   319  }
   320  
   321  func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   322  	return t == parquet.Types.ByteArray
   323  }
   324  
   325  func (EnumLogicalType) toThrift() *format.LogicalType {
   326  	return &format.LogicalType{ENUM: format.NewEnumType()}
   327  }
   328  
   329  func (EnumLogicalType) Equals(rhs LogicalType) bool {
   330  	_, ok := rhs.(EnumLogicalType)
   331  	return ok
   332  }
   333  
   334  // NewDecimalLogicalType returns a Decimal logical type with the given
   335  // precision and scale.
   336  //
   337  // Panics if precision < 1 or scale is not in the range (0, precision)
   338  func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
   339  	if precision < 1 {
   340  		panic("parquet: precision must be greater than or equal to 1 for decimal logical type")
   341  	}
   342  	if scale < 0 || scale > precision {
   343  		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
   344  	}
   345  	return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
   346  }
   347  
   348  // DecimalLogicalType is used to represent a decimal value of a given
   349  // precision and scale
   350  type DecimalLogicalType struct {
   351  	baseLogicalType
   352  	typ *format.DecimalType
   353  }
   354  
   355  func (t DecimalLogicalType) Precision() int32 {
   356  	return t.typ.Precision
   357  }
   358  
   359  func (t DecimalLogicalType) Scale() int32 {
   360  	return t.typ.Scale
   361  }
   362  
   363  func (DecimalLogicalType) SortOrder() SortOrder {
   364  	return SortSIGNED
   365  }
   366  
   367  func (t DecimalLogicalType) MarshalJSON() ([]byte, error) {
   368  	return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale})
   369  }
   370  
   371  func (t DecimalLogicalType) String() string {
   372  	return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale)
   373  }
   374  
   375  func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   376  	return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()}
   377  }
   378  
   379  func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   380  	return c == ConvertedTypes.Decimal &&
   381  		dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision
   382  }
   383  
   384  func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool {
   385  	switch typ {
   386  	case parquet.Types.Int32:
   387  		return 1 <= t.typ.Precision && t.typ.Precision <= 9
   388  	case parquet.Types.Int64:
   389  		if t.typ.Precision < 10 {
   390  			debug.Log("int64 used for decimal logical, precision is small enough to use int32")
   391  		}
   392  		return 1 <= t.typ.Precision && t.typ.Precision <= 18
   393  	case parquet.Types.FixedLenByteArray:
   394  		return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0)))))
   395  	case parquet.Types.ByteArray:
   396  		return true
   397  	}
   398  	return false
   399  }
   400  
   401  func (t DecimalLogicalType) toThrift() *format.LogicalType {
   402  	return &format.LogicalType{DECIMAL: t.typ}
   403  }
   404  
   405  func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
   406  	other, ok := rhs.(*DecimalLogicalType)
   407  	if !ok {
   408  		return false
   409  	}
   410  	return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale
   411  }
   412  
   413  // DateLogicalType is an int32 representing the number of days since the Unix Epoch
   414  // 1 January 1970
   415  type DateLogicalType struct{ baseLogicalType }
   416  
   417  func (DateLogicalType) SortOrder() SortOrder {
   418  	return SortSIGNED
   419  }
   420  
   421  func (DateLogicalType) MarshalJSON() ([]byte, error) {
   422  	return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()})
   423  }
   424  
   425  func (DateLogicalType) String() string {
   426  	return "Date"
   427  }
   428  
   429  func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   430  	return ConvertedTypes.Date, DecimalMetadata{}
   431  }
   432  
   433  func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   434  	return t == ConvertedTypes.Date && !dec.IsSet
   435  }
   436  
   437  func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   438  	return t == parquet.Types.Int32
   439  }
   440  
   441  func (DateLogicalType) toThrift() *format.LogicalType {
   442  	return &format.LogicalType{DATE: format.NewDateType()}
   443  }
   444  
   445  func (DateLogicalType) Equals(rhs LogicalType) bool {
   446  	_, ok := rhs.(DateLogicalType)
   447  	return ok
   448  }
   449  
   450  func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType {
   451  	switch {
   452  	case unit == nil:
   453  		return TimeUnitUnknown
   454  	case unit.IsSetMILLIS():
   455  		return TimeUnitMillis
   456  	case unit.IsSetMICROS():
   457  		return TimeUnitMicros
   458  	case unit.IsSetNANOS():
   459  		return TimeUnitNanos
   460  	default:
   461  		return TimeUnitUnknown
   462  	}
   463  }
   464  
   465  func timeUnitToString(unit *format.TimeUnit) string {
   466  	switch {
   467  	case unit == nil:
   468  		return "unknown"
   469  	case unit.IsSetMILLIS():
   470  		return "milliseconds"
   471  	case unit.IsSetMICROS():
   472  		return "microseconds"
   473  	case unit.IsSetNANOS():
   474  		return "nanoseconds"
   475  	default:
   476  		return "unknown"
   477  	}
   478  }
   479  
   480  func timeUnitFromString(v string) TimeUnitType {
   481  	switch v {
   482  	case "millis":
   483  		return TimeUnitMillis
   484  	case "micros":
   485  		return TimeUnitMicros
   486  	case "nanos":
   487  		return TimeUnitNanos
   488  	default:
   489  		return TimeUnitUnknown
   490  	}
   491  }
   492  
   493  func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
   494  	tunit := format.NewTimeUnit()
   495  	switch unit {
   496  	case TimeUnitMicros:
   497  		tunit.MICROS = format.NewMicroSeconds()
   498  	case TimeUnitMillis:
   499  		tunit.MILLIS = format.NewMilliSeconds()
   500  	case TimeUnitNanos:
   501  		tunit.NANOS = format.NewNanoSeconds()
   502  	default:
   503  		panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type")
   504  	}
   505  	return tunit
   506  }
   507  
   508  // NewTimeLogicalType returns a time type of the given unit.
   509  func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   510  	return &TimeLogicalType{typ: &format.TimeType{
   511  		IsAdjustedToUTC: isAdjustedToUTC,
   512  		Unit:            createTimeUnit(unit),
   513  	}}
   514  }
   515  
   516  // TimeLogicalType is a time type without a date and must be an
   517  // int32 for milliseconds, or an int64 for micro or nano seconds.
   518  type TimeLogicalType struct {
   519  	baseLogicalType
   520  	typ *format.TimeType
   521  }
   522  
   523  func (t TimeLogicalType) IsAdjustedToUTC() bool {
   524  	return t.typ.IsAdjustedToUTC
   525  }
   526  
   527  func (t TimeLogicalType) TimeUnit() TimeUnitType {
   528  	return timeUnitFromThrift(t.typ.Unit)
   529  }
   530  
   531  func (TimeLogicalType) SortOrder() SortOrder {
   532  	return SortSIGNED
   533  }
   534  
   535  func (t TimeLogicalType) MarshalJSON() ([]byte, error) {
   536  	return json.Marshal(map[string]interface{}{
   537  		"Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())})
   538  }
   539  
   540  func (t TimeLogicalType) String() string {
   541  	return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()))
   542  }
   543  
   544  func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   545  	unit := timeUnitFromThrift(t.typ.Unit)
   546  	if t.typ.IsAdjustedToUTC {
   547  		switch unit {
   548  		case TimeUnitMillis:
   549  			return ConvertedTypes.TimeMillis, DecimalMetadata{}
   550  		case TimeUnitMicros:
   551  			return ConvertedTypes.TimeMicros, DecimalMetadata{}
   552  		}
   553  	}
   554  	return ConvertedTypes.None, DecimalMetadata{}
   555  }
   556  
   557  func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   558  	if dec.IsSet {
   559  		return false
   560  	}
   561  	unit := timeUnitFromThrift(t.typ.Unit)
   562  	if t.typ.IsAdjustedToUTC {
   563  		switch unit {
   564  		case TimeUnitMillis:
   565  			return c == ConvertedTypes.TimeMillis
   566  		case TimeUnitMicros:
   567  			return c == ConvertedTypes.TimeMicros
   568  		}
   569  	}
   570  
   571  	return c == ConvertedTypes.None || c == ConvertedTypes.NA
   572  }
   573  
   574  func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
   575  	return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) ||
   576  		(typ == parquet.Types.Int64 &&
   577  			(t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS()))
   578  }
   579  
   580  func (t TimeLogicalType) toThrift() *format.LogicalType {
   581  	return &format.LogicalType{TIME: t.typ}
   582  }
   583  
   584  func (t TimeLogicalType) Equals(rhs LogicalType) bool {
   585  	other, ok := rhs.(*TimeLogicalType)
   586  	if !ok {
   587  		return false
   588  	}
   589  	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
   590  		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
   591  }
   592  
   593  // NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
   594  // set to false
   595  func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   596  	return &TimestampLogicalType{
   597  		typ: &format.TimestampType{
   598  			IsAdjustedToUTC: isAdjustedToUTC,
   599  			Unit:            createTimeUnit(unit),
   600  		},
   601  		forceConverted: false,
   602  		fromConverted:  false,
   603  	}
   604  }
   605  
   606  // NewTimestampLogicalTypeForce returns a timestamp logical type with
   607  // "forceConverted" set to true
   608  func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   609  	return &TimestampLogicalType{
   610  		typ: &format.TimestampType{
   611  			IsAdjustedToUTC: isAdjustedToUTC,
   612  			Unit:            createTimeUnit(unit),
   613  		},
   614  		forceConverted: true,
   615  		fromConverted:  false,
   616  	}
   617  }
   618  
   619  // TimestampOpt options used with New Timestamp Logical Type
   620  type TimestampOpt func(*TimestampLogicalType)
   621  
   622  // WithTSIsAdjustedToUTC sets the IsAdjustedToUTC field of the timestamp type.
   623  func WithTSIsAdjustedToUTC() TimestampOpt {
   624  	return func(t *TimestampLogicalType) {
   625  		t.typ.IsAdjustedToUTC = true
   626  	}
   627  }
   628  
   629  // WithTSTimeUnitType sets the time unit for the timestamp type
   630  func WithTSTimeUnitType(unit TimeUnitType) TimestampOpt {
   631  	return func(t *TimestampLogicalType) {
   632  		t.typ.Unit = createTimeUnit(unit)
   633  	}
   634  }
   635  
   636  // WithTSForceConverted enable force converted mode
   637  func WithTSForceConverted() TimestampOpt {
   638  	return func(t *TimestampLogicalType) {
   639  		t.forceConverted = true
   640  	}
   641  }
   642  
   643  // WithTSFromConverted enable the timestamp logical type to be
   644  // constructed from a converted type.
   645  func WithTSFromConverted() TimestampOpt {
   646  	return func(t *TimestampLogicalType) {
   647  		t.fromConverted = true
   648  	}
   649  }
   650  
   651  // NewTimestampLogicalTypeWithOpts creates a new TimestampLogicalType with the provided options.
   652  //
   653  // TimestampType Unit defaults to milliseconds (TimeUnitMillis)
   654  func NewTimestampLogicalTypeWithOpts(opts ...TimestampOpt) LogicalType {
   655  	ts := &TimestampLogicalType{
   656  		typ: &format.TimestampType{
   657  			Unit: createTimeUnit(TimeUnitMillis), // default to milliseconds
   658  		},
   659  	}
   660  
   661  	for _, o := range opts {
   662  		o(ts)
   663  	}
   664  
   665  	return ts
   666  }
   667  
   668  // TimestampLogicalType represents an int64 number that can be decoded
   669  // into a year, month, day, hour, minute, second, and subsecond
   670  type TimestampLogicalType struct {
   671  	baseLogicalType
   672  	typ *format.TimestampType
   673  	// forceConverted denotes whether or not the resulting serialized
   674  	// type when writing to parquet will be written as the legacy
   675  	// ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true)
   676  	// or if it will write the proper current Logical Types (false, default)
   677  	forceConverted bool
   678  	// fromConverted denotes if the timestamp type was created by
   679  	// translating a legacy converted type of TIMESTAMP_MILLIS or
   680  	// TIMESTAMP_MICROS rather than by using the current logical
   681  	// types. Default is false.
   682  	fromConverted bool
   683  }
   684  
   685  func (t TimestampLogicalType) IsFromConvertedType() bool {
   686  	return t.fromConverted
   687  }
   688  
   689  func (t TimestampLogicalType) IsAdjustedToUTC() bool {
   690  	return t.typ.IsAdjustedToUTC
   691  }
   692  
   693  func (t TimestampLogicalType) TimeUnit() TimeUnitType {
   694  	return timeUnitFromThrift(t.typ.Unit)
   695  }
   696  
   697  func (TimestampLogicalType) SortOrder() SortOrder {
   698  	return SortSIGNED
   699  }
   700  
   701  func (t TimestampLogicalType) MarshalJSON() ([]byte, error) {
   702  	return json.Marshal(map[string]interface{}{
   703  		"Type":                     "Timestamp",
   704  		"isAdjustedToUTC":          t.typ.IsAdjustedToUTC,
   705  		"timeUnit":                 timeUnitToString(t.typ.GetUnit()),
   706  		"is_from_converted_type":   t.fromConverted,
   707  		"force_set_converted_type": t.forceConverted,
   708  	})
   709  }
   710  
   711  func (t TimestampLogicalType) IsSerialized() bool {
   712  	return !t.fromConverted
   713  }
   714  
   715  func (t TimestampLogicalType) String() string {
   716  	return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)",
   717  		t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted)
   718  }
   719  
   720  func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   721  	unit := timeUnitFromThrift(t.typ.Unit)
   722  	if t.typ.IsAdjustedToUTC || t.forceConverted {
   723  		switch unit {
   724  		case TimeUnitMillis:
   725  			return ConvertedTypes.TimestampMillis, DecimalMetadata{}
   726  		case TimeUnitMicros:
   727  			return ConvertedTypes.TimestampMicros, DecimalMetadata{}
   728  		}
   729  	}
   730  	return ConvertedTypes.None, DecimalMetadata{}
   731  }
   732  
   733  func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   734  	if dec.IsSet {
   735  		return false
   736  	}
   737  
   738  	switch timeUnitFromThrift(t.typ.Unit) {
   739  	case TimeUnitMillis:
   740  		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
   741  			return c == ConvertedTypes.TimestampMillis
   742  		}
   743  	case TimeUnitMicros:
   744  		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
   745  			return c == ConvertedTypes.TimestampMicros
   746  		}
   747  	}
   748  
   749  	return c == ConvertedTypes.None || c == ConvertedTypes.NA
   750  }
   751  
   752  func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   753  	return t == parquet.Types.Int64
   754  }
   755  
   756  func (t TimestampLogicalType) toThrift() *format.LogicalType {
   757  	return &format.LogicalType{TIMESTAMP: t.typ}
   758  }
   759  
   760  func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
   761  	other, ok := rhs.(*TimestampLogicalType)
   762  	if !ok {
   763  		return false
   764  	}
   765  	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
   766  		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
   767  }
   768  
   769  // NewIntLogicalType creates an integer logical type of the desired bitwidth
   770  // and whether it is signed or not.
   771  //
   772  // Bit width must be exactly 8, 16, 32 or 64 for an integer logical type
   773  func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
   774  	switch bitWidth {
   775  	case 8, 16, 32, 64:
   776  	default:
   777  		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
   778  	}
   779  	return &IntLogicalType{
   780  		typ: &format.IntType{
   781  			BitWidth: bitWidth,
   782  			IsSigned: signed,
   783  		},
   784  	}
   785  }
   786  
   787  // IntLogicalType represents an integer type of a specific bit width and
   788  // is either signed or unsigned.
   789  type IntLogicalType struct {
   790  	baseLogicalType
   791  	typ *format.IntType
   792  }
   793  
   794  func (t IntLogicalType) BitWidth() int8 {
   795  	return t.typ.BitWidth
   796  }
   797  
   798  func (t IntLogicalType) IsSigned() bool {
   799  	return t.typ.IsSigned
   800  }
   801  
   802  func (t IntLogicalType) SortOrder() SortOrder {
   803  	if t.typ.IsSigned {
   804  		return SortSIGNED
   805  	}
   806  	return SortUNSIGNED
   807  }
   808  
   809  func (t IntLogicalType) MarshalJSON() ([]byte, error) {
   810  	return json.Marshal(map[string]interface{}{
   811  		"Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned,
   812  	})
   813  }
   814  
   815  func (t IntLogicalType) String() string {
   816  	return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned())
   817  }
   818  
   819  func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   820  	var d DecimalMetadata
   821  	if t.typ.IsSigned {
   822  		switch t.typ.BitWidth {
   823  		case 8:
   824  			return ConvertedTypes.Int8, d
   825  		case 16:
   826  			return ConvertedTypes.Int16, d
   827  		case 32:
   828  			return ConvertedTypes.Int32, d
   829  		case 64:
   830  			return ConvertedTypes.Int64, d
   831  		}
   832  	} else {
   833  		switch t.typ.BitWidth {
   834  		case 8:
   835  			return ConvertedTypes.Uint8, d
   836  		case 16:
   837  			return ConvertedTypes.Uint16, d
   838  		case 32:
   839  			return ConvertedTypes.Uint32, d
   840  		case 64:
   841  			return ConvertedTypes.Uint64, d
   842  		}
   843  	}
   844  	return ConvertedTypes.None, d
   845  }
   846  
   847  func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   848  	if dec.IsSet {
   849  		return false
   850  	}
   851  	v, _ := t.ToConvertedType()
   852  	return c == v
   853  }
   854  
   855  func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
   856  	return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) ||
   857  		(typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64)
   858  }
   859  
   860  func (t IntLogicalType) toThrift() *format.LogicalType {
   861  	return &format.LogicalType{INTEGER: t.typ}
   862  }
   863  
   864  func (t IntLogicalType) Equals(rhs LogicalType) bool {
   865  	other, ok := rhs.(*IntLogicalType)
   866  	if !ok {
   867  		return false
   868  	}
   869  
   870  	return t.typ.GetIsSigned() == other.typ.GetIsSigned() &&
   871  		t.typ.GetBitWidth() == other.typ.GetBitWidth()
   872  }
   873  
   874  // UnknownLogicalType is a type that is essentially a placeholder for when
   875  // we don't know the type.
   876  type UnknownLogicalType struct{ baseLogicalType }
   877  
   878  func (UnknownLogicalType) SortOrder() SortOrder {
   879  	return SortUNKNOWN
   880  }
   881  
   882  func (UnknownLogicalType) MarshalJSON() ([]byte, error) {
   883  	return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()})
   884  }
   885  
   886  func (UnknownLogicalType) IsValid() bool { return false }
   887  
   888  func (UnknownLogicalType) IsSerialized() bool { return false }
   889  
   890  func (UnknownLogicalType) String() string {
   891  	return "Unknown"
   892  }
   893  
   894  func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   895  	return ConvertedTypes.NA, DecimalMetadata{}
   896  }
   897  
   898  func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   899  	return c == ConvertedTypes.NA && !dec.IsSet
   900  }
   901  
   902  func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true }
   903  
   904  func (UnknownLogicalType) toThrift() *format.LogicalType {
   905  	return &format.LogicalType{UNKNOWN: format.NewNullType()}
   906  }
   907  
   908  func (UnknownLogicalType) Equals(rhs LogicalType) bool {
   909  	_, ok := rhs.(UnknownLogicalType)
   910  	return ok
   911  }
   912  
   913  // JSONLogicalType represents a byte array column which is to be interpreted
   914  // as a JSON string.
   915  type JSONLogicalType struct{ baseLogicalType }
   916  
   917  func (JSONLogicalType) SortOrder() SortOrder {
   918  	return SortUNSIGNED
   919  }
   920  
   921  func (JSONLogicalType) MarshalJSON() ([]byte, error) {
   922  	return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()})
   923  }
   924  
   925  func (JSONLogicalType) String() string {
   926  	return "JSON"
   927  }
   928  
   929  func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   930  	return ConvertedTypes.JSON, DecimalMetadata{}
   931  }
   932  
   933  func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   934  	return c == ConvertedTypes.JSON && !dec.IsSet
   935  }
   936  
   937  func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   938  	return t == parquet.Types.ByteArray
   939  }
   940  
   941  func (JSONLogicalType) toThrift() *format.LogicalType {
   942  	return &format.LogicalType{JSON: format.NewJsonType()}
   943  }
   944  
   945  func (JSONLogicalType) Equals(rhs LogicalType) bool {
   946  	_, ok := rhs.(JSONLogicalType)
   947  	return ok
   948  }
   949  
   950  // BSONLogicalType represents a binary JSON string in the byte array
   951  type BSONLogicalType struct{ baseLogicalType }
   952  
   953  func (BSONLogicalType) SortOrder() SortOrder {
   954  	return SortUNSIGNED
   955  }
   956  
   957  func (BSONLogicalType) MarshalJSON() ([]byte, error) {
   958  	return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()})
   959  }
   960  
   961  func (BSONLogicalType) String() string {
   962  	return "BSON"
   963  }
   964  
   965  func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   966  	return ConvertedTypes.BSON, DecimalMetadata{}
   967  }
   968  
   969  func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   970  	return c == ConvertedTypes.BSON && !dec.IsSet
   971  }
   972  
   973  func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   974  	return t == parquet.Types.ByteArray
   975  }
   976  
   977  func (BSONLogicalType) toThrift() *format.LogicalType {
   978  	return &format.LogicalType{BSON: format.NewBsonType()}
   979  }
   980  
   981  func (BSONLogicalType) Equals(rhs LogicalType) bool {
   982  	_, ok := rhs.(BSONLogicalType)
   983  	return ok
   984  }
   985  
   986  // UUIDLogicalType can only be used with a FixedLength byte array column
   987  // that is exactly 16 bytes long
   988  type UUIDLogicalType struct{ baseLogicalType }
   989  
   990  func (UUIDLogicalType) SortOrder() SortOrder {
   991  	return SortUNSIGNED
   992  }
   993  
   994  func (UUIDLogicalType) MarshalJSON() ([]byte, error) {
   995  	return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()})
   996  }
   997  
   998  func (UUIDLogicalType) String() string {
   999  	return "UUID"
  1000  }
  1001  
  1002  func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1003  	return ConvertedTypes.None, DecimalMetadata{}
  1004  }
  1005  
  1006  func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1007  	if dec.IsSet {
  1008  		return false
  1009  	}
  1010  	switch c {
  1011  	case ConvertedTypes.None, ConvertedTypes.NA:
  1012  		return true
  1013  	}
  1014  	return false
  1015  }
  1016  
  1017  func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
  1018  	return t == parquet.Types.FixedLenByteArray && tlen == 16
  1019  }
  1020  
  1021  func (UUIDLogicalType) toThrift() *format.LogicalType {
  1022  	return &format.LogicalType{UUID: format.NewUUIDType()}
  1023  }
  1024  
  1025  func (UUIDLogicalType) Equals(rhs LogicalType) bool {
  1026  	_, ok := rhs.(UUIDLogicalType)
  1027  	return ok
  1028  }
  1029  
  1030  // IntervalLogicalType is not yet in the thrift spec, but represents
  1031  // an interval time and needs to be a fixed length byte array of 12 bytes
  1032  type IntervalLogicalType struct{ baseLogicalType }
  1033  
  1034  func (IntervalLogicalType) SortOrder() SortOrder {
  1035  	return SortUNKNOWN
  1036  }
  1037  
  1038  func (IntervalLogicalType) MarshalJSON() ([]byte, error) {
  1039  	return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()})
  1040  }
  1041  
  1042  func (IntervalLogicalType) String() string {
  1043  	return "Interval"
  1044  }
  1045  
  1046  func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1047  	return ConvertedTypes.Interval, DecimalMetadata{}
  1048  }
  1049  
  1050  func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1051  	return c == ConvertedTypes.Interval && !dec.IsSet
  1052  }
  1053  
  1054  func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
  1055  	return t == parquet.Types.FixedLenByteArray && tlen == 12
  1056  }
  1057  
  1058  func (IntervalLogicalType) toThrift() *format.LogicalType {
  1059  	panic("no parquet IntervalLogicalType yet implemented")
  1060  }
  1061  
  1062  func (IntervalLogicalType) Equals(rhs LogicalType) bool {
  1063  	_, ok := rhs.(IntervalLogicalType)
  1064  	return ok
  1065  }
  1066  
  1067  type NullLogicalType struct{ baseLogicalType }
  1068  
  1069  func (NullLogicalType) SortOrder() SortOrder {
  1070  	return SortUNKNOWN
  1071  }
  1072  
  1073  func (NullLogicalType) MarshalJSON() ([]byte, error) {
  1074  	return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()})
  1075  }
  1076  
  1077  func (NullLogicalType) String() string {
  1078  	return "Null"
  1079  }
  1080  
  1081  func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1082  	return ConvertedTypes.None, DecimalMetadata{}
  1083  }
  1084  
  1085  func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1086  	if dec.IsSet {
  1087  		return false
  1088  	}
  1089  	switch c {
  1090  	case ConvertedTypes.None, ConvertedTypes.NA:
  1091  		return true
  1092  	}
  1093  	return false
  1094  }
  1095  
  1096  func (NullLogicalType) IsApplicable(parquet.Type, int32) bool {
  1097  	return true
  1098  }
  1099  
  1100  func (NullLogicalType) toThrift() *format.LogicalType {
  1101  	return &format.LogicalType{UNKNOWN: format.NewNullType()}
  1102  }
  1103  
  1104  func (NullLogicalType) Equals(rhs LogicalType) bool {
  1105  	_, ok := rhs.(NullLogicalType)
  1106  	return ok
  1107  }
  1108  
  1109  type NoLogicalType struct{ baseLogicalType }
  1110  
  1111  func (NoLogicalType) SortOrder() SortOrder {
  1112  	return SortUNKNOWN
  1113  }
  1114  
  1115  func (NoLogicalType) MarshalJSON() ([]byte, error) {
  1116  	return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()})
  1117  }
  1118  
  1119  func (NoLogicalType) IsSerialized() bool { return false }
  1120  
  1121  func (NoLogicalType) String() string {
  1122  	return "None"
  1123  }
  1124  
  1125  func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1126  	return ConvertedTypes.None, DecimalMetadata{}
  1127  }
  1128  
  1129  func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1130  	return c == ConvertedTypes.None && !dec.IsSet
  1131  }
  1132  
  1133  func (NoLogicalType) IsApplicable(parquet.Type, int32) bool {
  1134  	return true
  1135  }
  1136  
  1137  func (NoLogicalType) toThrift() *format.LogicalType {
  1138  	panic("cannot convert NoLogicalType to thrift")
  1139  }
  1140  
  1141  func (NoLogicalType) Equals(rhs LogicalType) bool {
  1142  	_, ok := rhs.(NoLogicalType)
  1143  	return ok
  1144  }
  1145  
  1146  func (NoLogicalType) IsNone() bool { return true }