github.com/apache/arrow/go/v12@v12.0.1/parquet/schema/logical_types.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package schema
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"math"
    23  
    24  	"github.com/apache/arrow/go/v12/parquet"
    25  	"github.com/apache/arrow/go/v12/parquet/internal/debug"
    26  	format "github.com/apache/arrow/go/v12/parquet/internal/gen-go/parquet"
    27  )
    28  
    29  // DecimalMetadata is a struct for managing scale and precision information between
    30  // converted and logical types.
    31  type DecimalMetadata struct {
    32  	IsSet     bool
    33  	Scale     int32
    34  	Precision int32
    35  }
    36  
    37  func getLogicalType(l *format.LogicalType) LogicalType {
    38  	switch {
    39  	case l.IsSetSTRING():
    40  		return StringLogicalType{}
    41  	case l.IsSetMAP():
    42  		return MapLogicalType{}
    43  	case l.IsSetLIST():
    44  		return ListLogicalType{}
    45  	case l.IsSetENUM():
    46  		return EnumLogicalType{}
    47  	case l.IsSetDECIMAL():
    48  		return &DecimalLogicalType{typ: l.DECIMAL}
    49  	case l.IsSetDATE():
    50  		return DateLogicalType{}
    51  	case l.IsSetTIME():
    52  		if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown {
    53  			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type")
    54  		}
    55  		return &TimeLogicalType{typ: l.TIME}
    56  	case l.IsSetTIMESTAMP():
    57  		if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown {
    58  			panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type")
    59  		}
    60  		return &TimestampLogicalType{typ: l.TIMESTAMP}
    61  	case l.IsSetINTEGER():
    62  		return &IntLogicalType{typ: l.INTEGER}
    63  	case l.IsSetUNKNOWN():
    64  		return NullLogicalType{}
    65  	case l.IsSetJSON():
    66  		return JSONLogicalType{}
    67  	case l.IsSetBSON():
    68  		return BSONLogicalType{}
    69  	case l.IsSetUUID():
    70  		return UUIDLogicalType{}
    71  	case l == nil:
    72  		return NoLogicalType{}
    73  	default:
    74  		panic("invalid logical type")
    75  	}
    76  }
    77  
    78  // TimeUnitType is an enum for denoting whether a time based logical type
    79  // is using milliseconds, microseconds or nanoseconds.
    80  type TimeUnitType int
    81  
    82  // Constants for the TimeUnitType
    83  const (
    84  	TimeUnitMillis TimeUnitType = iota
    85  	TimeUnitMicros
    86  	TimeUnitNanos
    87  	TimeUnitUnknown
    88  )
    89  
    90  // LogicalType is the descriptor that defines the usage of a physical primitive
    91  // type in the schema, such as an Interval, Date, etc.
    92  type LogicalType interface {
    93  	// Returns true if a nested type like List or Map
    94  	IsNested() bool
    95  	// Returns true if this type can be serialized, ie: not Unknown/NoType/Interval
    96  	IsSerialized() bool
    97  	// Returns true if not NoLogicalType
    98  	IsValid() bool
    99  	// Returns true if it is NoType
   100  	IsNone() bool
   101  	// returns a string representation of the Logical Type
   102  	String() string
   103  	toThrift() *format.LogicalType
   104  	// Return the equivalent ConvertedType for legacy Parquet systems
   105  	ToConvertedType() (ConvertedType, DecimalMetadata)
   106  	// Returns true if the specified ConvertedType is compatible with this
   107  	// logical type
   108  	IsCompatible(ConvertedType, DecimalMetadata) bool
   109  	// Returns true if this logical type can be used with the provided physical type
   110  	IsApplicable(t parquet.Type, tlen int32) bool
   111  	// Returns true if the logical types are the same
   112  	Equals(LogicalType) bool
   113  	// Returns the default stat sort order for this logical type
   114  	SortOrder() SortOrder
   115  }
   116  
   117  // TemporalLogicalType is a smaller interface for Time based logical types
   118  // like Time / Timestamp
   119  type TemporalLogicalType interface {
   120  	LogicalType
   121  	IsAdjustedToUTC() bool
   122  	TimeUnit() TimeUnitType
   123  }
   124  
   125  // SortOrder mirrors the parquet.thrift sort order type
   126  type SortOrder int8
   127  
   128  // Constants for the Stat sort order definitions
   129  const (
   130  	SortSIGNED SortOrder = iota
   131  	SortUNSIGNED
   132  	SortUNKNOWN
   133  )
   134  
   135  // DefaultSortOrder returns the default stat sort order for the given physical type
   136  func DefaultSortOrder(primitive format.Type) SortOrder {
   137  	switch primitive {
   138  	case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE:
   139  		return SortSIGNED
   140  	case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY:
   141  		return SortUNSIGNED
   142  	case format.Type_INT96:
   143  		fallthrough
   144  	default:
   145  		return SortUNKNOWN
   146  	}
   147  }
   148  
   149  // GetLogicalSortOrder returns the default sort order for this logical type
   150  // or falls back to the default sort order for the physical type if not valid
   151  func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder {
   152  	switch {
   153  	case logical == nil || !logical.IsValid():
   154  		return SortUNKNOWN
   155  	case logical.Equals(NoLogicalType{}):
   156  		return DefaultSortOrder(primitive)
   157  	default:
   158  		return logical.SortOrder()
   159  	}
   160  }
   161  
   162  type baseLogicalType struct{}
   163  
   164  func (baseLogicalType) IsSerialized() bool {
   165  	return true
   166  }
   167  
   168  func (baseLogicalType) IsValid() bool {
   169  	return true
   170  }
   171  
   172  func (baseLogicalType) IsNested() bool {
   173  	return false
   174  }
   175  
   176  func (baseLogicalType) IsNone() bool { return false }
   177  
   178  // StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray
   179  type StringLogicalType struct{ baseLogicalType }
   180  
   181  func (StringLogicalType) SortOrder() SortOrder {
   182  	return SortUNSIGNED
   183  }
   184  
   185  func (StringLogicalType) MarshalJSON() ([]byte, error) {
   186  	return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()})
   187  }
   188  
   189  func (StringLogicalType) String() string {
   190  	return "String"
   191  }
   192  
   193  func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   194  	return ConvertedTypes.UTF8, DecimalMetadata{}
   195  }
   196  
   197  func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   198  	return t == ConvertedTypes.UTF8 && !dec.IsSet
   199  }
   200  
   201  func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   202  	return t == parquet.Types.ByteArray
   203  }
   204  
   205  func (StringLogicalType) toThrift() *format.LogicalType {
   206  	return &format.LogicalType{STRING: format.NewStringType()}
   207  }
   208  
   209  func (StringLogicalType) Equals(rhs LogicalType) bool {
   210  	_, ok := rhs.(StringLogicalType)
   211  	return ok
   212  }
   213  
   214  // MapLogicalType represents a mapped type
   215  type MapLogicalType struct{ baseLogicalType }
   216  
   217  func (MapLogicalType) SortOrder() SortOrder {
   218  	return SortUNKNOWN
   219  }
   220  
   221  func (MapLogicalType) MarshalJSON() ([]byte, error) {
   222  	return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()})
   223  }
   224  
   225  func (MapLogicalType) String() string {
   226  	return "Map"
   227  }
   228  
   229  func (MapLogicalType) IsNested() bool {
   230  	return true
   231  }
   232  
   233  func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   234  	return ConvertedTypes.Map, DecimalMetadata{}
   235  }
   236  
   237  func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   238  	return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet
   239  }
   240  
   241  func (MapLogicalType) IsApplicable(parquet.Type, int32) bool {
   242  	return false
   243  }
   244  
   245  func (MapLogicalType) toThrift() *format.LogicalType {
   246  	return &format.LogicalType{MAP: format.NewMapType()}
   247  }
   248  
   249  func (MapLogicalType) Equals(rhs LogicalType) bool {
   250  	_, ok := rhs.(MapLogicalType)
   251  	return ok
   252  }
   253  
   254  func NewListLogicalType() LogicalType {
   255  	return ListLogicalType{}
   256  }
   257  
   258  // ListLogicalType is used for columns which are themselves nested lists
   259  type ListLogicalType struct{ baseLogicalType }
   260  
   261  func (ListLogicalType) SortOrder() SortOrder {
   262  	return SortUNKNOWN
   263  }
   264  
   265  func (ListLogicalType) MarshalJSON() ([]byte, error) {
   266  	return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()})
   267  }
   268  
   269  func (ListLogicalType) String() string {
   270  	return "List"
   271  }
   272  
   273  func (ListLogicalType) IsNested() bool {
   274  	return true
   275  }
   276  
   277  func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   278  	return ConvertedTypes.List, DecimalMetadata{}
   279  }
   280  
   281  func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   282  	return t == ConvertedTypes.List && !dec.IsSet
   283  }
   284  
   285  func (ListLogicalType) IsApplicable(parquet.Type, int32) bool {
   286  	return false
   287  }
   288  
   289  func (ListLogicalType) toThrift() *format.LogicalType {
   290  	return &format.LogicalType{LIST: format.NewListType()}
   291  }
   292  
   293  func (ListLogicalType) Equals(rhs LogicalType) bool {
   294  	_, ok := rhs.(ListLogicalType)
   295  	return ok
   296  }
   297  
   298  // EnumLogicalType is for representing an enum, which should be a byte array type
   299  type EnumLogicalType struct{ baseLogicalType }
   300  
   301  func (EnumLogicalType) SortOrder() SortOrder {
   302  	return SortUNSIGNED
   303  }
   304  
   305  func (EnumLogicalType) MarshalJSON() ([]byte, error) {
   306  	return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()})
   307  }
   308  
   309  func (EnumLogicalType) String() string {
   310  	return "Enum"
   311  }
   312  
   313  func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   314  	return ConvertedTypes.Enum, DecimalMetadata{}
   315  }
   316  
   317  func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   318  	return t == ConvertedTypes.Enum && !dec.IsSet
   319  }
   320  
   321  func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   322  	return t == parquet.Types.ByteArray
   323  }
   324  
   325  func (EnumLogicalType) toThrift() *format.LogicalType {
   326  	return &format.LogicalType{ENUM: format.NewEnumType()}
   327  }
   328  
   329  func (EnumLogicalType) Equals(rhs LogicalType) bool {
   330  	_, ok := rhs.(EnumLogicalType)
   331  	return ok
   332  }
   333  
   334  // NewDecimalLogicalType returns a Decimal logical type with the given
   335  // precision and scale.
   336  //
   337  // Panics if precision < 1 or scale is not in the range (0, precision)
   338  func NewDecimalLogicalType(precision int32, scale int32) LogicalType {
   339  	if precision < 1 {
   340  		panic("parquet: precision must be greater than or equal to 1 for decimal logical type")
   341  	}
   342  	if scale < 0 || scale > precision {
   343  		panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type")
   344  	}
   345  	return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}}
   346  }
   347  
   348  // DecimalLogicalType is used to represent a decimal value of a given
   349  // precision and scale
   350  type DecimalLogicalType struct {
   351  	baseLogicalType
   352  	typ *format.DecimalType
   353  }
   354  
   355  func (t DecimalLogicalType) Precision() int32 {
   356  	return t.typ.Precision
   357  }
   358  
   359  func (t DecimalLogicalType) Scale() int32 {
   360  	return t.typ.Scale
   361  }
   362  
   363  func (DecimalLogicalType) SortOrder() SortOrder {
   364  	return SortSIGNED
   365  }
   366  
   367  func (t DecimalLogicalType) MarshalJSON() ([]byte, error) {
   368  	return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale})
   369  }
   370  
   371  func (t DecimalLogicalType) String() string {
   372  	return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale)
   373  }
   374  
   375  func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   376  	return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()}
   377  }
   378  
   379  func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   380  	return c == ConvertedTypes.Decimal &&
   381  		dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision
   382  }
   383  
   384  func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool {
   385  	switch typ {
   386  	case parquet.Types.Int32:
   387  		return 1 <= t.typ.Precision && t.typ.Precision <= 9
   388  	case parquet.Types.Int64:
   389  		if t.typ.Precision < 10 {
   390  			debug.Log("int64 used for decimal logical, precision is small enough to use int32")
   391  		}
   392  		return 1 <= t.typ.Precision && t.typ.Precision <= 18
   393  	case parquet.Types.FixedLenByteArray:
   394  		return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0)))))
   395  	case parquet.Types.ByteArray:
   396  		return true
   397  	}
   398  	return false
   399  }
   400  
   401  func (t DecimalLogicalType) toThrift() *format.LogicalType {
   402  	return &format.LogicalType{DECIMAL: t.typ}
   403  }
   404  
   405  func (t DecimalLogicalType) Equals(rhs LogicalType) bool {
   406  	other, ok := rhs.(*DecimalLogicalType)
   407  	if !ok {
   408  		return false
   409  	}
   410  	return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale
   411  }
   412  
   413  // DateLogicalType is an int32 representing the number of days since the Unix Epoch
   414  // 1 January 1970
   415  type DateLogicalType struct{ baseLogicalType }
   416  
   417  func (DateLogicalType) SortOrder() SortOrder {
   418  	return SortSIGNED
   419  }
   420  
   421  func (DateLogicalType) MarshalJSON() ([]byte, error) {
   422  	return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()})
   423  }
   424  
   425  func (DateLogicalType) String() string {
   426  	return "Date"
   427  }
   428  
   429  func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   430  	return ConvertedTypes.Date, DecimalMetadata{}
   431  }
   432  
   433  func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool {
   434  	return t == ConvertedTypes.Date && !dec.IsSet
   435  }
   436  
   437  func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   438  	return t == parquet.Types.Int32
   439  }
   440  
   441  func (DateLogicalType) toThrift() *format.LogicalType {
   442  	return &format.LogicalType{DATE: format.NewDateType()}
   443  }
   444  
   445  func (DateLogicalType) Equals(rhs LogicalType) bool {
   446  	_, ok := rhs.(DateLogicalType)
   447  	return ok
   448  }
   449  
   450  func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType {
   451  	switch {
   452  	case unit == nil:
   453  		return TimeUnitUnknown
   454  	case unit.IsSetMILLIS():
   455  		return TimeUnitMillis
   456  	case unit.IsSetMICROS():
   457  		return TimeUnitMicros
   458  	case unit.IsSetNANOS():
   459  		return TimeUnitNanos
   460  	default:
   461  		return TimeUnitUnknown
   462  	}
   463  }
   464  
   465  func timeUnitToString(unit *format.TimeUnit) string {
   466  	switch {
   467  	case unit == nil:
   468  		return "unknown"
   469  	case unit.IsSetMILLIS():
   470  		return "milliseconds"
   471  	case unit.IsSetMICROS():
   472  		return "microseconds"
   473  	case unit.IsSetNANOS():
   474  		return "nanoseconds"
   475  	default:
   476  		return "unknown"
   477  	}
   478  }
   479  
   480  func timeUnitFromString(v string) TimeUnitType {
   481  	switch v {
   482  	case "millis":
   483  		return TimeUnitMillis
   484  	case "micros":
   485  		return TimeUnitMicros
   486  	case "nanos":
   487  		return TimeUnitNanos
   488  	default:
   489  		return TimeUnitUnknown
   490  	}
   491  }
   492  
   493  func createTimeUnit(unit TimeUnitType) *format.TimeUnit {
   494  	tunit := format.NewTimeUnit()
   495  	switch unit {
   496  	case TimeUnitMicros:
   497  		tunit.MICROS = format.NewMicroSeconds()
   498  	case TimeUnitMillis:
   499  		tunit.MILLIS = format.NewMilliSeconds()
   500  	case TimeUnitNanos:
   501  		tunit.NANOS = format.NewNanoSeconds()
   502  	default:
   503  		panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type")
   504  	}
   505  	return tunit
   506  }
   507  
   508  // NewTimeLogicalType returns a time type of the given unit.
   509  func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   510  	return &TimeLogicalType{typ: &format.TimeType{
   511  		IsAdjustedToUTC: isAdjustedToUTC,
   512  		Unit:            createTimeUnit(unit),
   513  	}}
   514  }
   515  
   516  // TimeLogicalType is a time type without a date and must be an
   517  // int32 for milliseconds, or an int64 for micro or nano seconds.
   518  type TimeLogicalType struct {
   519  	baseLogicalType
   520  	typ *format.TimeType
   521  }
   522  
   523  func (t TimeLogicalType) IsAdjustedToUTC() bool {
   524  	return t.typ.IsAdjustedToUTC
   525  }
   526  
   527  func (t TimeLogicalType) TimeUnit() TimeUnitType {
   528  	return timeUnitFromThrift(t.typ.Unit)
   529  }
   530  
   531  func (TimeLogicalType) SortOrder() SortOrder {
   532  	return SortSIGNED
   533  }
   534  
   535  func (t TimeLogicalType) MarshalJSON() ([]byte, error) {
   536  	return json.Marshal(map[string]interface{}{
   537  		"Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())})
   538  }
   539  
   540  func (t TimeLogicalType) String() string {
   541  	return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()))
   542  }
   543  
   544  func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   545  	unit := timeUnitFromThrift(t.typ.Unit)
   546  	if t.typ.IsAdjustedToUTC {
   547  		switch unit {
   548  		case TimeUnitMillis:
   549  			return ConvertedTypes.TimeMillis, DecimalMetadata{}
   550  		case TimeUnitMicros:
   551  			return ConvertedTypes.TimeMicros, DecimalMetadata{}
   552  		}
   553  	}
   554  	return ConvertedTypes.None, DecimalMetadata{}
   555  }
   556  
   557  func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   558  	if dec.IsSet {
   559  		return false
   560  	}
   561  	unit := timeUnitFromThrift(t.typ.Unit)
   562  	if t.typ.IsAdjustedToUTC {
   563  		switch unit {
   564  		case TimeUnitMillis:
   565  			return c == ConvertedTypes.TimeMillis
   566  		case TimeUnitMicros:
   567  			return c == ConvertedTypes.TimeMicros
   568  		}
   569  	}
   570  
   571  	return c == ConvertedTypes.None || c == ConvertedTypes.NA
   572  }
   573  
   574  func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
   575  	return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) ||
   576  		(typ == parquet.Types.Int64 &&
   577  			(t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS()))
   578  }
   579  
   580  func (t TimeLogicalType) toThrift() *format.LogicalType {
   581  	return &format.LogicalType{TIME: t.typ}
   582  }
   583  
   584  func (t TimeLogicalType) Equals(rhs LogicalType) bool {
   585  	other, ok := rhs.(*TimeLogicalType)
   586  	if !ok {
   587  		return false
   588  	}
   589  	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
   590  		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
   591  }
   592  
   593  // NewTimestampLogicalType returns a logical timestamp type with "forceConverted"
   594  // set to false
   595  func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   596  	return &TimestampLogicalType{
   597  		typ: &format.TimestampType{
   598  			IsAdjustedToUTC: isAdjustedToUTC,
   599  			Unit:            createTimeUnit(unit),
   600  		},
   601  		forceConverted: false,
   602  		fromConverted:  false,
   603  	}
   604  }
   605  
   606  // NewTimestampLogicalTypeForce returns a timestamp logical type with
   607  // "forceConverted" set to true
   608  func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType {
   609  	return &TimestampLogicalType{
   610  		typ: &format.TimestampType{
   611  			IsAdjustedToUTC: isAdjustedToUTC,
   612  			Unit:            createTimeUnit(unit),
   613  		},
   614  		forceConverted: true,
   615  		fromConverted:  false,
   616  	}
   617  }
   618  
   619  // TimestampLogicalType represents an int64 number that can be decoded
   620  // into a year, month, day, hour, minute, second, and subsecond
   621  type TimestampLogicalType struct {
   622  	baseLogicalType
   623  	typ *format.TimestampType
   624  	// forceConverted denotes whether or not the resulting serialized
   625  	// type when writing to parquet will be written as the legacy
   626  	// ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true)
   627  	// or if it will write the proper current Logical Types (false, default)
   628  	forceConverted bool
   629  	// fromConverted denotes if the timestamp type was created by
   630  	// translating a legacy converted type of TIMESTAMP_MILLIS or
   631  	// TIMESTAMP_MICROS rather than by using the current logical
   632  	// types. Default is false.
   633  	fromConverted bool
   634  }
   635  
   636  func (t TimestampLogicalType) IsFromConvertedType() bool {
   637  	return t.fromConverted
   638  }
   639  
   640  func (t TimestampLogicalType) IsAdjustedToUTC() bool {
   641  	return t.typ.IsAdjustedToUTC
   642  }
   643  
   644  func (t TimestampLogicalType) TimeUnit() TimeUnitType {
   645  	return timeUnitFromThrift(t.typ.Unit)
   646  }
   647  
   648  func (TimestampLogicalType) SortOrder() SortOrder {
   649  	return SortSIGNED
   650  }
   651  
   652  func (t TimestampLogicalType) MarshalJSON() ([]byte, error) {
   653  	return json.Marshal(map[string]interface{}{
   654  		"Type":                     "Timestamp",
   655  		"isAdjustedToUTC":          t.typ.IsAdjustedToUTC,
   656  		"timeUnit":                 timeUnitToString(t.typ.GetUnit()),
   657  		"is_from_converted_type":   t.fromConverted,
   658  		"force_set_converted_type": t.forceConverted,
   659  	})
   660  }
   661  
   662  func (t TimestampLogicalType) IsSerialized() bool {
   663  	return !t.fromConverted
   664  }
   665  
   666  func (t TimestampLogicalType) String() string {
   667  	return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)",
   668  		t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted)
   669  }
   670  
   671  func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   672  	unit := timeUnitFromThrift(t.typ.Unit)
   673  	if t.typ.IsAdjustedToUTC || t.forceConverted {
   674  		switch unit {
   675  		case TimeUnitMillis:
   676  			return ConvertedTypes.TimestampMillis, DecimalMetadata{}
   677  		case TimeUnitMicros:
   678  			return ConvertedTypes.TimestampMicros, DecimalMetadata{}
   679  		}
   680  	}
   681  	return ConvertedTypes.None, DecimalMetadata{}
   682  }
   683  
   684  func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   685  	if dec.IsSet {
   686  		return false
   687  	}
   688  
   689  	switch timeUnitFromThrift(t.typ.Unit) {
   690  	case TimeUnitMillis:
   691  		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
   692  			return c == ConvertedTypes.TimestampMillis
   693  		}
   694  	case TimeUnitMicros:
   695  		if t.typ.GetIsAdjustedToUTC() || t.forceConverted {
   696  			return c == ConvertedTypes.TimestampMicros
   697  		}
   698  	}
   699  
   700  	return c == ConvertedTypes.None || c == ConvertedTypes.NA
   701  }
   702  
   703  func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   704  	return t == parquet.Types.Int64
   705  }
   706  
   707  func (t TimestampLogicalType) toThrift() *format.LogicalType {
   708  	return &format.LogicalType{TIMESTAMP: t.typ}
   709  }
   710  
   711  func (t TimestampLogicalType) Equals(rhs LogicalType) bool {
   712  	other, ok := rhs.(*TimestampLogicalType)
   713  	if !ok {
   714  		return false
   715  	}
   716  	return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC &&
   717  		timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit)
   718  }
   719  
   720  // NewIntLogicalType creates an integer logical type of the desired bitwidth
   721  // and whether it is signed or not.
   722  //
   723  // Bit width must be exactly 8, 16, 32 or 64 for an integer logical type
   724  func NewIntLogicalType(bitWidth int8, signed bool) LogicalType {
   725  	switch bitWidth {
   726  	case 8, 16, 32, 64:
   727  	default:
   728  		panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type")
   729  	}
   730  	return &IntLogicalType{
   731  		typ: &format.IntType{
   732  			BitWidth: bitWidth,
   733  			IsSigned: signed,
   734  		},
   735  	}
   736  }
   737  
   738  // IntLogicalType represents an integer type of a specific bit width and
   739  // is either signed or unsigned.
   740  type IntLogicalType struct {
   741  	baseLogicalType
   742  	typ *format.IntType
   743  }
   744  
   745  func (t IntLogicalType) BitWidth() int8 {
   746  	return t.typ.BitWidth
   747  }
   748  
   749  func (t IntLogicalType) IsSigned() bool {
   750  	return t.typ.IsSigned
   751  }
   752  
   753  func (t IntLogicalType) SortOrder() SortOrder {
   754  	if t.typ.IsSigned {
   755  		return SortSIGNED
   756  	}
   757  	return SortUNSIGNED
   758  }
   759  
   760  func (t IntLogicalType) MarshalJSON() ([]byte, error) {
   761  	return json.Marshal(map[string]interface{}{
   762  		"Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned,
   763  	})
   764  }
   765  
   766  func (t IntLogicalType) String() string {
   767  	return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned())
   768  }
   769  
   770  func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   771  	var d DecimalMetadata
   772  	if t.typ.IsSigned {
   773  		switch t.typ.BitWidth {
   774  		case 8:
   775  			return ConvertedTypes.Int8, d
   776  		case 16:
   777  			return ConvertedTypes.Int16, d
   778  		case 32:
   779  			return ConvertedTypes.Int32, d
   780  		case 64:
   781  			return ConvertedTypes.Int64, d
   782  		}
   783  	} else {
   784  		switch t.typ.BitWidth {
   785  		case 8:
   786  			return ConvertedTypes.Uint8, d
   787  		case 16:
   788  			return ConvertedTypes.Uint16, d
   789  		case 32:
   790  			return ConvertedTypes.Uint32, d
   791  		case 64:
   792  			return ConvertedTypes.Uint64, d
   793  		}
   794  	}
   795  	return ConvertedTypes.None, d
   796  }
   797  
   798  func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   799  	if dec.IsSet {
   800  		return false
   801  	}
   802  	v, _ := t.ToConvertedType()
   803  	return c == v
   804  }
   805  
   806  func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool {
   807  	return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) ||
   808  		(typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64)
   809  }
   810  
   811  func (t IntLogicalType) toThrift() *format.LogicalType {
   812  	return &format.LogicalType{INTEGER: t.typ}
   813  }
   814  
   815  func (t IntLogicalType) Equals(rhs LogicalType) bool {
   816  	other, ok := rhs.(*IntLogicalType)
   817  	if !ok {
   818  		return false
   819  	}
   820  
   821  	return t.typ.GetIsSigned() == other.typ.GetIsSigned() &&
   822  		t.typ.GetBitWidth() == other.typ.GetBitWidth()
   823  }
   824  
   825  // UnknownLogicalType is a type that is essentially a placeholder for when
   826  // we don't know the type.
   827  type UnknownLogicalType struct{ baseLogicalType }
   828  
   829  func (UnknownLogicalType) SortOrder() SortOrder {
   830  	return SortUNKNOWN
   831  }
   832  
   833  func (UnknownLogicalType) MarshalJSON() ([]byte, error) {
   834  	return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()})
   835  }
   836  
   837  func (UnknownLogicalType) IsValid() bool { return false }
   838  
   839  func (UnknownLogicalType) IsSerialized() bool { return false }
   840  
   841  func (UnknownLogicalType) String() string {
   842  	return "Unknown"
   843  }
   844  
   845  func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   846  	return ConvertedTypes.NA, DecimalMetadata{}
   847  }
   848  
   849  func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   850  	return c == ConvertedTypes.NA && !dec.IsSet
   851  }
   852  
   853  func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true }
   854  
   855  func (UnknownLogicalType) toThrift() *format.LogicalType {
   856  	return &format.LogicalType{UNKNOWN: format.NewNullType()}
   857  }
   858  
   859  func (UnknownLogicalType) Equals(rhs LogicalType) bool {
   860  	_, ok := rhs.(UnknownLogicalType)
   861  	return ok
   862  }
   863  
   864  // JSONLogicalType represents a byte array column which is to be interpreted
   865  // as a JSON string.
   866  type JSONLogicalType struct{ baseLogicalType }
   867  
   868  func (JSONLogicalType) SortOrder() SortOrder {
   869  	return SortUNSIGNED
   870  }
   871  
   872  func (JSONLogicalType) MarshalJSON() ([]byte, error) {
   873  	return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()})
   874  }
   875  
   876  func (JSONLogicalType) String() string {
   877  	return "JSON"
   878  }
   879  
   880  func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   881  	return ConvertedTypes.JSON, DecimalMetadata{}
   882  }
   883  
   884  func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   885  	return c == ConvertedTypes.JSON && !dec.IsSet
   886  }
   887  
   888  func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   889  	return t == parquet.Types.ByteArray
   890  }
   891  
   892  func (JSONLogicalType) toThrift() *format.LogicalType {
   893  	return &format.LogicalType{JSON: format.NewJsonType()}
   894  }
   895  
   896  func (JSONLogicalType) Equals(rhs LogicalType) bool {
   897  	_, ok := rhs.(JSONLogicalType)
   898  	return ok
   899  }
   900  
   901  // BSONLogicalType represents a binary JSON string in the byte array
   902  type BSONLogicalType struct{ baseLogicalType }
   903  
   904  func (BSONLogicalType) SortOrder() SortOrder {
   905  	return SortUNSIGNED
   906  }
   907  
   908  func (BSONLogicalType) MarshalJSON() ([]byte, error) {
   909  	return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()})
   910  }
   911  
   912  func (BSONLogicalType) String() string {
   913  	return "BSON"
   914  }
   915  
   916  func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   917  	return ConvertedTypes.BSON, DecimalMetadata{}
   918  }
   919  
   920  func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   921  	return c == ConvertedTypes.BSON && !dec.IsSet
   922  }
   923  
   924  func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool {
   925  	return t == parquet.Types.ByteArray
   926  }
   927  
   928  func (BSONLogicalType) toThrift() *format.LogicalType {
   929  	return &format.LogicalType{BSON: format.NewBsonType()}
   930  }
   931  
   932  func (BSONLogicalType) Equals(rhs LogicalType) bool {
   933  	_, ok := rhs.(BSONLogicalType)
   934  	return ok
   935  }
   936  
   937  // UUIDLogicalType can only be used with a FixedLength byte array column
   938  // that is exactly 16 bytes long
   939  type UUIDLogicalType struct{ baseLogicalType }
   940  
   941  func (UUIDLogicalType) SortOrder() SortOrder {
   942  	return SortUNSIGNED
   943  }
   944  
   945  func (UUIDLogicalType) MarshalJSON() ([]byte, error) {
   946  	return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()})
   947  }
   948  
   949  func (UUIDLogicalType) String() string {
   950  	return "UUID"
   951  }
   952  
   953  func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   954  	return ConvertedTypes.None, DecimalMetadata{}
   955  }
   956  
   957  func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
   958  	if dec.IsSet {
   959  		return false
   960  	}
   961  	switch c {
   962  	case ConvertedTypes.None, ConvertedTypes.NA:
   963  		return true
   964  	}
   965  	return false
   966  }
   967  
   968  func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
   969  	return t == parquet.Types.FixedLenByteArray && tlen == 16
   970  }
   971  
   972  func (UUIDLogicalType) toThrift() *format.LogicalType {
   973  	return &format.LogicalType{UUID: format.NewUUIDType()}
   974  }
   975  
   976  func (UUIDLogicalType) Equals(rhs LogicalType) bool {
   977  	_, ok := rhs.(UUIDLogicalType)
   978  	return ok
   979  }
   980  
   981  // IntervalLogicalType is not yet in the thrift spec, but represents
   982  // an interval time and needs to be a fixed length byte array of 12 bytes
   983  type IntervalLogicalType struct{ baseLogicalType }
   984  
   985  func (IntervalLogicalType) SortOrder() SortOrder {
   986  	return SortUNKNOWN
   987  }
   988  
   989  func (IntervalLogicalType) MarshalJSON() ([]byte, error) {
   990  	return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()})
   991  }
   992  
   993  func (IntervalLogicalType) String() string {
   994  	return "Interval"
   995  }
   996  
   997  func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
   998  	return ConvertedTypes.Interval, DecimalMetadata{}
   999  }
  1000  
  1001  func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1002  	return c == ConvertedTypes.Interval && !dec.IsSet
  1003  }
  1004  
  1005  func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool {
  1006  	return t == parquet.Types.FixedLenByteArray && tlen == 12
  1007  }
  1008  
  1009  func (IntervalLogicalType) toThrift() *format.LogicalType {
  1010  	panic("no parquet IntervalLogicalType yet implemented")
  1011  }
  1012  
  1013  func (IntervalLogicalType) Equals(rhs LogicalType) bool {
  1014  	_, ok := rhs.(IntervalLogicalType)
  1015  	return ok
  1016  }
  1017  
  1018  type NullLogicalType struct{ baseLogicalType }
  1019  
  1020  func (NullLogicalType) SortOrder() SortOrder {
  1021  	return SortUNKNOWN
  1022  }
  1023  
  1024  func (NullLogicalType) MarshalJSON() ([]byte, error) {
  1025  	return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()})
  1026  }
  1027  
  1028  func (NullLogicalType) String() string {
  1029  	return "Null"
  1030  }
  1031  
  1032  func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1033  	return ConvertedTypes.None, DecimalMetadata{}
  1034  }
  1035  
  1036  func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1037  	if dec.IsSet {
  1038  		return false
  1039  	}
  1040  	switch c {
  1041  	case ConvertedTypes.None, ConvertedTypes.NA:
  1042  		return true
  1043  	}
  1044  	return false
  1045  }
  1046  
  1047  func (NullLogicalType) IsApplicable(parquet.Type, int32) bool {
  1048  	return true
  1049  }
  1050  
  1051  func (NullLogicalType) toThrift() *format.LogicalType {
  1052  	return &format.LogicalType{UNKNOWN: format.NewNullType()}
  1053  }
  1054  
  1055  func (NullLogicalType) Equals(rhs LogicalType) bool {
  1056  	_, ok := rhs.(NullLogicalType)
  1057  	return ok
  1058  }
  1059  
  1060  type NoLogicalType struct{ baseLogicalType }
  1061  
  1062  func (NoLogicalType) SortOrder() SortOrder {
  1063  	return SortUNKNOWN
  1064  }
  1065  
  1066  func (NoLogicalType) MarshalJSON() ([]byte, error) {
  1067  	return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()})
  1068  }
  1069  
  1070  func (NoLogicalType) IsSerialized() bool { return false }
  1071  
  1072  func (NoLogicalType) String() string {
  1073  	return "None"
  1074  }
  1075  
  1076  func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) {
  1077  	return ConvertedTypes.None, DecimalMetadata{}
  1078  }
  1079  
  1080  func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool {
  1081  	return c == ConvertedTypes.None && !dec.IsSet
  1082  }
  1083  
  1084  func (NoLogicalType) IsApplicable(parquet.Type, int32) bool {
  1085  	return true
  1086  }
  1087  
  1088  func (NoLogicalType) toThrift() *format.LogicalType {
  1089  	panic("cannot convert NoLogicalType to thrift")
  1090  }
  1091  
  1092  func (NoLogicalType) Equals(rhs LogicalType) bool {
  1093  	_, ok := rhs.(NoLogicalType)
  1094  	return ok
  1095  }
  1096  
  1097  func (NoLogicalType) IsNone() bool { return true }