github.com/apache/arrow/go/v14@v14.0.2/parquet/schema/logical_types.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package schema 18 19 import ( 20 "fmt" 21 "math" 22 23 "github.com/apache/arrow/go/v14/internal/json" 24 "github.com/apache/arrow/go/v14/parquet" 25 "github.com/apache/arrow/go/v14/parquet/internal/debug" 26 format "github.com/apache/arrow/go/v14/parquet/internal/gen-go/parquet" 27 ) 28 29 // DecimalMetadata is a struct for managing scale and precision information between 30 // converted and logical types. 31 type DecimalMetadata struct { 32 IsSet bool 33 Scale int32 34 Precision int32 35 } 36 37 func getLogicalType(l *format.LogicalType) LogicalType { 38 switch { 39 case l.IsSetSTRING(): 40 return StringLogicalType{} 41 case l.IsSetMAP(): 42 return MapLogicalType{} 43 case l.IsSetLIST(): 44 return ListLogicalType{} 45 case l.IsSetENUM(): 46 return EnumLogicalType{} 47 case l.IsSetDECIMAL(): 48 return &DecimalLogicalType{typ: l.DECIMAL} 49 case l.IsSetDATE(): 50 return DateLogicalType{} 51 case l.IsSetTIME(): 52 if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown { 53 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type") 54 } 55 return &TimeLogicalType{typ: l.TIME} 56 case l.IsSetTIMESTAMP(): 57 if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown { 58 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type") 59 } 60 return &TimestampLogicalType{typ: l.TIMESTAMP} 61 case l.IsSetINTEGER(): 62 return &IntLogicalType{typ: l.INTEGER} 63 case l.IsSetUNKNOWN(): 64 return NullLogicalType{} 65 case l.IsSetJSON(): 66 return JSONLogicalType{} 67 case l.IsSetBSON(): 68 return BSONLogicalType{} 69 case l.IsSetUUID(): 70 return UUIDLogicalType{} 71 case l == nil: 72 return NoLogicalType{} 73 default: 74 panic("invalid logical type") 75 } 76 } 77 78 // TimeUnitType is an enum for denoting whether a time based logical type 79 // is using milliseconds, microseconds or nanoseconds. 80 type TimeUnitType int 81 82 // Constants for the TimeUnitType 83 const ( 84 TimeUnitMillis TimeUnitType = iota 85 TimeUnitMicros 86 TimeUnitNanos 87 TimeUnitUnknown 88 ) 89 90 // LogicalType is the descriptor that defines the usage of a physical primitive 91 // type in the schema, such as an Interval, Date, etc. 92 type LogicalType interface { 93 // Returns true if a nested type like List or Map 94 IsNested() bool 95 // Returns true if this type can be serialized, ie: not Unknown/NoType/Interval 96 IsSerialized() bool 97 // Returns true if not NoLogicalType 98 IsValid() bool 99 // Returns true if it is NoType 100 IsNone() bool 101 // returns a string representation of the Logical Type 102 String() string 103 toThrift() *format.LogicalType 104 // Return the equivalent ConvertedType for legacy Parquet systems 105 ToConvertedType() (ConvertedType, DecimalMetadata) 106 // Returns true if the specified ConvertedType is compatible with this 107 // logical type 108 IsCompatible(ConvertedType, DecimalMetadata) bool 109 // Returns true if this logical type can be used with the provided physical type 110 IsApplicable(t parquet.Type, tlen int32) bool 111 // Returns true if the logical types are the same 112 Equals(LogicalType) bool 113 // Returns the default stat sort order for this logical type 114 SortOrder() SortOrder 115 } 116 117 // TemporalLogicalType is a smaller interface for Time based logical types 118 // like Time / Timestamp 119 type TemporalLogicalType interface { 120 LogicalType 121 IsAdjustedToUTC() bool 122 TimeUnit() TimeUnitType 123 } 124 125 // SortOrder mirrors the parquet.thrift sort order type 126 type SortOrder int8 127 128 // Constants for the Stat sort order definitions 129 const ( 130 SortSIGNED SortOrder = iota 131 SortUNSIGNED 132 SortUNKNOWN 133 ) 134 135 // DefaultSortOrder returns the default stat sort order for the given physical type 136 func DefaultSortOrder(primitive format.Type) SortOrder { 137 switch primitive { 138 case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE: 139 return SortSIGNED 140 case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY: 141 return SortUNSIGNED 142 case format.Type_INT96: 143 fallthrough 144 default: 145 return SortUNKNOWN 146 } 147 } 148 149 // GetLogicalSortOrder returns the default sort order for this logical type 150 // or falls back to the default sort order for the physical type if not valid 151 func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder { 152 switch { 153 case logical == nil || !logical.IsValid(): 154 return SortUNKNOWN 155 case logical.Equals(NoLogicalType{}): 156 return DefaultSortOrder(primitive) 157 default: 158 return logical.SortOrder() 159 } 160 } 161 162 type baseLogicalType struct{} 163 164 func (baseLogicalType) IsSerialized() bool { 165 return true 166 } 167 168 func (baseLogicalType) IsValid() bool { 169 return true 170 } 171 172 func (baseLogicalType) IsNested() bool { 173 return false 174 } 175 176 func (baseLogicalType) IsNone() bool { return false } 177 178 // StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray 179 type StringLogicalType struct{ baseLogicalType } 180 181 func (StringLogicalType) SortOrder() SortOrder { 182 return SortUNSIGNED 183 } 184 185 func (StringLogicalType) MarshalJSON() ([]byte, error) { 186 return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()}) 187 } 188 189 func (StringLogicalType) String() string { 190 return "String" 191 } 192 193 func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 194 return ConvertedTypes.UTF8, DecimalMetadata{} 195 } 196 197 func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 198 return t == ConvertedTypes.UTF8 && !dec.IsSet 199 } 200 201 func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 202 return t == parquet.Types.ByteArray 203 } 204 205 func (StringLogicalType) toThrift() *format.LogicalType { 206 return &format.LogicalType{STRING: format.NewStringType()} 207 } 208 209 func (StringLogicalType) Equals(rhs LogicalType) bool { 210 _, ok := rhs.(StringLogicalType) 211 return ok 212 } 213 214 // MapLogicalType represents a mapped type 215 type MapLogicalType struct{ baseLogicalType } 216 217 func (MapLogicalType) SortOrder() SortOrder { 218 return SortUNKNOWN 219 } 220 221 func (MapLogicalType) MarshalJSON() ([]byte, error) { 222 return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()}) 223 } 224 225 func (MapLogicalType) String() string { 226 return "Map" 227 } 228 229 func (MapLogicalType) IsNested() bool { 230 return true 231 } 232 233 func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 234 return ConvertedTypes.Map, DecimalMetadata{} 235 } 236 237 func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 238 return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet 239 } 240 241 func (MapLogicalType) IsApplicable(parquet.Type, int32) bool { 242 return false 243 } 244 245 func (MapLogicalType) toThrift() *format.LogicalType { 246 return &format.LogicalType{MAP: format.NewMapType()} 247 } 248 249 func (MapLogicalType) Equals(rhs LogicalType) bool { 250 _, ok := rhs.(MapLogicalType) 251 return ok 252 } 253 254 func NewListLogicalType() LogicalType { 255 return ListLogicalType{} 256 } 257 258 // ListLogicalType is used for columns which are themselves nested lists 259 type ListLogicalType struct{ baseLogicalType } 260 261 func (ListLogicalType) SortOrder() SortOrder { 262 return SortUNKNOWN 263 } 264 265 func (ListLogicalType) MarshalJSON() ([]byte, error) { 266 return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()}) 267 } 268 269 func (ListLogicalType) String() string { 270 return "List" 271 } 272 273 func (ListLogicalType) IsNested() bool { 274 return true 275 } 276 277 func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 278 return ConvertedTypes.List, DecimalMetadata{} 279 } 280 281 func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 282 return t == ConvertedTypes.List && !dec.IsSet 283 } 284 285 func (ListLogicalType) IsApplicable(parquet.Type, int32) bool { 286 return false 287 } 288 289 func (ListLogicalType) toThrift() *format.LogicalType { 290 return &format.LogicalType{LIST: format.NewListType()} 291 } 292 293 func (ListLogicalType) Equals(rhs LogicalType) bool { 294 _, ok := rhs.(ListLogicalType) 295 return ok 296 } 297 298 // EnumLogicalType is for representing an enum, which should be a byte array type 299 type EnumLogicalType struct{ baseLogicalType } 300 301 func (EnumLogicalType) SortOrder() SortOrder { 302 return SortUNSIGNED 303 } 304 305 func (EnumLogicalType) MarshalJSON() ([]byte, error) { 306 return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()}) 307 } 308 309 func (EnumLogicalType) String() string { 310 return "Enum" 311 } 312 313 func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 314 return ConvertedTypes.Enum, DecimalMetadata{} 315 } 316 317 func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 318 return t == ConvertedTypes.Enum && !dec.IsSet 319 } 320 321 func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 322 return t == parquet.Types.ByteArray 323 } 324 325 func (EnumLogicalType) toThrift() *format.LogicalType { 326 return &format.LogicalType{ENUM: format.NewEnumType()} 327 } 328 329 func (EnumLogicalType) Equals(rhs LogicalType) bool { 330 _, ok := rhs.(EnumLogicalType) 331 return ok 332 } 333 334 // NewDecimalLogicalType returns a Decimal logical type with the given 335 // precision and scale. 336 // 337 // Panics if precision < 1 or scale is not in the range (0, precision) 338 func NewDecimalLogicalType(precision int32, scale int32) LogicalType { 339 if precision < 1 { 340 panic("parquet: precision must be greater than or equal to 1 for decimal logical type") 341 } 342 if scale < 0 || scale > precision { 343 panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type") 344 } 345 return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}} 346 } 347 348 // DecimalLogicalType is used to represent a decimal value of a given 349 // precision and scale 350 type DecimalLogicalType struct { 351 baseLogicalType 352 typ *format.DecimalType 353 } 354 355 func (t DecimalLogicalType) Precision() int32 { 356 return t.typ.Precision 357 } 358 359 func (t DecimalLogicalType) Scale() int32 { 360 return t.typ.Scale 361 } 362 363 func (DecimalLogicalType) SortOrder() SortOrder { 364 return SortSIGNED 365 } 366 367 func (t DecimalLogicalType) MarshalJSON() ([]byte, error) { 368 return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale}) 369 } 370 371 func (t DecimalLogicalType) String() string { 372 return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale) 373 } 374 375 func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 376 return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()} 377 } 378 379 func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 380 return c == ConvertedTypes.Decimal && 381 dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision 382 } 383 384 func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool { 385 switch typ { 386 case parquet.Types.Int32: 387 return 1 <= t.typ.Precision && t.typ.Precision <= 9 388 case parquet.Types.Int64: 389 if t.typ.Precision < 10 { 390 debug.Log("int64 used for decimal logical, precision is small enough to use int32") 391 } 392 return 1 <= t.typ.Precision && t.typ.Precision <= 18 393 case parquet.Types.FixedLenByteArray: 394 return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0))))) 395 case parquet.Types.ByteArray: 396 return true 397 } 398 return false 399 } 400 401 func (t DecimalLogicalType) toThrift() *format.LogicalType { 402 return &format.LogicalType{DECIMAL: t.typ} 403 } 404 405 func (t DecimalLogicalType) Equals(rhs LogicalType) bool { 406 other, ok := rhs.(*DecimalLogicalType) 407 if !ok { 408 return false 409 } 410 return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale 411 } 412 413 // DateLogicalType is an int32 representing the number of days since the Unix Epoch 414 // 1 January 1970 415 type DateLogicalType struct{ baseLogicalType } 416 417 func (DateLogicalType) SortOrder() SortOrder { 418 return SortSIGNED 419 } 420 421 func (DateLogicalType) MarshalJSON() ([]byte, error) { 422 return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()}) 423 } 424 425 func (DateLogicalType) String() string { 426 return "Date" 427 } 428 429 func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 430 return ConvertedTypes.Date, DecimalMetadata{} 431 } 432 433 func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 434 return t == ConvertedTypes.Date && !dec.IsSet 435 } 436 437 func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 438 return t == parquet.Types.Int32 439 } 440 441 func (DateLogicalType) toThrift() *format.LogicalType { 442 return &format.LogicalType{DATE: format.NewDateType()} 443 } 444 445 func (DateLogicalType) Equals(rhs LogicalType) bool { 446 _, ok := rhs.(DateLogicalType) 447 return ok 448 } 449 450 func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType { 451 switch { 452 case unit == nil: 453 return TimeUnitUnknown 454 case unit.IsSetMILLIS(): 455 return TimeUnitMillis 456 case unit.IsSetMICROS(): 457 return TimeUnitMicros 458 case unit.IsSetNANOS(): 459 return TimeUnitNanos 460 default: 461 return TimeUnitUnknown 462 } 463 } 464 465 func timeUnitToString(unit *format.TimeUnit) string { 466 switch { 467 case unit == nil: 468 return "unknown" 469 case unit.IsSetMILLIS(): 470 return "milliseconds" 471 case unit.IsSetMICROS(): 472 return "microseconds" 473 case unit.IsSetNANOS(): 474 return "nanoseconds" 475 default: 476 return "unknown" 477 } 478 } 479 480 func timeUnitFromString(v string) TimeUnitType { 481 switch v { 482 case "millis": 483 return TimeUnitMillis 484 case "micros": 485 return TimeUnitMicros 486 case "nanos": 487 return TimeUnitNanos 488 default: 489 return TimeUnitUnknown 490 } 491 } 492 493 func createTimeUnit(unit TimeUnitType) *format.TimeUnit { 494 tunit := format.NewTimeUnit() 495 switch unit { 496 case TimeUnitMicros: 497 tunit.MICROS = format.NewMicroSeconds() 498 case TimeUnitMillis: 499 tunit.MILLIS = format.NewMilliSeconds() 500 case TimeUnitNanos: 501 tunit.NANOS = format.NewNanoSeconds() 502 default: 503 panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type") 504 } 505 return tunit 506 } 507 508 // NewTimeLogicalType returns a time type of the given unit. 509 func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 510 return &TimeLogicalType{typ: &format.TimeType{ 511 IsAdjustedToUTC: isAdjustedToUTC, 512 Unit: createTimeUnit(unit), 513 }} 514 } 515 516 // TimeLogicalType is a time type without a date and must be an 517 // int32 for milliseconds, or an int64 for micro or nano seconds. 518 type TimeLogicalType struct { 519 baseLogicalType 520 typ *format.TimeType 521 } 522 523 func (t TimeLogicalType) IsAdjustedToUTC() bool { 524 return t.typ.IsAdjustedToUTC 525 } 526 527 func (t TimeLogicalType) TimeUnit() TimeUnitType { 528 return timeUnitFromThrift(t.typ.Unit) 529 } 530 531 func (TimeLogicalType) SortOrder() SortOrder { 532 return SortSIGNED 533 } 534 535 func (t TimeLogicalType) MarshalJSON() ([]byte, error) { 536 return json.Marshal(map[string]interface{}{ 537 "Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())}) 538 } 539 540 func (t TimeLogicalType) String() string { 541 return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit())) 542 } 543 544 func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 545 unit := timeUnitFromThrift(t.typ.Unit) 546 if t.typ.IsAdjustedToUTC { 547 switch unit { 548 case TimeUnitMillis: 549 return ConvertedTypes.TimeMillis, DecimalMetadata{} 550 case TimeUnitMicros: 551 return ConvertedTypes.TimeMicros, DecimalMetadata{} 552 } 553 } 554 return ConvertedTypes.None, DecimalMetadata{} 555 } 556 557 func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 558 if dec.IsSet { 559 return false 560 } 561 unit := timeUnitFromThrift(t.typ.Unit) 562 if t.typ.IsAdjustedToUTC { 563 switch unit { 564 case TimeUnitMillis: 565 return c == ConvertedTypes.TimeMillis 566 case TimeUnitMicros: 567 return c == ConvertedTypes.TimeMicros 568 } 569 } 570 571 return c == ConvertedTypes.None || c == ConvertedTypes.NA 572 } 573 574 func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 575 return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) || 576 (typ == parquet.Types.Int64 && 577 (t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS())) 578 } 579 580 func (t TimeLogicalType) toThrift() *format.LogicalType { 581 return &format.LogicalType{TIME: t.typ} 582 } 583 584 func (t TimeLogicalType) Equals(rhs LogicalType) bool { 585 other, ok := rhs.(*TimeLogicalType) 586 if !ok { 587 return false 588 } 589 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 590 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 591 } 592 593 // NewTimestampLogicalType returns a logical timestamp type with "forceConverted" 594 // set to false 595 func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 596 return &TimestampLogicalType{ 597 typ: &format.TimestampType{ 598 IsAdjustedToUTC: isAdjustedToUTC, 599 Unit: createTimeUnit(unit), 600 }, 601 forceConverted: false, 602 fromConverted: false, 603 } 604 } 605 606 // NewTimestampLogicalTypeForce returns a timestamp logical type with 607 // "forceConverted" set to true 608 func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 609 return &TimestampLogicalType{ 610 typ: &format.TimestampType{ 611 IsAdjustedToUTC: isAdjustedToUTC, 612 Unit: createTimeUnit(unit), 613 }, 614 forceConverted: true, 615 fromConverted: false, 616 } 617 } 618 619 // TimestampOpt options used with New Timestamp Logical Type 620 type TimestampOpt func(*TimestampLogicalType) 621 622 // WithTSIsAdjustedToUTC sets the IsAdjustedToUTC field of the timestamp type. 623 func WithTSIsAdjustedToUTC() TimestampOpt { 624 return func(t *TimestampLogicalType) { 625 t.typ.IsAdjustedToUTC = true 626 } 627 } 628 629 // WithTSTimeUnitType sets the time unit for the timestamp type 630 func WithTSTimeUnitType(unit TimeUnitType) TimestampOpt { 631 return func(t *TimestampLogicalType) { 632 t.typ.Unit = createTimeUnit(unit) 633 } 634 } 635 636 // WithTSForceConverted enable force converted mode 637 func WithTSForceConverted() TimestampOpt { 638 return func(t *TimestampLogicalType) { 639 t.forceConverted = true 640 } 641 } 642 643 // WithTSFromConverted enable the timestamp logical type to be 644 // constructed from a converted type. 645 func WithTSFromConverted() TimestampOpt { 646 return func(t *TimestampLogicalType) { 647 t.fromConverted = true 648 } 649 } 650 651 // NewTimestampLogicalTypeWithOpts creates a new TimestampLogicalType with the provided options. 652 // 653 // TimestampType Unit defaults to milliseconds (TimeUnitMillis) 654 func NewTimestampLogicalTypeWithOpts(opts ...TimestampOpt) LogicalType { 655 ts := &TimestampLogicalType{ 656 typ: &format.TimestampType{ 657 Unit: createTimeUnit(TimeUnitMillis), // default to milliseconds 658 }, 659 } 660 661 for _, o := range opts { 662 o(ts) 663 } 664 665 return ts 666 } 667 668 // TimestampLogicalType represents an int64 number that can be decoded 669 // into a year, month, day, hour, minute, second, and subsecond 670 type TimestampLogicalType struct { 671 baseLogicalType 672 typ *format.TimestampType 673 // forceConverted denotes whether or not the resulting serialized 674 // type when writing to parquet will be written as the legacy 675 // ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true) 676 // or if it will write the proper current Logical Types (false, default) 677 forceConverted bool 678 // fromConverted denotes if the timestamp type was created by 679 // translating a legacy converted type of TIMESTAMP_MILLIS or 680 // TIMESTAMP_MICROS rather than by using the current logical 681 // types. Default is false. 682 fromConverted bool 683 } 684 685 func (t TimestampLogicalType) IsFromConvertedType() bool { 686 return t.fromConverted 687 } 688 689 func (t TimestampLogicalType) IsAdjustedToUTC() bool { 690 return t.typ.IsAdjustedToUTC 691 } 692 693 func (t TimestampLogicalType) TimeUnit() TimeUnitType { 694 return timeUnitFromThrift(t.typ.Unit) 695 } 696 697 func (TimestampLogicalType) SortOrder() SortOrder { 698 return SortSIGNED 699 } 700 701 func (t TimestampLogicalType) MarshalJSON() ([]byte, error) { 702 return json.Marshal(map[string]interface{}{ 703 "Type": "Timestamp", 704 "isAdjustedToUTC": t.typ.IsAdjustedToUTC, 705 "timeUnit": timeUnitToString(t.typ.GetUnit()), 706 "is_from_converted_type": t.fromConverted, 707 "force_set_converted_type": t.forceConverted, 708 }) 709 } 710 711 func (t TimestampLogicalType) IsSerialized() bool { 712 return !t.fromConverted 713 } 714 715 func (t TimestampLogicalType) String() string { 716 return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)", 717 t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted) 718 } 719 720 func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 721 unit := timeUnitFromThrift(t.typ.Unit) 722 if t.typ.IsAdjustedToUTC || t.forceConverted { 723 switch unit { 724 case TimeUnitMillis: 725 return ConvertedTypes.TimestampMillis, DecimalMetadata{} 726 case TimeUnitMicros: 727 return ConvertedTypes.TimestampMicros, DecimalMetadata{} 728 } 729 } 730 return ConvertedTypes.None, DecimalMetadata{} 731 } 732 733 func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 734 if dec.IsSet { 735 return false 736 } 737 738 switch timeUnitFromThrift(t.typ.Unit) { 739 case TimeUnitMillis: 740 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 741 return c == ConvertedTypes.TimestampMillis 742 } 743 case TimeUnitMicros: 744 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 745 return c == ConvertedTypes.TimestampMicros 746 } 747 } 748 749 return c == ConvertedTypes.None || c == ConvertedTypes.NA 750 } 751 752 func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 753 return t == parquet.Types.Int64 754 } 755 756 func (t TimestampLogicalType) toThrift() *format.LogicalType { 757 return &format.LogicalType{TIMESTAMP: t.typ} 758 } 759 760 func (t TimestampLogicalType) Equals(rhs LogicalType) bool { 761 other, ok := rhs.(*TimestampLogicalType) 762 if !ok { 763 return false 764 } 765 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 766 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 767 } 768 769 // NewIntLogicalType creates an integer logical type of the desired bitwidth 770 // and whether it is signed or not. 771 // 772 // Bit width must be exactly 8, 16, 32 or 64 for an integer logical type 773 func NewIntLogicalType(bitWidth int8, signed bool) LogicalType { 774 switch bitWidth { 775 case 8, 16, 32, 64: 776 default: 777 panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type") 778 } 779 return &IntLogicalType{ 780 typ: &format.IntType{ 781 BitWidth: bitWidth, 782 IsSigned: signed, 783 }, 784 } 785 } 786 787 // IntLogicalType represents an integer type of a specific bit width and 788 // is either signed or unsigned. 789 type IntLogicalType struct { 790 baseLogicalType 791 typ *format.IntType 792 } 793 794 func (t IntLogicalType) BitWidth() int8 { 795 return t.typ.BitWidth 796 } 797 798 func (t IntLogicalType) IsSigned() bool { 799 return t.typ.IsSigned 800 } 801 802 func (t IntLogicalType) SortOrder() SortOrder { 803 if t.typ.IsSigned { 804 return SortSIGNED 805 } 806 return SortUNSIGNED 807 } 808 809 func (t IntLogicalType) MarshalJSON() ([]byte, error) { 810 return json.Marshal(map[string]interface{}{ 811 "Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned, 812 }) 813 } 814 815 func (t IntLogicalType) String() string { 816 return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned()) 817 } 818 819 func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 820 var d DecimalMetadata 821 if t.typ.IsSigned { 822 switch t.typ.BitWidth { 823 case 8: 824 return ConvertedTypes.Int8, d 825 case 16: 826 return ConvertedTypes.Int16, d 827 case 32: 828 return ConvertedTypes.Int32, d 829 case 64: 830 return ConvertedTypes.Int64, d 831 } 832 } else { 833 switch t.typ.BitWidth { 834 case 8: 835 return ConvertedTypes.Uint8, d 836 case 16: 837 return ConvertedTypes.Uint16, d 838 case 32: 839 return ConvertedTypes.Uint32, d 840 case 64: 841 return ConvertedTypes.Uint64, d 842 } 843 } 844 return ConvertedTypes.None, d 845 } 846 847 func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 848 if dec.IsSet { 849 return false 850 } 851 v, _ := t.ToConvertedType() 852 return c == v 853 } 854 855 func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 856 return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) || 857 (typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64) 858 } 859 860 func (t IntLogicalType) toThrift() *format.LogicalType { 861 return &format.LogicalType{INTEGER: t.typ} 862 } 863 864 func (t IntLogicalType) Equals(rhs LogicalType) bool { 865 other, ok := rhs.(*IntLogicalType) 866 if !ok { 867 return false 868 } 869 870 return t.typ.GetIsSigned() == other.typ.GetIsSigned() && 871 t.typ.GetBitWidth() == other.typ.GetBitWidth() 872 } 873 874 // UnknownLogicalType is a type that is essentially a placeholder for when 875 // we don't know the type. 876 type UnknownLogicalType struct{ baseLogicalType } 877 878 func (UnknownLogicalType) SortOrder() SortOrder { 879 return SortUNKNOWN 880 } 881 882 func (UnknownLogicalType) MarshalJSON() ([]byte, error) { 883 return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()}) 884 } 885 886 func (UnknownLogicalType) IsValid() bool { return false } 887 888 func (UnknownLogicalType) IsSerialized() bool { return false } 889 890 func (UnknownLogicalType) String() string { 891 return "Unknown" 892 } 893 894 func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 895 return ConvertedTypes.NA, DecimalMetadata{} 896 } 897 898 func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 899 return c == ConvertedTypes.NA && !dec.IsSet 900 } 901 902 func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true } 903 904 func (UnknownLogicalType) toThrift() *format.LogicalType { 905 return &format.LogicalType{UNKNOWN: format.NewNullType()} 906 } 907 908 func (UnknownLogicalType) Equals(rhs LogicalType) bool { 909 _, ok := rhs.(UnknownLogicalType) 910 return ok 911 } 912 913 // JSONLogicalType represents a byte array column which is to be interpreted 914 // as a JSON string. 915 type JSONLogicalType struct{ baseLogicalType } 916 917 func (JSONLogicalType) SortOrder() SortOrder { 918 return SortUNSIGNED 919 } 920 921 func (JSONLogicalType) MarshalJSON() ([]byte, error) { 922 return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()}) 923 } 924 925 func (JSONLogicalType) String() string { 926 return "JSON" 927 } 928 929 func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 930 return ConvertedTypes.JSON, DecimalMetadata{} 931 } 932 933 func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 934 return c == ConvertedTypes.JSON && !dec.IsSet 935 } 936 937 func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 938 return t == parquet.Types.ByteArray 939 } 940 941 func (JSONLogicalType) toThrift() *format.LogicalType { 942 return &format.LogicalType{JSON: format.NewJsonType()} 943 } 944 945 func (JSONLogicalType) Equals(rhs LogicalType) bool { 946 _, ok := rhs.(JSONLogicalType) 947 return ok 948 } 949 950 // BSONLogicalType represents a binary JSON string in the byte array 951 type BSONLogicalType struct{ baseLogicalType } 952 953 func (BSONLogicalType) SortOrder() SortOrder { 954 return SortUNSIGNED 955 } 956 957 func (BSONLogicalType) MarshalJSON() ([]byte, error) { 958 return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()}) 959 } 960 961 func (BSONLogicalType) String() string { 962 return "BSON" 963 } 964 965 func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 966 return ConvertedTypes.BSON, DecimalMetadata{} 967 } 968 969 func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 970 return c == ConvertedTypes.BSON && !dec.IsSet 971 } 972 973 func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 974 return t == parquet.Types.ByteArray 975 } 976 977 func (BSONLogicalType) toThrift() *format.LogicalType { 978 return &format.LogicalType{BSON: format.NewBsonType()} 979 } 980 981 func (BSONLogicalType) Equals(rhs LogicalType) bool { 982 _, ok := rhs.(BSONLogicalType) 983 return ok 984 } 985 986 // UUIDLogicalType can only be used with a FixedLength byte array column 987 // that is exactly 16 bytes long 988 type UUIDLogicalType struct{ baseLogicalType } 989 990 func (UUIDLogicalType) SortOrder() SortOrder { 991 return SortUNSIGNED 992 } 993 994 func (UUIDLogicalType) MarshalJSON() ([]byte, error) { 995 return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()}) 996 } 997 998 func (UUIDLogicalType) String() string { 999 return "UUID" 1000 } 1001 1002 func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1003 return ConvertedTypes.None, DecimalMetadata{} 1004 } 1005 1006 func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1007 if dec.IsSet { 1008 return false 1009 } 1010 switch c { 1011 case ConvertedTypes.None, ConvertedTypes.NA: 1012 return true 1013 } 1014 return false 1015 } 1016 1017 func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1018 return t == parquet.Types.FixedLenByteArray && tlen == 16 1019 } 1020 1021 func (UUIDLogicalType) toThrift() *format.LogicalType { 1022 return &format.LogicalType{UUID: format.NewUUIDType()} 1023 } 1024 1025 func (UUIDLogicalType) Equals(rhs LogicalType) bool { 1026 _, ok := rhs.(UUIDLogicalType) 1027 return ok 1028 } 1029 1030 // IntervalLogicalType is not yet in the thrift spec, but represents 1031 // an interval time and needs to be a fixed length byte array of 12 bytes 1032 type IntervalLogicalType struct{ baseLogicalType } 1033 1034 func (IntervalLogicalType) SortOrder() SortOrder { 1035 return SortUNKNOWN 1036 } 1037 1038 func (IntervalLogicalType) MarshalJSON() ([]byte, error) { 1039 return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()}) 1040 } 1041 1042 func (IntervalLogicalType) String() string { 1043 return "Interval" 1044 } 1045 1046 func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1047 return ConvertedTypes.Interval, DecimalMetadata{} 1048 } 1049 1050 func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1051 return c == ConvertedTypes.Interval && !dec.IsSet 1052 } 1053 1054 func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1055 return t == parquet.Types.FixedLenByteArray && tlen == 12 1056 } 1057 1058 func (IntervalLogicalType) toThrift() *format.LogicalType { 1059 panic("no parquet IntervalLogicalType yet implemented") 1060 } 1061 1062 func (IntervalLogicalType) Equals(rhs LogicalType) bool { 1063 _, ok := rhs.(IntervalLogicalType) 1064 return ok 1065 } 1066 1067 type NullLogicalType struct{ baseLogicalType } 1068 1069 func (NullLogicalType) SortOrder() SortOrder { 1070 return SortUNKNOWN 1071 } 1072 1073 func (NullLogicalType) MarshalJSON() ([]byte, error) { 1074 return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()}) 1075 } 1076 1077 func (NullLogicalType) String() string { 1078 return "Null" 1079 } 1080 1081 func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1082 return ConvertedTypes.None, DecimalMetadata{} 1083 } 1084 1085 func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1086 if dec.IsSet { 1087 return false 1088 } 1089 switch c { 1090 case ConvertedTypes.None, ConvertedTypes.NA: 1091 return true 1092 } 1093 return false 1094 } 1095 1096 func (NullLogicalType) IsApplicable(parquet.Type, int32) bool { 1097 return true 1098 } 1099 1100 func (NullLogicalType) toThrift() *format.LogicalType { 1101 return &format.LogicalType{UNKNOWN: format.NewNullType()} 1102 } 1103 1104 func (NullLogicalType) Equals(rhs LogicalType) bool { 1105 _, ok := rhs.(NullLogicalType) 1106 return ok 1107 } 1108 1109 type NoLogicalType struct{ baseLogicalType } 1110 1111 func (NoLogicalType) SortOrder() SortOrder { 1112 return SortUNKNOWN 1113 } 1114 1115 func (NoLogicalType) MarshalJSON() ([]byte, error) { 1116 return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()}) 1117 } 1118 1119 func (NoLogicalType) IsSerialized() bool { return false } 1120 1121 func (NoLogicalType) String() string { 1122 return "None" 1123 } 1124 1125 func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1126 return ConvertedTypes.None, DecimalMetadata{} 1127 } 1128 1129 func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1130 return c == ConvertedTypes.None && !dec.IsSet 1131 } 1132 1133 func (NoLogicalType) IsApplicable(parquet.Type, int32) bool { 1134 return true 1135 } 1136 1137 func (NoLogicalType) toThrift() *format.LogicalType { 1138 panic("cannot convert NoLogicalType to thrift") 1139 } 1140 1141 func (NoLogicalType) Equals(rhs LogicalType) bool { 1142 _, ok := rhs.(NoLogicalType) 1143 return ok 1144 } 1145 1146 func (NoLogicalType) IsNone() bool { return true }