github.com/apache/arrow/go/v12@v12.0.1/parquet/schema/logical_types.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package schema 18 19 import ( 20 "encoding/json" 21 "fmt" 22 "math" 23 24 "github.com/apache/arrow/go/v12/parquet" 25 "github.com/apache/arrow/go/v12/parquet/internal/debug" 26 format "github.com/apache/arrow/go/v12/parquet/internal/gen-go/parquet" 27 ) 28 29 // DecimalMetadata is a struct for managing scale and precision information between 30 // converted and logical types. 31 type DecimalMetadata struct { 32 IsSet bool 33 Scale int32 34 Precision int32 35 } 36 37 func getLogicalType(l *format.LogicalType) LogicalType { 38 switch { 39 case l.IsSetSTRING(): 40 return StringLogicalType{} 41 case l.IsSetMAP(): 42 return MapLogicalType{} 43 case l.IsSetLIST(): 44 return ListLogicalType{} 45 case l.IsSetENUM(): 46 return EnumLogicalType{} 47 case l.IsSetDECIMAL(): 48 return &DecimalLogicalType{typ: l.DECIMAL} 49 case l.IsSetDATE(): 50 return DateLogicalType{} 51 case l.IsSetTIME(): 52 if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown { 53 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type") 54 } 55 return &TimeLogicalType{typ: l.TIME} 56 case l.IsSetTIMESTAMP(): 57 if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown { 58 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type") 59 } 60 return &TimestampLogicalType{typ: l.TIMESTAMP} 61 case l.IsSetINTEGER(): 62 return &IntLogicalType{typ: l.INTEGER} 63 case l.IsSetUNKNOWN(): 64 return NullLogicalType{} 65 case l.IsSetJSON(): 66 return JSONLogicalType{} 67 case l.IsSetBSON(): 68 return BSONLogicalType{} 69 case l.IsSetUUID(): 70 return UUIDLogicalType{} 71 case l == nil: 72 return NoLogicalType{} 73 default: 74 panic("invalid logical type") 75 } 76 } 77 78 // TimeUnitType is an enum for denoting whether a time based logical type 79 // is using milliseconds, microseconds or nanoseconds. 80 type TimeUnitType int 81 82 // Constants for the TimeUnitType 83 const ( 84 TimeUnitMillis TimeUnitType = iota 85 TimeUnitMicros 86 TimeUnitNanos 87 TimeUnitUnknown 88 ) 89 90 // LogicalType is the descriptor that defines the usage of a physical primitive 91 // type in the schema, such as an Interval, Date, etc. 92 type LogicalType interface { 93 // Returns true if a nested type like List or Map 94 IsNested() bool 95 // Returns true if this type can be serialized, ie: not Unknown/NoType/Interval 96 IsSerialized() bool 97 // Returns true if not NoLogicalType 98 IsValid() bool 99 // Returns true if it is NoType 100 IsNone() bool 101 // returns a string representation of the Logical Type 102 String() string 103 toThrift() *format.LogicalType 104 // Return the equivalent ConvertedType for legacy Parquet systems 105 ToConvertedType() (ConvertedType, DecimalMetadata) 106 // Returns true if the specified ConvertedType is compatible with this 107 // logical type 108 IsCompatible(ConvertedType, DecimalMetadata) bool 109 // Returns true if this logical type can be used with the provided physical type 110 IsApplicable(t parquet.Type, tlen int32) bool 111 // Returns true if the logical types are the same 112 Equals(LogicalType) bool 113 // Returns the default stat sort order for this logical type 114 SortOrder() SortOrder 115 } 116 117 // TemporalLogicalType is a smaller interface for Time based logical types 118 // like Time / Timestamp 119 type TemporalLogicalType interface { 120 LogicalType 121 IsAdjustedToUTC() bool 122 TimeUnit() TimeUnitType 123 } 124 125 // SortOrder mirrors the parquet.thrift sort order type 126 type SortOrder int8 127 128 // Constants for the Stat sort order definitions 129 const ( 130 SortSIGNED SortOrder = iota 131 SortUNSIGNED 132 SortUNKNOWN 133 ) 134 135 // DefaultSortOrder returns the default stat sort order for the given physical type 136 func DefaultSortOrder(primitive format.Type) SortOrder { 137 switch primitive { 138 case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE: 139 return SortSIGNED 140 case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY: 141 return SortUNSIGNED 142 case format.Type_INT96: 143 fallthrough 144 default: 145 return SortUNKNOWN 146 } 147 } 148 149 // GetLogicalSortOrder returns the default sort order for this logical type 150 // or falls back to the default sort order for the physical type if not valid 151 func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder { 152 switch { 153 case logical == nil || !logical.IsValid(): 154 return SortUNKNOWN 155 case logical.Equals(NoLogicalType{}): 156 return DefaultSortOrder(primitive) 157 default: 158 return logical.SortOrder() 159 } 160 } 161 162 type baseLogicalType struct{} 163 164 func (baseLogicalType) IsSerialized() bool { 165 return true 166 } 167 168 func (baseLogicalType) IsValid() bool { 169 return true 170 } 171 172 func (baseLogicalType) IsNested() bool { 173 return false 174 } 175 176 func (baseLogicalType) IsNone() bool { return false } 177 178 // StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray 179 type StringLogicalType struct{ baseLogicalType } 180 181 func (StringLogicalType) SortOrder() SortOrder { 182 return SortUNSIGNED 183 } 184 185 func (StringLogicalType) MarshalJSON() ([]byte, error) { 186 return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()}) 187 } 188 189 func (StringLogicalType) String() string { 190 return "String" 191 } 192 193 func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 194 return ConvertedTypes.UTF8, DecimalMetadata{} 195 } 196 197 func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 198 return t == ConvertedTypes.UTF8 && !dec.IsSet 199 } 200 201 func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 202 return t == parquet.Types.ByteArray 203 } 204 205 func (StringLogicalType) toThrift() *format.LogicalType { 206 return &format.LogicalType{STRING: format.NewStringType()} 207 } 208 209 func (StringLogicalType) Equals(rhs LogicalType) bool { 210 _, ok := rhs.(StringLogicalType) 211 return ok 212 } 213 214 // MapLogicalType represents a mapped type 215 type MapLogicalType struct{ baseLogicalType } 216 217 func (MapLogicalType) SortOrder() SortOrder { 218 return SortUNKNOWN 219 } 220 221 func (MapLogicalType) MarshalJSON() ([]byte, error) { 222 return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()}) 223 } 224 225 func (MapLogicalType) String() string { 226 return "Map" 227 } 228 229 func (MapLogicalType) IsNested() bool { 230 return true 231 } 232 233 func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 234 return ConvertedTypes.Map, DecimalMetadata{} 235 } 236 237 func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 238 return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet 239 } 240 241 func (MapLogicalType) IsApplicable(parquet.Type, int32) bool { 242 return false 243 } 244 245 func (MapLogicalType) toThrift() *format.LogicalType { 246 return &format.LogicalType{MAP: format.NewMapType()} 247 } 248 249 func (MapLogicalType) Equals(rhs LogicalType) bool { 250 _, ok := rhs.(MapLogicalType) 251 return ok 252 } 253 254 func NewListLogicalType() LogicalType { 255 return ListLogicalType{} 256 } 257 258 // ListLogicalType is used for columns which are themselves nested lists 259 type ListLogicalType struct{ baseLogicalType } 260 261 func (ListLogicalType) SortOrder() SortOrder { 262 return SortUNKNOWN 263 } 264 265 func (ListLogicalType) MarshalJSON() ([]byte, error) { 266 return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()}) 267 } 268 269 func (ListLogicalType) String() string { 270 return "List" 271 } 272 273 func (ListLogicalType) IsNested() bool { 274 return true 275 } 276 277 func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 278 return ConvertedTypes.List, DecimalMetadata{} 279 } 280 281 func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 282 return t == ConvertedTypes.List && !dec.IsSet 283 } 284 285 func (ListLogicalType) IsApplicable(parquet.Type, int32) bool { 286 return false 287 } 288 289 func (ListLogicalType) toThrift() *format.LogicalType { 290 return &format.LogicalType{LIST: format.NewListType()} 291 } 292 293 func (ListLogicalType) Equals(rhs LogicalType) bool { 294 _, ok := rhs.(ListLogicalType) 295 return ok 296 } 297 298 // EnumLogicalType is for representing an enum, which should be a byte array type 299 type EnumLogicalType struct{ baseLogicalType } 300 301 func (EnumLogicalType) SortOrder() SortOrder { 302 return SortUNSIGNED 303 } 304 305 func (EnumLogicalType) MarshalJSON() ([]byte, error) { 306 return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()}) 307 } 308 309 func (EnumLogicalType) String() string { 310 return "Enum" 311 } 312 313 func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 314 return ConvertedTypes.Enum, DecimalMetadata{} 315 } 316 317 func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 318 return t == ConvertedTypes.Enum && !dec.IsSet 319 } 320 321 func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 322 return t == parquet.Types.ByteArray 323 } 324 325 func (EnumLogicalType) toThrift() *format.LogicalType { 326 return &format.LogicalType{ENUM: format.NewEnumType()} 327 } 328 329 func (EnumLogicalType) Equals(rhs LogicalType) bool { 330 _, ok := rhs.(EnumLogicalType) 331 return ok 332 } 333 334 // NewDecimalLogicalType returns a Decimal logical type with the given 335 // precision and scale. 336 // 337 // Panics if precision < 1 or scale is not in the range (0, precision) 338 func NewDecimalLogicalType(precision int32, scale int32) LogicalType { 339 if precision < 1 { 340 panic("parquet: precision must be greater than or equal to 1 for decimal logical type") 341 } 342 if scale < 0 || scale > precision { 343 panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type") 344 } 345 return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}} 346 } 347 348 // DecimalLogicalType is used to represent a decimal value of a given 349 // precision and scale 350 type DecimalLogicalType struct { 351 baseLogicalType 352 typ *format.DecimalType 353 } 354 355 func (t DecimalLogicalType) Precision() int32 { 356 return t.typ.Precision 357 } 358 359 func (t DecimalLogicalType) Scale() int32 { 360 return t.typ.Scale 361 } 362 363 func (DecimalLogicalType) SortOrder() SortOrder { 364 return SortSIGNED 365 } 366 367 func (t DecimalLogicalType) MarshalJSON() ([]byte, error) { 368 return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale}) 369 } 370 371 func (t DecimalLogicalType) String() string { 372 return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale) 373 } 374 375 func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 376 return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()} 377 } 378 379 func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 380 return c == ConvertedTypes.Decimal && 381 dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision 382 } 383 384 func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool { 385 switch typ { 386 case parquet.Types.Int32: 387 return 1 <= t.typ.Precision && t.typ.Precision <= 9 388 case parquet.Types.Int64: 389 if t.typ.Precision < 10 { 390 debug.Log("int64 used for decimal logical, precision is small enough to use int32") 391 } 392 return 1 <= t.typ.Precision && t.typ.Precision <= 18 393 case parquet.Types.FixedLenByteArray: 394 return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0))))) 395 case parquet.Types.ByteArray: 396 return true 397 } 398 return false 399 } 400 401 func (t DecimalLogicalType) toThrift() *format.LogicalType { 402 return &format.LogicalType{DECIMAL: t.typ} 403 } 404 405 func (t DecimalLogicalType) Equals(rhs LogicalType) bool { 406 other, ok := rhs.(*DecimalLogicalType) 407 if !ok { 408 return false 409 } 410 return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale 411 } 412 413 // DateLogicalType is an int32 representing the number of days since the Unix Epoch 414 // 1 January 1970 415 type DateLogicalType struct{ baseLogicalType } 416 417 func (DateLogicalType) SortOrder() SortOrder { 418 return SortSIGNED 419 } 420 421 func (DateLogicalType) MarshalJSON() ([]byte, error) { 422 return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()}) 423 } 424 425 func (DateLogicalType) String() string { 426 return "Date" 427 } 428 429 func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 430 return ConvertedTypes.Date, DecimalMetadata{} 431 } 432 433 func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 434 return t == ConvertedTypes.Date && !dec.IsSet 435 } 436 437 func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 438 return t == parquet.Types.Int32 439 } 440 441 func (DateLogicalType) toThrift() *format.LogicalType { 442 return &format.LogicalType{DATE: format.NewDateType()} 443 } 444 445 func (DateLogicalType) Equals(rhs LogicalType) bool { 446 _, ok := rhs.(DateLogicalType) 447 return ok 448 } 449 450 func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType { 451 switch { 452 case unit == nil: 453 return TimeUnitUnknown 454 case unit.IsSetMILLIS(): 455 return TimeUnitMillis 456 case unit.IsSetMICROS(): 457 return TimeUnitMicros 458 case unit.IsSetNANOS(): 459 return TimeUnitNanos 460 default: 461 return TimeUnitUnknown 462 } 463 } 464 465 func timeUnitToString(unit *format.TimeUnit) string { 466 switch { 467 case unit == nil: 468 return "unknown" 469 case unit.IsSetMILLIS(): 470 return "milliseconds" 471 case unit.IsSetMICROS(): 472 return "microseconds" 473 case unit.IsSetNANOS(): 474 return "nanoseconds" 475 default: 476 return "unknown" 477 } 478 } 479 480 func timeUnitFromString(v string) TimeUnitType { 481 switch v { 482 case "millis": 483 return TimeUnitMillis 484 case "micros": 485 return TimeUnitMicros 486 case "nanos": 487 return TimeUnitNanos 488 default: 489 return TimeUnitUnknown 490 } 491 } 492 493 func createTimeUnit(unit TimeUnitType) *format.TimeUnit { 494 tunit := format.NewTimeUnit() 495 switch unit { 496 case TimeUnitMicros: 497 tunit.MICROS = format.NewMicroSeconds() 498 case TimeUnitMillis: 499 tunit.MILLIS = format.NewMilliSeconds() 500 case TimeUnitNanos: 501 tunit.NANOS = format.NewNanoSeconds() 502 default: 503 panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type") 504 } 505 return tunit 506 } 507 508 // NewTimeLogicalType returns a time type of the given unit. 509 func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 510 return &TimeLogicalType{typ: &format.TimeType{ 511 IsAdjustedToUTC: isAdjustedToUTC, 512 Unit: createTimeUnit(unit), 513 }} 514 } 515 516 // TimeLogicalType is a time type without a date and must be an 517 // int32 for milliseconds, or an int64 for micro or nano seconds. 518 type TimeLogicalType struct { 519 baseLogicalType 520 typ *format.TimeType 521 } 522 523 func (t TimeLogicalType) IsAdjustedToUTC() bool { 524 return t.typ.IsAdjustedToUTC 525 } 526 527 func (t TimeLogicalType) TimeUnit() TimeUnitType { 528 return timeUnitFromThrift(t.typ.Unit) 529 } 530 531 func (TimeLogicalType) SortOrder() SortOrder { 532 return SortSIGNED 533 } 534 535 func (t TimeLogicalType) MarshalJSON() ([]byte, error) { 536 return json.Marshal(map[string]interface{}{ 537 "Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())}) 538 } 539 540 func (t TimeLogicalType) String() string { 541 return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit())) 542 } 543 544 func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 545 unit := timeUnitFromThrift(t.typ.Unit) 546 if t.typ.IsAdjustedToUTC { 547 switch unit { 548 case TimeUnitMillis: 549 return ConvertedTypes.TimeMillis, DecimalMetadata{} 550 case TimeUnitMicros: 551 return ConvertedTypes.TimeMicros, DecimalMetadata{} 552 } 553 } 554 return ConvertedTypes.None, DecimalMetadata{} 555 } 556 557 func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 558 if dec.IsSet { 559 return false 560 } 561 unit := timeUnitFromThrift(t.typ.Unit) 562 if t.typ.IsAdjustedToUTC { 563 switch unit { 564 case TimeUnitMillis: 565 return c == ConvertedTypes.TimeMillis 566 case TimeUnitMicros: 567 return c == ConvertedTypes.TimeMicros 568 } 569 } 570 571 return c == ConvertedTypes.None || c == ConvertedTypes.NA 572 } 573 574 func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 575 return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) || 576 (typ == parquet.Types.Int64 && 577 (t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS())) 578 } 579 580 func (t TimeLogicalType) toThrift() *format.LogicalType { 581 return &format.LogicalType{TIME: t.typ} 582 } 583 584 func (t TimeLogicalType) Equals(rhs LogicalType) bool { 585 other, ok := rhs.(*TimeLogicalType) 586 if !ok { 587 return false 588 } 589 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 590 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 591 } 592 593 // NewTimestampLogicalType returns a logical timestamp type with "forceConverted" 594 // set to false 595 func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 596 return &TimestampLogicalType{ 597 typ: &format.TimestampType{ 598 IsAdjustedToUTC: isAdjustedToUTC, 599 Unit: createTimeUnit(unit), 600 }, 601 forceConverted: false, 602 fromConverted: false, 603 } 604 } 605 606 // NewTimestampLogicalTypeForce returns a timestamp logical type with 607 // "forceConverted" set to true 608 func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 609 return &TimestampLogicalType{ 610 typ: &format.TimestampType{ 611 IsAdjustedToUTC: isAdjustedToUTC, 612 Unit: createTimeUnit(unit), 613 }, 614 forceConverted: true, 615 fromConverted: false, 616 } 617 } 618 619 // TimestampLogicalType represents an int64 number that can be decoded 620 // into a year, month, day, hour, minute, second, and subsecond 621 type TimestampLogicalType struct { 622 baseLogicalType 623 typ *format.TimestampType 624 // forceConverted denotes whether or not the resulting serialized 625 // type when writing to parquet will be written as the legacy 626 // ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true) 627 // or if it will write the proper current Logical Types (false, default) 628 forceConverted bool 629 // fromConverted denotes if the timestamp type was created by 630 // translating a legacy converted type of TIMESTAMP_MILLIS or 631 // TIMESTAMP_MICROS rather than by using the current logical 632 // types. Default is false. 633 fromConverted bool 634 } 635 636 func (t TimestampLogicalType) IsFromConvertedType() bool { 637 return t.fromConverted 638 } 639 640 func (t TimestampLogicalType) IsAdjustedToUTC() bool { 641 return t.typ.IsAdjustedToUTC 642 } 643 644 func (t TimestampLogicalType) TimeUnit() TimeUnitType { 645 return timeUnitFromThrift(t.typ.Unit) 646 } 647 648 func (TimestampLogicalType) SortOrder() SortOrder { 649 return SortSIGNED 650 } 651 652 func (t TimestampLogicalType) MarshalJSON() ([]byte, error) { 653 return json.Marshal(map[string]interface{}{ 654 "Type": "Timestamp", 655 "isAdjustedToUTC": t.typ.IsAdjustedToUTC, 656 "timeUnit": timeUnitToString(t.typ.GetUnit()), 657 "is_from_converted_type": t.fromConverted, 658 "force_set_converted_type": t.forceConverted, 659 }) 660 } 661 662 func (t TimestampLogicalType) IsSerialized() bool { 663 return !t.fromConverted 664 } 665 666 func (t TimestampLogicalType) String() string { 667 return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)", 668 t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted) 669 } 670 671 func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 672 unit := timeUnitFromThrift(t.typ.Unit) 673 if t.typ.IsAdjustedToUTC || t.forceConverted { 674 switch unit { 675 case TimeUnitMillis: 676 return ConvertedTypes.TimestampMillis, DecimalMetadata{} 677 case TimeUnitMicros: 678 return ConvertedTypes.TimestampMicros, DecimalMetadata{} 679 } 680 } 681 return ConvertedTypes.None, DecimalMetadata{} 682 } 683 684 func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 685 if dec.IsSet { 686 return false 687 } 688 689 switch timeUnitFromThrift(t.typ.Unit) { 690 case TimeUnitMillis: 691 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 692 return c == ConvertedTypes.TimestampMillis 693 } 694 case TimeUnitMicros: 695 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 696 return c == ConvertedTypes.TimestampMicros 697 } 698 } 699 700 return c == ConvertedTypes.None || c == ConvertedTypes.NA 701 } 702 703 func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 704 return t == parquet.Types.Int64 705 } 706 707 func (t TimestampLogicalType) toThrift() *format.LogicalType { 708 return &format.LogicalType{TIMESTAMP: t.typ} 709 } 710 711 func (t TimestampLogicalType) Equals(rhs LogicalType) bool { 712 other, ok := rhs.(*TimestampLogicalType) 713 if !ok { 714 return false 715 } 716 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 717 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 718 } 719 720 // NewIntLogicalType creates an integer logical type of the desired bitwidth 721 // and whether it is signed or not. 722 // 723 // Bit width must be exactly 8, 16, 32 or 64 for an integer logical type 724 func NewIntLogicalType(bitWidth int8, signed bool) LogicalType { 725 switch bitWidth { 726 case 8, 16, 32, 64: 727 default: 728 panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type") 729 } 730 return &IntLogicalType{ 731 typ: &format.IntType{ 732 BitWidth: bitWidth, 733 IsSigned: signed, 734 }, 735 } 736 } 737 738 // IntLogicalType represents an integer type of a specific bit width and 739 // is either signed or unsigned. 740 type IntLogicalType struct { 741 baseLogicalType 742 typ *format.IntType 743 } 744 745 func (t IntLogicalType) BitWidth() int8 { 746 return t.typ.BitWidth 747 } 748 749 func (t IntLogicalType) IsSigned() bool { 750 return t.typ.IsSigned 751 } 752 753 func (t IntLogicalType) SortOrder() SortOrder { 754 if t.typ.IsSigned { 755 return SortSIGNED 756 } 757 return SortUNSIGNED 758 } 759 760 func (t IntLogicalType) MarshalJSON() ([]byte, error) { 761 return json.Marshal(map[string]interface{}{ 762 "Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned, 763 }) 764 } 765 766 func (t IntLogicalType) String() string { 767 return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned()) 768 } 769 770 func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 771 var d DecimalMetadata 772 if t.typ.IsSigned { 773 switch t.typ.BitWidth { 774 case 8: 775 return ConvertedTypes.Int8, d 776 case 16: 777 return ConvertedTypes.Int16, d 778 case 32: 779 return ConvertedTypes.Int32, d 780 case 64: 781 return ConvertedTypes.Int64, d 782 } 783 } else { 784 switch t.typ.BitWidth { 785 case 8: 786 return ConvertedTypes.Uint8, d 787 case 16: 788 return ConvertedTypes.Uint16, d 789 case 32: 790 return ConvertedTypes.Uint32, d 791 case 64: 792 return ConvertedTypes.Uint64, d 793 } 794 } 795 return ConvertedTypes.None, d 796 } 797 798 func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 799 if dec.IsSet { 800 return false 801 } 802 v, _ := t.ToConvertedType() 803 return c == v 804 } 805 806 func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 807 return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) || 808 (typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64) 809 } 810 811 func (t IntLogicalType) toThrift() *format.LogicalType { 812 return &format.LogicalType{INTEGER: t.typ} 813 } 814 815 func (t IntLogicalType) Equals(rhs LogicalType) bool { 816 other, ok := rhs.(*IntLogicalType) 817 if !ok { 818 return false 819 } 820 821 return t.typ.GetIsSigned() == other.typ.GetIsSigned() && 822 t.typ.GetBitWidth() == other.typ.GetBitWidth() 823 } 824 825 // UnknownLogicalType is a type that is essentially a placeholder for when 826 // we don't know the type. 827 type UnknownLogicalType struct{ baseLogicalType } 828 829 func (UnknownLogicalType) SortOrder() SortOrder { 830 return SortUNKNOWN 831 } 832 833 func (UnknownLogicalType) MarshalJSON() ([]byte, error) { 834 return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()}) 835 } 836 837 func (UnknownLogicalType) IsValid() bool { return false } 838 839 func (UnknownLogicalType) IsSerialized() bool { return false } 840 841 func (UnknownLogicalType) String() string { 842 return "Unknown" 843 } 844 845 func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 846 return ConvertedTypes.NA, DecimalMetadata{} 847 } 848 849 func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 850 return c == ConvertedTypes.NA && !dec.IsSet 851 } 852 853 func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true } 854 855 func (UnknownLogicalType) toThrift() *format.LogicalType { 856 return &format.LogicalType{UNKNOWN: format.NewNullType()} 857 } 858 859 func (UnknownLogicalType) Equals(rhs LogicalType) bool { 860 _, ok := rhs.(UnknownLogicalType) 861 return ok 862 } 863 864 // JSONLogicalType represents a byte array column which is to be interpreted 865 // as a JSON string. 866 type JSONLogicalType struct{ baseLogicalType } 867 868 func (JSONLogicalType) SortOrder() SortOrder { 869 return SortUNSIGNED 870 } 871 872 func (JSONLogicalType) MarshalJSON() ([]byte, error) { 873 return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()}) 874 } 875 876 func (JSONLogicalType) String() string { 877 return "JSON" 878 } 879 880 func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 881 return ConvertedTypes.JSON, DecimalMetadata{} 882 } 883 884 func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 885 return c == ConvertedTypes.JSON && !dec.IsSet 886 } 887 888 func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 889 return t == parquet.Types.ByteArray 890 } 891 892 func (JSONLogicalType) toThrift() *format.LogicalType { 893 return &format.LogicalType{JSON: format.NewJsonType()} 894 } 895 896 func (JSONLogicalType) Equals(rhs LogicalType) bool { 897 _, ok := rhs.(JSONLogicalType) 898 return ok 899 } 900 901 // BSONLogicalType represents a binary JSON string in the byte array 902 type BSONLogicalType struct{ baseLogicalType } 903 904 func (BSONLogicalType) SortOrder() SortOrder { 905 return SortUNSIGNED 906 } 907 908 func (BSONLogicalType) MarshalJSON() ([]byte, error) { 909 return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()}) 910 } 911 912 func (BSONLogicalType) String() string { 913 return "BSON" 914 } 915 916 func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 917 return ConvertedTypes.BSON, DecimalMetadata{} 918 } 919 920 func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 921 return c == ConvertedTypes.BSON && !dec.IsSet 922 } 923 924 func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 925 return t == parquet.Types.ByteArray 926 } 927 928 func (BSONLogicalType) toThrift() *format.LogicalType { 929 return &format.LogicalType{BSON: format.NewBsonType()} 930 } 931 932 func (BSONLogicalType) Equals(rhs LogicalType) bool { 933 _, ok := rhs.(BSONLogicalType) 934 return ok 935 } 936 937 // UUIDLogicalType can only be used with a FixedLength byte array column 938 // that is exactly 16 bytes long 939 type UUIDLogicalType struct{ baseLogicalType } 940 941 func (UUIDLogicalType) SortOrder() SortOrder { 942 return SortUNSIGNED 943 } 944 945 func (UUIDLogicalType) MarshalJSON() ([]byte, error) { 946 return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()}) 947 } 948 949 func (UUIDLogicalType) String() string { 950 return "UUID" 951 } 952 953 func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 954 return ConvertedTypes.None, DecimalMetadata{} 955 } 956 957 func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 958 if dec.IsSet { 959 return false 960 } 961 switch c { 962 case ConvertedTypes.None, ConvertedTypes.NA: 963 return true 964 } 965 return false 966 } 967 968 func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 969 return t == parquet.Types.FixedLenByteArray && tlen == 16 970 } 971 972 func (UUIDLogicalType) toThrift() *format.LogicalType { 973 return &format.LogicalType{UUID: format.NewUUIDType()} 974 } 975 976 func (UUIDLogicalType) Equals(rhs LogicalType) bool { 977 _, ok := rhs.(UUIDLogicalType) 978 return ok 979 } 980 981 // IntervalLogicalType is not yet in the thrift spec, but represents 982 // an interval time and needs to be a fixed length byte array of 12 bytes 983 type IntervalLogicalType struct{ baseLogicalType } 984 985 func (IntervalLogicalType) SortOrder() SortOrder { 986 return SortUNKNOWN 987 } 988 989 func (IntervalLogicalType) MarshalJSON() ([]byte, error) { 990 return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()}) 991 } 992 993 func (IntervalLogicalType) String() string { 994 return "Interval" 995 } 996 997 func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 998 return ConvertedTypes.Interval, DecimalMetadata{} 999 } 1000 1001 func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1002 return c == ConvertedTypes.Interval && !dec.IsSet 1003 } 1004 1005 func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1006 return t == parquet.Types.FixedLenByteArray && tlen == 12 1007 } 1008 1009 func (IntervalLogicalType) toThrift() *format.LogicalType { 1010 panic("no parquet IntervalLogicalType yet implemented") 1011 } 1012 1013 func (IntervalLogicalType) Equals(rhs LogicalType) bool { 1014 _, ok := rhs.(IntervalLogicalType) 1015 return ok 1016 } 1017 1018 type NullLogicalType struct{ baseLogicalType } 1019 1020 func (NullLogicalType) SortOrder() SortOrder { 1021 return SortUNKNOWN 1022 } 1023 1024 func (NullLogicalType) MarshalJSON() ([]byte, error) { 1025 return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()}) 1026 } 1027 1028 func (NullLogicalType) String() string { 1029 return "Null" 1030 } 1031 1032 func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1033 return ConvertedTypes.None, DecimalMetadata{} 1034 } 1035 1036 func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1037 if dec.IsSet { 1038 return false 1039 } 1040 switch c { 1041 case ConvertedTypes.None, ConvertedTypes.NA: 1042 return true 1043 } 1044 return false 1045 } 1046 1047 func (NullLogicalType) IsApplicable(parquet.Type, int32) bool { 1048 return true 1049 } 1050 1051 func (NullLogicalType) toThrift() *format.LogicalType { 1052 return &format.LogicalType{UNKNOWN: format.NewNullType()} 1053 } 1054 1055 func (NullLogicalType) Equals(rhs LogicalType) bool { 1056 _, ok := rhs.(NullLogicalType) 1057 return ok 1058 } 1059 1060 type NoLogicalType struct{ baseLogicalType } 1061 1062 func (NoLogicalType) SortOrder() SortOrder { 1063 return SortUNKNOWN 1064 } 1065 1066 func (NoLogicalType) MarshalJSON() ([]byte, error) { 1067 return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()}) 1068 } 1069 1070 func (NoLogicalType) IsSerialized() bool { return false } 1071 1072 func (NoLogicalType) String() string { 1073 return "None" 1074 } 1075 1076 func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1077 return ConvertedTypes.None, DecimalMetadata{} 1078 } 1079 1080 func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1081 return c == ConvertedTypes.None && !dec.IsSet 1082 } 1083 1084 func (NoLogicalType) IsApplicable(parquet.Type, int32) bool { 1085 return true 1086 } 1087 1088 func (NoLogicalType) toThrift() *format.LogicalType { 1089 panic("cannot convert NoLogicalType to thrift") 1090 } 1091 1092 func (NoLogicalType) Equals(rhs LogicalType) bool { 1093 _, ok := rhs.(NoLogicalType) 1094 return ok 1095 } 1096 1097 func (NoLogicalType) IsNone() bool { return true }