github.com/apache/arrow/go/v15@v15.0.1/parquet/schema/logical_types.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package schema 18 19 import ( 20 "fmt" 21 "math" 22 23 "github.com/apache/arrow/go/v15/internal/json" 24 "github.com/apache/arrow/go/v15/parquet" 25 "github.com/apache/arrow/go/v15/parquet/internal/debug" 26 format "github.com/apache/arrow/go/v15/parquet/internal/gen-go/parquet" 27 ) 28 29 // DecimalMetadata is a struct for managing scale and precision information between 30 // converted and logical types. 31 type DecimalMetadata struct { 32 IsSet bool 33 Scale int32 34 Precision int32 35 } 36 37 func getLogicalType(l *format.LogicalType) LogicalType { 38 switch { 39 case l.IsSetSTRING(): 40 return StringLogicalType{} 41 case l.IsSetMAP(): 42 return MapLogicalType{} 43 case l.IsSetLIST(): 44 return ListLogicalType{} 45 case l.IsSetENUM(): 46 return EnumLogicalType{} 47 case l.IsSetDECIMAL(): 48 return &DecimalLogicalType{typ: l.DECIMAL} 49 case l.IsSetDATE(): 50 return DateLogicalType{} 51 case l.IsSetTIME(): 52 if timeUnitFromThrift(l.TIME.Unit) == TimeUnitUnknown { 53 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Time logical type") 54 } 55 return &TimeLogicalType{typ: l.TIME} 56 case l.IsSetTIMESTAMP(): 57 if timeUnitFromThrift(l.TIMESTAMP.Unit) == TimeUnitUnknown { 58 panic("parquet: TimeUnit must be one of MILLIS, MICROS, or NANOS for Timestamp logical type") 59 } 60 return &TimestampLogicalType{typ: l.TIMESTAMP} 61 case l.IsSetINTEGER(): 62 return &IntLogicalType{typ: l.INTEGER} 63 case l.IsSetUNKNOWN(): 64 return NullLogicalType{} 65 case l.IsSetJSON(): 66 return JSONLogicalType{} 67 case l.IsSetBSON(): 68 return BSONLogicalType{} 69 case l.IsSetUUID(): 70 return UUIDLogicalType{} 71 case l.IsSetFLOAT16(): 72 return Float16LogicalType{} 73 case l == nil: 74 return NoLogicalType{} 75 default: 76 panic("invalid logical type") 77 } 78 } 79 80 // TimeUnitType is an enum for denoting whether a time based logical type 81 // is using milliseconds, microseconds or nanoseconds. 82 type TimeUnitType int 83 84 // Constants for the TimeUnitType 85 const ( 86 TimeUnitMillis TimeUnitType = iota 87 TimeUnitMicros 88 TimeUnitNanos 89 TimeUnitUnknown 90 ) 91 92 // LogicalType is the descriptor that defines the usage of a physical primitive 93 // type in the schema, such as an Interval, Date, etc. 94 type LogicalType interface { 95 // Returns true if a nested type like List or Map 96 IsNested() bool 97 // Returns true if this type can be serialized, ie: not Unknown/NoType/Interval 98 IsSerialized() bool 99 // Returns true if not NoLogicalType 100 IsValid() bool 101 // Returns true if it is NoType 102 IsNone() bool 103 // returns a string representation of the Logical Type 104 String() string 105 toThrift() *format.LogicalType 106 // Return the equivalent ConvertedType for legacy Parquet systems 107 ToConvertedType() (ConvertedType, DecimalMetadata) 108 // Returns true if the specified ConvertedType is compatible with this 109 // logical type 110 IsCompatible(ConvertedType, DecimalMetadata) bool 111 // Returns true if this logical type can be used with the provided physical type 112 IsApplicable(t parquet.Type, tlen int32) bool 113 // Returns true if the logical types are the same 114 Equals(LogicalType) bool 115 // Returns the default stat sort order for this logical type 116 SortOrder() SortOrder 117 } 118 119 // TemporalLogicalType is a smaller interface for Time based logical types 120 // like Time / Timestamp 121 type TemporalLogicalType interface { 122 LogicalType 123 IsAdjustedToUTC() bool 124 TimeUnit() TimeUnitType 125 } 126 127 // SortOrder mirrors the parquet.thrift sort order type 128 type SortOrder int8 129 130 // Constants for the Stat sort order definitions 131 const ( 132 SortSIGNED SortOrder = iota 133 SortUNSIGNED 134 SortUNKNOWN 135 ) 136 137 // DefaultSortOrder returns the default stat sort order for the given physical type 138 func DefaultSortOrder(primitive format.Type) SortOrder { 139 switch primitive { 140 case format.Type_BOOLEAN, format.Type_INT32, format.Type_INT64, format.Type_FLOAT, format.Type_DOUBLE: 141 return SortSIGNED 142 case format.Type_BYTE_ARRAY, format.Type_FIXED_LEN_BYTE_ARRAY: 143 return SortUNSIGNED 144 case format.Type_INT96: 145 fallthrough 146 default: 147 return SortUNKNOWN 148 } 149 } 150 151 // GetLogicalSortOrder returns the default sort order for this logical type 152 // or falls back to the default sort order for the physical type if not valid 153 func GetLogicalSortOrder(logical LogicalType, primitive format.Type) SortOrder { 154 switch { 155 case logical == nil || !logical.IsValid(): 156 return SortUNKNOWN 157 case logical.Equals(NoLogicalType{}): 158 return DefaultSortOrder(primitive) 159 default: 160 return logical.SortOrder() 161 } 162 } 163 164 type baseLogicalType struct{} 165 166 func (baseLogicalType) IsSerialized() bool { 167 return true 168 } 169 170 func (baseLogicalType) IsValid() bool { 171 return true 172 } 173 174 func (baseLogicalType) IsNested() bool { 175 return false 176 } 177 178 func (baseLogicalType) IsNone() bool { return false } 179 180 // StringLogicalType is a UTF8 string, only usable with ByteArray and FixedLenByteArray 181 type StringLogicalType struct{ baseLogicalType } 182 183 func (StringLogicalType) SortOrder() SortOrder { 184 return SortUNSIGNED 185 } 186 187 func (StringLogicalType) MarshalJSON() ([]byte, error) { 188 return json.Marshal(map[string]string{"Type": StringLogicalType{}.String()}) 189 } 190 191 func (StringLogicalType) String() string { 192 return "String" 193 } 194 195 func (StringLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 196 return ConvertedTypes.UTF8, DecimalMetadata{} 197 } 198 199 func (StringLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 200 return t == ConvertedTypes.UTF8 && !dec.IsSet 201 } 202 203 func (StringLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 204 return t == parquet.Types.ByteArray 205 } 206 207 func (StringLogicalType) toThrift() *format.LogicalType { 208 return &format.LogicalType{STRING: format.NewStringType()} 209 } 210 211 func (StringLogicalType) Equals(rhs LogicalType) bool { 212 _, ok := rhs.(StringLogicalType) 213 return ok 214 } 215 216 // MapLogicalType represents a mapped type 217 type MapLogicalType struct{ baseLogicalType } 218 219 func (MapLogicalType) SortOrder() SortOrder { 220 return SortUNKNOWN 221 } 222 223 func (MapLogicalType) MarshalJSON() ([]byte, error) { 224 return json.Marshal(map[string]string{"Type": MapLogicalType{}.String()}) 225 } 226 227 func (MapLogicalType) String() string { 228 return "Map" 229 } 230 231 func (MapLogicalType) IsNested() bool { 232 return true 233 } 234 235 func (MapLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 236 return ConvertedTypes.Map, DecimalMetadata{} 237 } 238 239 func (MapLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 240 return (t == ConvertedTypes.Map || t == ConvertedTypes.MapKeyValue) && !dec.IsSet 241 } 242 243 func (MapLogicalType) IsApplicable(parquet.Type, int32) bool { 244 return false 245 } 246 247 func (MapLogicalType) toThrift() *format.LogicalType { 248 return &format.LogicalType{MAP: format.NewMapType()} 249 } 250 251 func (MapLogicalType) Equals(rhs LogicalType) bool { 252 _, ok := rhs.(MapLogicalType) 253 return ok 254 } 255 256 func NewListLogicalType() LogicalType { 257 return ListLogicalType{} 258 } 259 260 // ListLogicalType is used for columns which are themselves nested lists 261 type ListLogicalType struct{ baseLogicalType } 262 263 func (ListLogicalType) SortOrder() SortOrder { 264 return SortUNKNOWN 265 } 266 267 func (ListLogicalType) MarshalJSON() ([]byte, error) { 268 return json.Marshal(map[string]string{"Type": ListLogicalType{}.String()}) 269 } 270 271 func (ListLogicalType) String() string { 272 return "List" 273 } 274 275 func (ListLogicalType) IsNested() bool { 276 return true 277 } 278 279 func (ListLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 280 return ConvertedTypes.List, DecimalMetadata{} 281 } 282 283 func (ListLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 284 return t == ConvertedTypes.List && !dec.IsSet 285 } 286 287 func (ListLogicalType) IsApplicable(parquet.Type, int32) bool { 288 return false 289 } 290 291 func (ListLogicalType) toThrift() *format.LogicalType { 292 return &format.LogicalType{LIST: format.NewListType()} 293 } 294 295 func (ListLogicalType) Equals(rhs LogicalType) bool { 296 _, ok := rhs.(ListLogicalType) 297 return ok 298 } 299 300 // EnumLogicalType is for representing an enum, which should be a byte array type 301 type EnumLogicalType struct{ baseLogicalType } 302 303 func (EnumLogicalType) SortOrder() SortOrder { 304 return SortUNSIGNED 305 } 306 307 func (EnumLogicalType) MarshalJSON() ([]byte, error) { 308 return json.Marshal(map[string]string{"Type": EnumLogicalType{}.String()}) 309 } 310 311 func (EnumLogicalType) String() string { 312 return "Enum" 313 } 314 315 func (EnumLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 316 return ConvertedTypes.Enum, DecimalMetadata{} 317 } 318 319 func (EnumLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 320 return t == ConvertedTypes.Enum && !dec.IsSet 321 } 322 323 func (EnumLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 324 return t == parquet.Types.ByteArray 325 } 326 327 func (EnumLogicalType) toThrift() *format.LogicalType { 328 return &format.LogicalType{ENUM: format.NewEnumType()} 329 } 330 331 func (EnumLogicalType) Equals(rhs LogicalType) bool { 332 _, ok := rhs.(EnumLogicalType) 333 return ok 334 } 335 336 // NewDecimalLogicalType returns a Decimal logical type with the given 337 // precision and scale. 338 // 339 // Panics if precision < 1 or scale is not in the range (0, precision) 340 func NewDecimalLogicalType(precision int32, scale int32) LogicalType { 341 if precision < 1 { 342 panic("parquet: precision must be greater than or equal to 1 for decimal logical type") 343 } 344 if scale < 0 || scale > precision { 345 panic("parquet: scale must be a non-negative integer that does not exceed precision for decimal logical type") 346 } 347 return &DecimalLogicalType{typ: &format.DecimalType{Precision: precision, Scale: scale}} 348 } 349 350 // DecimalLogicalType is used to represent a decimal value of a given 351 // precision and scale 352 type DecimalLogicalType struct { 353 baseLogicalType 354 typ *format.DecimalType 355 } 356 357 func (t DecimalLogicalType) Precision() int32 { 358 return t.typ.Precision 359 } 360 361 func (t DecimalLogicalType) Scale() int32 { 362 return t.typ.Scale 363 } 364 365 func (DecimalLogicalType) SortOrder() SortOrder { 366 return SortSIGNED 367 } 368 369 func (t DecimalLogicalType) MarshalJSON() ([]byte, error) { 370 return json.Marshal(map[string]interface{}{"Type": "Decimal", "precision": t.typ.Precision, "scale": t.typ.Scale}) 371 } 372 373 func (t DecimalLogicalType) String() string { 374 return fmt.Sprintf("Decimal(precision=%d, scale=%d)", t.typ.Precision, t.typ.Scale) 375 } 376 377 func (t DecimalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 378 return ConvertedTypes.Decimal, DecimalMetadata{IsSet: true, Scale: t.typ.GetScale(), Precision: t.typ.GetPrecision()} 379 } 380 381 func (t DecimalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 382 return c == ConvertedTypes.Decimal && 383 dec.IsSet && dec.Scale == t.typ.Scale && dec.Precision == t.typ.Precision 384 } 385 386 func (t DecimalLogicalType) IsApplicable(typ parquet.Type, tlen int32) bool { 387 switch typ { 388 case parquet.Types.Int32: 389 return 1 <= t.typ.Precision && t.typ.Precision <= 9 390 case parquet.Types.Int64: 391 if t.typ.Precision < 10 { 392 debug.Log("int64 used for decimal logical, precision is small enough to use int32") 393 } 394 return 1 <= t.typ.Precision && t.typ.Precision <= 18 395 case parquet.Types.FixedLenByteArray: 396 return t.typ.Precision <= int32(math.Floor(math.Log10(math.Pow(2.0, (8.0*float64(tlen)-1.0))))) 397 case parquet.Types.ByteArray: 398 return true 399 } 400 return false 401 } 402 403 func (t DecimalLogicalType) toThrift() *format.LogicalType { 404 return &format.LogicalType{DECIMAL: t.typ} 405 } 406 407 func (t DecimalLogicalType) Equals(rhs LogicalType) bool { 408 other, ok := rhs.(*DecimalLogicalType) 409 if !ok { 410 return false 411 } 412 return t.typ.Precision == other.typ.Precision && t.typ.Scale == other.typ.Scale 413 } 414 415 // DateLogicalType is an int32 representing the number of days since the Unix Epoch 416 // 1 January 1970 417 type DateLogicalType struct{ baseLogicalType } 418 419 func (DateLogicalType) SortOrder() SortOrder { 420 return SortSIGNED 421 } 422 423 func (DateLogicalType) MarshalJSON() ([]byte, error) { 424 return json.Marshal(map[string]string{"Type": DateLogicalType{}.String()}) 425 } 426 427 func (DateLogicalType) String() string { 428 return "Date" 429 } 430 431 func (DateLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 432 return ConvertedTypes.Date, DecimalMetadata{} 433 } 434 435 func (DateLogicalType) IsCompatible(t ConvertedType, dec DecimalMetadata) bool { 436 return t == ConvertedTypes.Date && !dec.IsSet 437 } 438 439 func (DateLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 440 return t == parquet.Types.Int32 441 } 442 443 func (DateLogicalType) toThrift() *format.LogicalType { 444 return &format.LogicalType{DATE: format.NewDateType()} 445 } 446 447 func (DateLogicalType) Equals(rhs LogicalType) bool { 448 _, ok := rhs.(DateLogicalType) 449 return ok 450 } 451 452 func timeUnitFromThrift(unit *format.TimeUnit) TimeUnitType { 453 switch { 454 case unit == nil: 455 return TimeUnitUnknown 456 case unit.IsSetMILLIS(): 457 return TimeUnitMillis 458 case unit.IsSetMICROS(): 459 return TimeUnitMicros 460 case unit.IsSetNANOS(): 461 return TimeUnitNanos 462 default: 463 return TimeUnitUnknown 464 } 465 } 466 467 func timeUnitToString(unit *format.TimeUnit) string { 468 switch { 469 case unit == nil: 470 return "unknown" 471 case unit.IsSetMILLIS(): 472 return "milliseconds" 473 case unit.IsSetMICROS(): 474 return "microseconds" 475 case unit.IsSetNANOS(): 476 return "nanoseconds" 477 default: 478 return "unknown" 479 } 480 } 481 482 func timeUnitFromString(v string) TimeUnitType { 483 switch v { 484 case "millis": 485 return TimeUnitMillis 486 case "micros": 487 return TimeUnitMicros 488 case "nanos": 489 return TimeUnitNanos 490 default: 491 return TimeUnitUnknown 492 } 493 } 494 495 func createTimeUnit(unit TimeUnitType) *format.TimeUnit { 496 tunit := format.NewTimeUnit() 497 switch unit { 498 case TimeUnitMicros: 499 tunit.MICROS = format.NewMicroSeconds() 500 case TimeUnitMillis: 501 tunit.MILLIS = format.NewMilliSeconds() 502 case TimeUnitNanos: 503 tunit.NANOS = format.NewNanoSeconds() 504 default: 505 panic("parquet: time unit must be one of MILLIS, MICROS, or NANOS for Time logical type") 506 } 507 return tunit 508 } 509 510 // NewTimeLogicalType returns a time type of the given unit. 511 func NewTimeLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 512 return &TimeLogicalType{typ: &format.TimeType{ 513 IsAdjustedToUTC: isAdjustedToUTC, 514 Unit: createTimeUnit(unit), 515 }} 516 } 517 518 // TimeLogicalType is a time type without a date and must be an 519 // int32 for milliseconds, or an int64 for micro or nano seconds. 520 type TimeLogicalType struct { 521 baseLogicalType 522 typ *format.TimeType 523 } 524 525 func (t TimeLogicalType) IsAdjustedToUTC() bool { 526 return t.typ.IsAdjustedToUTC 527 } 528 529 func (t TimeLogicalType) TimeUnit() TimeUnitType { 530 return timeUnitFromThrift(t.typ.Unit) 531 } 532 533 func (TimeLogicalType) SortOrder() SortOrder { 534 return SortSIGNED 535 } 536 537 func (t TimeLogicalType) MarshalJSON() ([]byte, error) { 538 return json.Marshal(map[string]interface{}{ 539 "Type": "Time", "isAdjustedToUTC": t.typ.IsAdjustedToUTC, "timeUnit": timeUnitToString(t.typ.GetUnit())}) 540 } 541 542 func (t TimeLogicalType) String() string { 543 return fmt.Sprintf("Time(isAdjustedToUTC=%t, timeUnit=%s)", t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit())) 544 } 545 546 func (t TimeLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 547 unit := timeUnitFromThrift(t.typ.Unit) 548 if t.typ.IsAdjustedToUTC { 549 switch unit { 550 case TimeUnitMillis: 551 return ConvertedTypes.TimeMillis, DecimalMetadata{} 552 case TimeUnitMicros: 553 return ConvertedTypes.TimeMicros, DecimalMetadata{} 554 } 555 } 556 return ConvertedTypes.None, DecimalMetadata{} 557 } 558 559 func (t TimeLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 560 if dec.IsSet { 561 return false 562 } 563 unit := timeUnitFromThrift(t.typ.Unit) 564 if t.typ.IsAdjustedToUTC { 565 switch unit { 566 case TimeUnitMillis: 567 return c == ConvertedTypes.TimeMillis 568 case TimeUnitMicros: 569 return c == ConvertedTypes.TimeMicros 570 } 571 } 572 573 return c == ConvertedTypes.None || c == ConvertedTypes.NA 574 } 575 576 func (t TimeLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 577 return (typ == parquet.Types.Int32 && t.typ.GetUnit().IsSetMILLIS()) || 578 (typ == parquet.Types.Int64 && 579 (t.typ.GetUnit().IsSetMICROS() || t.typ.GetUnit().IsSetNANOS())) 580 } 581 582 func (t TimeLogicalType) toThrift() *format.LogicalType { 583 return &format.LogicalType{TIME: t.typ} 584 } 585 586 func (t TimeLogicalType) Equals(rhs LogicalType) bool { 587 other, ok := rhs.(*TimeLogicalType) 588 if !ok { 589 return false 590 } 591 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 592 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 593 } 594 595 // NewTimestampLogicalType returns a logical timestamp type with "forceConverted" 596 // set to false 597 func NewTimestampLogicalType(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 598 return &TimestampLogicalType{ 599 typ: &format.TimestampType{ 600 IsAdjustedToUTC: isAdjustedToUTC, 601 Unit: createTimeUnit(unit), 602 }, 603 forceConverted: false, 604 fromConverted: false, 605 } 606 } 607 608 // NewTimestampLogicalTypeForce returns a timestamp logical type with 609 // "forceConverted" set to true 610 func NewTimestampLogicalTypeForce(isAdjustedToUTC bool, unit TimeUnitType) LogicalType { 611 return &TimestampLogicalType{ 612 typ: &format.TimestampType{ 613 IsAdjustedToUTC: isAdjustedToUTC, 614 Unit: createTimeUnit(unit), 615 }, 616 forceConverted: true, 617 fromConverted: false, 618 } 619 } 620 621 // TimestampOpt options used with New Timestamp Logical Type 622 type TimestampOpt func(*TimestampLogicalType) 623 624 // WithTSIsAdjustedToUTC sets the IsAdjustedToUTC field of the timestamp type. 625 func WithTSIsAdjustedToUTC() TimestampOpt { 626 return func(t *TimestampLogicalType) { 627 t.typ.IsAdjustedToUTC = true 628 } 629 } 630 631 // WithTSTimeUnitType sets the time unit for the timestamp type 632 func WithTSTimeUnitType(unit TimeUnitType) TimestampOpt { 633 return func(t *TimestampLogicalType) { 634 t.typ.Unit = createTimeUnit(unit) 635 } 636 } 637 638 // WithTSForceConverted enable force converted mode 639 func WithTSForceConverted() TimestampOpt { 640 return func(t *TimestampLogicalType) { 641 t.forceConverted = true 642 } 643 } 644 645 // WithTSFromConverted enable the timestamp logical type to be 646 // constructed from a converted type. 647 func WithTSFromConverted() TimestampOpt { 648 return func(t *TimestampLogicalType) { 649 t.fromConverted = true 650 } 651 } 652 653 // NewTimestampLogicalTypeWithOpts creates a new TimestampLogicalType with the provided options. 654 // 655 // TimestampType Unit defaults to milliseconds (TimeUnitMillis) 656 func NewTimestampLogicalTypeWithOpts(opts ...TimestampOpt) LogicalType { 657 ts := &TimestampLogicalType{ 658 typ: &format.TimestampType{ 659 Unit: createTimeUnit(TimeUnitMillis), // default to milliseconds 660 }, 661 } 662 663 for _, o := range opts { 664 o(ts) 665 } 666 667 return ts 668 } 669 670 // TimestampLogicalType represents an int64 number that can be decoded 671 // into a year, month, day, hour, minute, second, and subsecond 672 type TimestampLogicalType struct { 673 baseLogicalType 674 typ *format.TimestampType 675 // forceConverted denotes whether or not the resulting serialized 676 // type when writing to parquet will be written as the legacy 677 // ConvertedType TIMESTAMP_MICROS/TIMESTAMP_MILLIS (true) 678 // or if it will write the proper current Logical Types (false, default) 679 forceConverted bool 680 // fromConverted denotes if the timestamp type was created by 681 // translating a legacy converted type of TIMESTAMP_MILLIS or 682 // TIMESTAMP_MICROS rather than by using the current logical 683 // types. Default is false. 684 fromConverted bool 685 } 686 687 func (t TimestampLogicalType) IsFromConvertedType() bool { 688 return t.fromConverted 689 } 690 691 func (t TimestampLogicalType) IsAdjustedToUTC() bool { 692 return t.typ.IsAdjustedToUTC 693 } 694 695 func (t TimestampLogicalType) TimeUnit() TimeUnitType { 696 return timeUnitFromThrift(t.typ.Unit) 697 } 698 699 func (TimestampLogicalType) SortOrder() SortOrder { 700 return SortSIGNED 701 } 702 703 func (t TimestampLogicalType) MarshalJSON() ([]byte, error) { 704 return json.Marshal(map[string]interface{}{ 705 "Type": "Timestamp", 706 "isAdjustedToUTC": t.typ.IsAdjustedToUTC, 707 "timeUnit": timeUnitToString(t.typ.GetUnit()), 708 "is_from_converted_type": t.fromConverted, 709 "force_set_converted_type": t.forceConverted, 710 }) 711 } 712 713 func (t TimestampLogicalType) IsSerialized() bool { 714 return !t.fromConverted 715 } 716 717 func (t TimestampLogicalType) String() string { 718 return fmt.Sprintf("Timestamp(isAdjustedToUTC=%t, timeUnit=%s, is_from_converted_type=%t, force_set_converted_type=%t)", 719 t.typ.GetIsAdjustedToUTC(), timeUnitToString(t.typ.GetUnit()), t.fromConverted, t.forceConverted) 720 } 721 722 func (t TimestampLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 723 unit := timeUnitFromThrift(t.typ.Unit) 724 if t.typ.IsAdjustedToUTC || t.forceConverted { 725 switch unit { 726 case TimeUnitMillis: 727 return ConvertedTypes.TimestampMillis, DecimalMetadata{} 728 case TimeUnitMicros: 729 return ConvertedTypes.TimestampMicros, DecimalMetadata{} 730 } 731 } 732 return ConvertedTypes.None, DecimalMetadata{} 733 } 734 735 func (t TimestampLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 736 if dec.IsSet { 737 return false 738 } 739 740 switch timeUnitFromThrift(t.typ.Unit) { 741 case TimeUnitMillis: 742 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 743 return c == ConvertedTypes.TimestampMillis 744 } 745 case TimeUnitMicros: 746 if t.typ.GetIsAdjustedToUTC() || t.forceConverted { 747 return c == ConvertedTypes.TimestampMicros 748 } 749 } 750 751 return c == ConvertedTypes.None || c == ConvertedTypes.NA 752 } 753 754 func (TimestampLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 755 return t == parquet.Types.Int64 756 } 757 758 func (t TimestampLogicalType) toThrift() *format.LogicalType { 759 return &format.LogicalType{TIMESTAMP: t.typ} 760 } 761 762 func (t TimestampLogicalType) Equals(rhs LogicalType) bool { 763 other, ok := rhs.(*TimestampLogicalType) 764 if !ok { 765 return false 766 } 767 return t.typ.IsAdjustedToUTC == other.typ.IsAdjustedToUTC && 768 timeUnitFromThrift(t.typ.Unit) == timeUnitFromThrift(other.typ.Unit) 769 } 770 771 // NewIntLogicalType creates an integer logical type of the desired bitwidth 772 // and whether it is signed or not. 773 // 774 // Bit width must be exactly 8, 16, 32 or 64 for an integer logical type 775 func NewIntLogicalType(bitWidth int8, signed bool) LogicalType { 776 switch bitWidth { 777 case 8, 16, 32, 64: 778 default: 779 panic("parquet: bit width must be exactly 8, 16, 32, or 64 for Int logical type") 780 } 781 return &IntLogicalType{ 782 typ: &format.IntType{ 783 BitWidth: bitWidth, 784 IsSigned: signed, 785 }, 786 } 787 } 788 789 // IntLogicalType represents an integer type of a specific bit width and 790 // is either signed or unsigned. 791 type IntLogicalType struct { 792 baseLogicalType 793 typ *format.IntType 794 } 795 796 func (t IntLogicalType) BitWidth() int8 { 797 return t.typ.BitWidth 798 } 799 800 func (t IntLogicalType) IsSigned() bool { 801 return t.typ.IsSigned 802 } 803 804 func (t IntLogicalType) SortOrder() SortOrder { 805 if t.typ.IsSigned { 806 return SortSIGNED 807 } 808 return SortUNSIGNED 809 } 810 811 func (t IntLogicalType) MarshalJSON() ([]byte, error) { 812 return json.Marshal(map[string]interface{}{ 813 "Type": "Int", "bitWidth": t.typ.BitWidth, "isSigned": t.typ.IsSigned, 814 }) 815 } 816 817 func (t IntLogicalType) String() string { 818 return fmt.Sprintf("Int(bitWidth=%d, isSigned=%t)", t.typ.GetBitWidth(), t.typ.GetIsSigned()) 819 } 820 821 func (t IntLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 822 var d DecimalMetadata 823 if t.typ.IsSigned { 824 switch t.typ.BitWidth { 825 case 8: 826 return ConvertedTypes.Int8, d 827 case 16: 828 return ConvertedTypes.Int16, d 829 case 32: 830 return ConvertedTypes.Int32, d 831 case 64: 832 return ConvertedTypes.Int64, d 833 } 834 } else { 835 switch t.typ.BitWidth { 836 case 8: 837 return ConvertedTypes.Uint8, d 838 case 16: 839 return ConvertedTypes.Uint16, d 840 case 32: 841 return ConvertedTypes.Uint32, d 842 case 64: 843 return ConvertedTypes.Uint64, d 844 } 845 } 846 return ConvertedTypes.None, d 847 } 848 849 func (t IntLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 850 if dec.IsSet { 851 return false 852 } 853 v, _ := t.ToConvertedType() 854 return c == v 855 } 856 857 func (t IntLogicalType) IsApplicable(typ parquet.Type, _ int32) bool { 858 return (typ == parquet.Types.Int32 && t.typ.GetBitWidth() <= 32) || 859 (typ == parquet.Types.Int64 && t.typ.GetBitWidth() == 64) 860 } 861 862 func (t IntLogicalType) toThrift() *format.LogicalType { 863 return &format.LogicalType{INTEGER: t.typ} 864 } 865 866 func (t IntLogicalType) Equals(rhs LogicalType) bool { 867 other, ok := rhs.(*IntLogicalType) 868 if !ok { 869 return false 870 } 871 872 return t.typ.GetIsSigned() == other.typ.GetIsSigned() && 873 t.typ.GetBitWidth() == other.typ.GetBitWidth() 874 } 875 876 // UnknownLogicalType is a type that is essentially a placeholder for when 877 // we don't know the type. 878 type UnknownLogicalType struct{ baseLogicalType } 879 880 func (UnknownLogicalType) SortOrder() SortOrder { 881 return SortUNKNOWN 882 } 883 884 func (UnknownLogicalType) MarshalJSON() ([]byte, error) { 885 return json.Marshal(map[string]string{"Type": UnknownLogicalType{}.String()}) 886 } 887 888 func (UnknownLogicalType) IsValid() bool { return false } 889 890 func (UnknownLogicalType) IsSerialized() bool { return false } 891 892 func (UnknownLogicalType) String() string { 893 return "Unknown" 894 } 895 896 func (UnknownLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 897 return ConvertedTypes.NA, DecimalMetadata{} 898 } 899 900 func (UnknownLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 901 return c == ConvertedTypes.NA && !dec.IsSet 902 } 903 904 func (UnknownLogicalType) IsApplicable(parquet.Type, int32) bool { return true } 905 906 func (UnknownLogicalType) toThrift() *format.LogicalType { 907 return &format.LogicalType{UNKNOWN: format.NewNullType()} 908 } 909 910 func (UnknownLogicalType) Equals(rhs LogicalType) bool { 911 _, ok := rhs.(UnknownLogicalType) 912 return ok 913 } 914 915 // JSONLogicalType represents a byte array column which is to be interpreted 916 // as a JSON string. 917 type JSONLogicalType struct{ baseLogicalType } 918 919 func (JSONLogicalType) SortOrder() SortOrder { 920 return SortUNSIGNED 921 } 922 923 func (JSONLogicalType) MarshalJSON() ([]byte, error) { 924 return json.Marshal(map[string]string{"Type": JSONLogicalType{}.String()}) 925 } 926 927 func (JSONLogicalType) String() string { 928 return "JSON" 929 } 930 931 func (JSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 932 return ConvertedTypes.JSON, DecimalMetadata{} 933 } 934 935 func (JSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 936 return c == ConvertedTypes.JSON && !dec.IsSet 937 } 938 939 func (JSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 940 return t == parquet.Types.ByteArray 941 } 942 943 func (JSONLogicalType) toThrift() *format.LogicalType { 944 return &format.LogicalType{JSON: format.NewJsonType()} 945 } 946 947 func (JSONLogicalType) Equals(rhs LogicalType) bool { 948 _, ok := rhs.(JSONLogicalType) 949 return ok 950 } 951 952 // BSONLogicalType represents a binary JSON string in the byte array 953 type BSONLogicalType struct{ baseLogicalType } 954 955 func (BSONLogicalType) SortOrder() SortOrder { 956 return SortUNSIGNED 957 } 958 959 func (BSONLogicalType) MarshalJSON() ([]byte, error) { 960 return json.Marshal(map[string]string{"Type": BSONLogicalType{}.String()}) 961 } 962 963 func (BSONLogicalType) String() string { 964 return "BSON" 965 } 966 967 func (BSONLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 968 return ConvertedTypes.BSON, DecimalMetadata{} 969 } 970 971 func (BSONLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 972 return c == ConvertedTypes.BSON && !dec.IsSet 973 } 974 975 func (BSONLogicalType) IsApplicable(t parquet.Type, _ int32) bool { 976 return t == parquet.Types.ByteArray 977 } 978 979 func (BSONLogicalType) toThrift() *format.LogicalType { 980 return &format.LogicalType{BSON: format.NewBsonType()} 981 } 982 983 func (BSONLogicalType) Equals(rhs LogicalType) bool { 984 _, ok := rhs.(BSONLogicalType) 985 return ok 986 } 987 988 // UUIDLogicalType can only be used with a FixedLength byte array column 989 // that is exactly 16 bytes long 990 type UUIDLogicalType struct{ baseLogicalType } 991 992 func (UUIDLogicalType) SortOrder() SortOrder { 993 return SortUNSIGNED 994 } 995 996 func (UUIDLogicalType) MarshalJSON() ([]byte, error) { 997 return json.Marshal(map[string]string{"Type": UUIDLogicalType{}.String()}) 998 } 999 1000 func (UUIDLogicalType) String() string { 1001 return "UUID" 1002 } 1003 1004 func (UUIDLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1005 return ConvertedTypes.None, DecimalMetadata{} 1006 } 1007 1008 func (UUIDLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1009 if dec.IsSet { 1010 return false 1011 } 1012 switch c { 1013 case ConvertedTypes.None, ConvertedTypes.NA: 1014 return true 1015 } 1016 return false 1017 } 1018 1019 func (UUIDLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1020 return t == parquet.Types.FixedLenByteArray && tlen == 16 1021 } 1022 1023 func (UUIDLogicalType) toThrift() *format.LogicalType { 1024 return &format.LogicalType{UUID: format.NewUUIDType()} 1025 } 1026 1027 func (UUIDLogicalType) Equals(rhs LogicalType) bool { 1028 _, ok := rhs.(UUIDLogicalType) 1029 return ok 1030 } 1031 1032 // IntervalLogicalType is not yet in the thrift spec, but represents 1033 // an interval time and needs to be a fixed length byte array of 12 bytes 1034 type IntervalLogicalType struct{ baseLogicalType } 1035 1036 func (IntervalLogicalType) SortOrder() SortOrder { 1037 return SortUNKNOWN 1038 } 1039 1040 func (IntervalLogicalType) MarshalJSON() ([]byte, error) { 1041 return json.Marshal(map[string]string{"Type": IntervalLogicalType{}.String()}) 1042 } 1043 1044 func (IntervalLogicalType) String() string { 1045 return "Interval" 1046 } 1047 1048 func (IntervalLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1049 return ConvertedTypes.Interval, DecimalMetadata{} 1050 } 1051 1052 func (IntervalLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1053 return c == ConvertedTypes.Interval && !dec.IsSet 1054 } 1055 1056 func (IntervalLogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1057 return t == parquet.Types.FixedLenByteArray && tlen == 12 1058 } 1059 1060 func (IntervalLogicalType) toThrift() *format.LogicalType { 1061 panic("no parquet IntervalLogicalType yet implemented") 1062 } 1063 1064 func (IntervalLogicalType) Equals(rhs LogicalType) bool { 1065 _, ok := rhs.(IntervalLogicalType) 1066 return ok 1067 } 1068 1069 // Float16LogicalType can only be used with a FixedLength byte array column 1070 // that is exactly 2 bytes long 1071 type Float16LogicalType struct{ baseLogicalType } 1072 1073 func (Float16LogicalType) SortOrder() SortOrder { 1074 return SortSIGNED 1075 } 1076 1077 func (Float16LogicalType) MarshalJSON() ([]byte, error) { 1078 return json.Marshal(map[string]string{"Type": Float16LogicalType{}.String()}) 1079 } 1080 1081 func (Float16LogicalType) String() string { 1082 return "Float16" 1083 } 1084 1085 func (Float16LogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1086 return ConvertedTypes.None, DecimalMetadata{} 1087 } 1088 1089 func (Float16LogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1090 if dec.IsSet { 1091 return false 1092 } 1093 switch c { 1094 case ConvertedTypes.None, ConvertedTypes.NA: 1095 return true 1096 } 1097 return false 1098 } 1099 1100 func (Float16LogicalType) IsApplicable(t parquet.Type, tlen int32) bool { 1101 return t == parquet.Types.FixedLenByteArray && tlen == 2 1102 } 1103 1104 func (Float16LogicalType) toThrift() *format.LogicalType { 1105 return &format.LogicalType{FLOAT16: format.NewFloat16Type()} 1106 } 1107 1108 func (Float16LogicalType) Equals(rhs LogicalType) bool { 1109 _, ok := rhs.(Float16LogicalType) 1110 return ok 1111 } 1112 1113 type NullLogicalType struct{ baseLogicalType } 1114 1115 func (NullLogicalType) SortOrder() SortOrder { 1116 return SortUNKNOWN 1117 } 1118 1119 func (NullLogicalType) MarshalJSON() ([]byte, error) { 1120 return json.Marshal(map[string]string{"Type": NullLogicalType{}.String()}) 1121 } 1122 1123 func (NullLogicalType) String() string { 1124 return "Null" 1125 } 1126 1127 func (NullLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1128 return ConvertedTypes.None, DecimalMetadata{} 1129 } 1130 1131 func (NullLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1132 if dec.IsSet { 1133 return false 1134 } 1135 switch c { 1136 case ConvertedTypes.None, ConvertedTypes.NA: 1137 return true 1138 } 1139 return false 1140 } 1141 1142 func (NullLogicalType) IsApplicable(parquet.Type, int32) bool { 1143 return true 1144 } 1145 1146 func (NullLogicalType) toThrift() *format.LogicalType { 1147 return &format.LogicalType{UNKNOWN: format.NewNullType()} 1148 } 1149 1150 func (NullLogicalType) Equals(rhs LogicalType) bool { 1151 _, ok := rhs.(NullLogicalType) 1152 return ok 1153 } 1154 1155 type NoLogicalType struct{ baseLogicalType } 1156 1157 func (NoLogicalType) SortOrder() SortOrder { 1158 return SortUNKNOWN 1159 } 1160 1161 func (NoLogicalType) MarshalJSON() ([]byte, error) { 1162 return json.Marshal(map[string]string{"Type": NoLogicalType{}.String()}) 1163 } 1164 1165 func (NoLogicalType) IsSerialized() bool { return false } 1166 1167 func (NoLogicalType) String() string { 1168 return "None" 1169 } 1170 1171 func (NoLogicalType) ToConvertedType() (ConvertedType, DecimalMetadata) { 1172 return ConvertedTypes.None, DecimalMetadata{} 1173 } 1174 1175 func (NoLogicalType) IsCompatible(c ConvertedType, dec DecimalMetadata) bool { 1176 return c == ConvertedTypes.None && !dec.IsSet 1177 } 1178 1179 func (NoLogicalType) IsApplicable(parquet.Type, int32) bool { 1180 return true 1181 } 1182 1183 func (NoLogicalType) toThrift() *format.LogicalType { 1184 panic("cannot convert NoLogicalType to thrift") 1185 } 1186 1187 func (NoLogicalType) Equals(rhs LogicalType) bool { 1188 _, ok := rhs.(NoLogicalType) 1189 return ok 1190 } 1191 1192 func (NoLogicalType) IsNone() bool { return true }