github.com/apache/arrow/go/v16@v16.1.0/arrow/datatype_fixedwidth.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package arrow 18 19 import ( 20 "fmt" 21 "strconv" 22 "sync" 23 "time" 24 25 "github.com/apache/arrow/go/v16/internal/json" 26 27 "golang.org/x/xerrors" 28 ) 29 30 type BooleanType struct{} 31 32 func (t *BooleanType) ID() Type { return BOOL } 33 func (t *BooleanType) Name() string { return "bool" } 34 func (t *BooleanType) String() string { return "bool" } 35 func (t *BooleanType) Fingerprint() string { return typeFingerprint(t) } 36 func (BooleanType) Bytes() int { return 1 } 37 38 // BitWidth returns the number of bits required to store a single element of this data type in memory. 39 func (t *BooleanType) BitWidth() int { return 1 } 40 41 func (BooleanType) Layout() DataTypeLayout { 42 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecBitmap()}} 43 } 44 45 type FixedSizeBinaryType struct { 46 ByteWidth int 47 } 48 49 func (*FixedSizeBinaryType) ID() Type { return FIXED_SIZE_BINARY } 50 func (*FixedSizeBinaryType) Name() string { return "fixed_size_binary" } 51 func (t *FixedSizeBinaryType) BitWidth() int { return 8 * t.ByteWidth } 52 func (t *FixedSizeBinaryType) Bytes() int { return t.ByteWidth } 53 func (t *FixedSizeBinaryType) Fingerprint() string { return typeFingerprint(t) } 54 func (t *FixedSizeBinaryType) String() string { 55 return "fixed_size_binary[" + strconv.Itoa(t.ByteWidth) + "]" 56 } 57 func (t *FixedSizeBinaryType) Layout() DataTypeLayout { 58 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(t.ByteWidth)}} 59 } 60 61 type ( 62 Timestamp int64 63 Time32 int32 64 Time64 int64 65 TimeUnit int 66 Date32 int32 67 Date64 int64 68 Duration int64 69 ) 70 71 // Date32FromTime returns a Date32 value from a time object 72 func Date32FromTime(t time.Time) Date32 { 73 return Date32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds())) 74 } 75 76 func (d Date32) ToTime() time.Time { 77 return time.Unix(0, 0).UTC().AddDate(0, 0, int(d)) 78 } 79 80 func (d Date32) FormattedString() string { 81 return d.ToTime().Format("2006-01-02") 82 } 83 84 // Date64FromTime returns a Date64 value from a time object 85 func Date64FromTime(t time.Time) Date64 { 86 // truncate to the start of the day to get the correct value 87 t = t.Truncate(24 * time.Hour) 88 return Date64(t.Unix()*1e3 + int64(t.Nanosecond())/1e6) 89 } 90 91 func (d Date64) ToTime() time.Time { 92 days := int(int64(d) / (time.Hour * 24).Milliseconds()) 93 return time.Unix(0, 0).UTC().AddDate(0, 0, days) 94 } 95 96 func (d Date64) FormattedString() string { 97 return d.ToTime().Format("2006-01-02") 98 } 99 100 // TimestampFromStringInLocation is like TimestampFromString, but treats the time instant 101 // as if it were in the provided timezone before converting to UTC for internal representation. 102 func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location) (Timestamp, bool, error) { 103 if len(val) < 10 { 104 return 0, false, fmt.Errorf("%w: invalid timestamp string", ErrInvalid) 105 } 106 107 var ( 108 format = "2006-01-02" 109 zoneFmt string 110 lenWithoutZone = len(val) 111 ) 112 113 if lenWithoutZone > 10 { 114 switch { 115 case val[len(val)-1] == 'Z': 116 zoneFmt = "Z" 117 lenWithoutZone-- 118 case val[len(val)-3] == '+' || val[len(val)-3] == '-': 119 zoneFmt = "-07" 120 lenWithoutZone -= 3 121 case val[len(val)-5] == '+' || val[len(val)-5] == '-': 122 zoneFmt = "-0700" 123 lenWithoutZone -= 5 124 case val[len(val)-6] == '+' || val[len(val)-6] == '-': 125 zoneFmt = "-07:00" 126 lenWithoutZone -= 6 127 } 128 } 129 130 switch { 131 case lenWithoutZone == 13: 132 format += string(val[10]) + "15" 133 case lenWithoutZone == 16: 134 format += string(val[10]) + "15:04" 135 case lenWithoutZone >= 19: 136 format += string(val[10]) + "15:04:05.999999999" 137 } 138 139 // error if we're truncating precision 140 // don't need a case for nano as time.Parse will already error if 141 // more than nanosecond precision is provided 142 switch { 143 case unit == Second && lenWithoutZone > 19: 144 return 0, zoneFmt != "", xerrors.New("provided more than second precision for timestamp[s]") 145 case unit == Millisecond && lenWithoutZone > 23: 146 return 0, zoneFmt != "", xerrors.New("provided more than millisecond precision for timestamp[ms]") 147 case unit == Microsecond && lenWithoutZone > 26: 148 return 0, zoneFmt != "", xerrors.New("provided more than microsecond precision for timestamp[us]") 149 } 150 151 format += zoneFmt 152 out, err := time.Parse(format, val) 153 if err != nil { 154 return 0, zoneFmt != "", fmt.Errorf("%w: %s", ErrInvalid, err) 155 } 156 if loc != time.UTC { 157 // convert to UTC by putting the same time instant in the desired location 158 // before converting to UTC 159 out = out.In(loc).UTC() 160 } 161 162 ts, err := TimestampFromTime(out, unit) 163 return ts, zoneFmt != "", err 164 } 165 166 // TimestampFromString parses a string and returns a timestamp for the given unit 167 // level. 168 // 169 // The timestamp should be in one of the following forms, [T] can be either T 170 // or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of 171 // fractions of a second. 172 // 173 // YYYY-MM-DD 174 // YYYY-MM-DD[T]HH 175 // YYYY-MM-DD[T]HH:MM 176 // YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz] 177 // 178 // You can also optionally have an ending Z to indicate UTC or indicate a specific 179 // timezone using ±HH, ±HHMM or ±HH:MM at the end of the string. 180 func TimestampFromString(val string, unit TimeUnit) (Timestamp, error) { 181 tm, _, err := TimestampFromStringInLocation(val, unit, time.UTC) 182 return tm, err 183 } 184 185 func (t Timestamp) ToTime(unit TimeUnit) time.Time { 186 switch unit { 187 case Second: 188 return time.Unix(int64(t), 0).UTC() 189 case Millisecond: 190 return time.UnixMilli(int64(t)).UTC() 191 case Microsecond: 192 return time.UnixMicro(int64(t)).UTC() 193 default: 194 return time.Unix(0, int64(t)).UTC() 195 } 196 } 197 198 // TimestampFromTime allows converting time.Time to Timestamp 199 func TimestampFromTime(val time.Time, unit TimeUnit) (Timestamp, error) { 200 switch unit { 201 case Second: 202 return Timestamp(val.Unix()), nil 203 case Millisecond: 204 return Timestamp(val.Unix()*1e3 + int64(val.Nanosecond())/1e6), nil 205 case Microsecond: 206 return Timestamp(val.Unix()*1e6 + int64(val.Nanosecond())/1e3), nil 207 case Nanosecond: 208 return Timestamp(val.UnixNano()), nil 209 default: 210 return 0, fmt.Errorf("%w: unexpected timestamp unit: %s", ErrInvalid, unit) 211 } 212 } 213 214 // Time32FromString parses a string to return a Time32 value in the given unit, 215 // unit needs to be only seconds or milliseconds and the string should be in the 216 // form of HH:MM or HH:MM:SS[.zzz] where the fractions of a second are optional. 217 func Time32FromString(val string, unit TimeUnit) (Time32, error) { 218 switch unit { 219 case Second: 220 if len(val) > 8 { 221 return 0, xerrors.New("cannot convert larger than second precision to time32s") 222 } 223 case Millisecond: 224 if len(val) > 12 { 225 return 0, xerrors.New("cannot convert larger than millisecond precision to time32ms") 226 } 227 case Microsecond, Nanosecond: 228 return 0, xerrors.New("time32 can only be seconds or milliseconds") 229 } 230 231 var ( 232 out time.Time 233 err error 234 ) 235 switch { 236 case len(val) == 5: 237 out, err = time.Parse("15:04", val) 238 default: 239 out, err = time.Parse("15:04:05.999", val) 240 } 241 if err != nil { 242 return 0, err 243 } 244 t := out.Sub(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC)) 245 if unit == Second { 246 return Time32(t.Seconds()), nil 247 } 248 return Time32(t.Milliseconds()), nil 249 } 250 251 func (t Time32) ToTime(unit TimeUnit) time.Time { 252 return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC() 253 } 254 255 func (t Time32) FormattedString(unit TimeUnit) string { 256 const baseFmt = "15:04:05" 257 tm := t.ToTime(unit) 258 switch unit { 259 case Second: 260 return tm.Format(baseFmt) 261 case Millisecond: 262 return tm.Format(baseFmt + ".000") 263 } 264 return "" 265 } 266 267 // Time64FromString parses a string to return a Time64 value in the given unit, 268 // unit needs to be only microseconds or nanoseconds and the string should be in the 269 // form of HH:MM or HH:MM:SS[.zzzzzzzzz] where the fractions of a second are optional. 270 func Time64FromString(val string, unit TimeUnit) (Time64, error) { 271 // don't need to check length for nanoseconds as Parse will already error 272 // if more than 9 digits are provided for the fractional second 273 switch unit { 274 case Microsecond: 275 if len(val) > 15 { 276 return 0, xerrors.New("cannot convert larger than microsecond precision to time64us") 277 } 278 case Second, Millisecond: 279 return 0, xerrors.New("time64 should only be microseconds or nanoseconds") 280 } 281 282 var ( 283 out time.Time 284 err error 285 ) 286 switch { 287 case len(val) == 5: 288 out, err = time.Parse("15:04", val) 289 default: 290 out, err = time.Parse("15:04:05.999999999", val) 291 } 292 if err != nil { 293 return 0, err 294 } 295 t := out.Sub(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC)) 296 if unit == Microsecond { 297 return Time64(t.Microseconds()), nil 298 } 299 return Time64(t.Nanoseconds()), nil 300 } 301 302 func (t Time64) ToTime(unit TimeUnit) time.Time { 303 return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC() 304 } 305 306 func (t Time64) FormattedString(unit TimeUnit) string { 307 const baseFmt = "15:04:05.000000" 308 tm := t.ToTime(unit) 309 switch unit { 310 case Microsecond: 311 return tm.Format(baseFmt) 312 case Nanosecond: 313 return tm.Format(baseFmt + "000") 314 } 315 return "" 316 } 317 318 const ( 319 Second TimeUnit = iota 320 Millisecond 321 Microsecond 322 Nanosecond 323 ) 324 325 var TimeUnitValues = []TimeUnit{Second, Millisecond, Microsecond, Nanosecond} 326 327 // Multiplier returns a time.Duration value to multiply by in order to 328 // convert the value into nanoseconds 329 func (u TimeUnit) Multiplier() time.Duration { 330 return [...]time.Duration{time.Second, time.Millisecond, time.Microsecond, time.Nanosecond}[uint(u)&3] 331 } 332 333 func (u TimeUnit) String() string { return [...]string{"s", "ms", "us", "ns"}[uint(u)&3] } 334 335 type TemporalWithUnit interface { 336 FixedWidthDataType 337 TimeUnit() TimeUnit 338 } 339 340 // TimestampType is encoded as a 64-bit signed integer since the UNIX epoch (2017-01-01T00:00:00Z). 341 // The zero-value is a second and time zone neutral. In Arrow semantics, time zone neutral does not 342 // represent a physical point in time, but rather a "wall clock" time that only has meaning within 343 // the context that produced it. In Go, time.Time can only represent instants; there is no notion 344 // of "wall clock" time. Therefore, time zone neutral timestamps are represented as UTC per Go 345 // conventions even though the Arrow type itself has no time zone. 346 type TimestampType struct { 347 Unit TimeUnit 348 TimeZone string 349 350 loc *time.Location 351 mx sync.RWMutex 352 } 353 354 func (*TimestampType) ID() Type { return TIMESTAMP } 355 func (*TimestampType) Name() string { return "timestamp" } 356 func (t *TimestampType) String() string { 357 switch len(t.TimeZone) { 358 case 0: 359 return "timestamp[" + t.Unit.String() + "]" 360 default: 361 return "timestamp[" + t.Unit.String() + ", tz=" + t.TimeZone + "]" 362 } 363 } 364 365 func (t *TimestampType) Fingerprint() string { 366 return fmt.Sprintf("%s%d:%s", typeFingerprint(t)+string(timeUnitFingerprint(t.Unit)), len(t.TimeZone), t.TimeZone) 367 } 368 369 // BitWidth returns the number of bits required to store a single element of this data type in memory. 370 func (*TimestampType) BitWidth() int { return 64 } 371 372 func (*TimestampType) Bytes() int { return Int64SizeBytes } 373 374 func (*TimestampType) Layout() DataTypeLayout { 375 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(TimestampSizeBytes)}} 376 } 377 378 func (t *TimestampType) TimeUnit() TimeUnit { return t.Unit } 379 380 // ClearCachedLocation clears the cached time.Location object in the type. 381 // This should be called if you change the value of the TimeZone after having 382 // potentially called GetZone. 383 func (t *TimestampType) ClearCachedLocation() { 384 t.mx.Lock() 385 defer t.mx.Unlock() 386 t.loc = nil 387 } 388 389 // GetZone returns a *time.Location that represents the current TimeZone member 390 // of the TimestampType. If it is "", "UTC", or "utc", you'll get time.UTC. 391 // Otherwise it must either be a valid tzdata string such as "America/New_York" 392 // or of the format +HH:MM or -HH:MM indicating an absolute offset. 393 // 394 // The location object will be cached in the TimestampType for subsequent calls 395 // so if you change the value of TimeZone after calling this, make sure to call 396 // ClearCachedLocation. 397 func (t *TimestampType) GetZone() (*time.Location, error) { 398 t.mx.RLock() 399 if t.loc != nil { 400 defer t.mx.RUnlock() 401 return t.loc, nil 402 } 403 404 t.mx.RUnlock() 405 t.mx.Lock() 406 defer t.mx.Unlock() 407 // in case GetZone() was called in between releasing the read lock and 408 // getting the write lock 409 if t.loc != nil { 410 return t.loc, nil 411 } 412 // the TimeZone string is allowed to be either a valid tzdata string 413 // such as "America/New_York" or an absolute offset of the form -XX:XX 414 // or +XX:XX 415 // 416 // As such we have two methods we can try, first we'll try LoadLocation 417 // and if that fails, we'll test for an absolute offset. 418 if t.TimeZone == "" || t.TimeZone == "UTC" || t.TimeZone == "utc" { 419 t.loc = time.UTC 420 return time.UTC, nil 421 } 422 423 if loc, err := time.LoadLocation(t.TimeZone); err == nil { 424 t.loc = loc 425 return loc, err 426 } 427 428 // at this point we know that the timezone isn't empty, and didn't match 429 // anything in the tzdata names. So either it's an absolute offset 430 // or it's invalid. 431 timetz, err := time.Parse("-07:00", t.TimeZone) 432 if err != nil { 433 return time.UTC, fmt.Errorf("could not find timezone location for '%s'", t.TimeZone) 434 } 435 436 _, offset := timetz.Zone() 437 t.loc = time.FixedZone(t.TimeZone, offset) 438 return t.loc, nil 439 } 440 441 // GetToTimeFunc returns a function for converting an arrow.Timestamp value into a 442 // time.Time object with proper TimeZone and precision. If the TimeZone is invalid 443 // this will return an error. It calls GetZone to get the timezone for consistency. 444 func (t *TimestampType) GetToTimeFunc() (func(Timestamp) time.Time, error) { 445 tz, err := t.GetZone() 446 if err != nil { 447 return nil, err 448 } 449 450 return func(v Timestamp) time.Time { return v.ToTime(t.Unit).In(tz) }, nil 451 } 452 453 // Time32Type is encoded as a 32-bit signed integer, representing either seconds or milliseconds since midnight. 454 type Time32Type struct { 455 Unit TimeUnit 456 } 457 458 func (*Time32Type) ID() Type { return TIME32 } 459 func (*Time32Type) Name() string { return "time32" } 460 func (*Time32Type) BitWidth() int { return 32 } 461 func (*Time32Type) Bytes() int { return Int32SizeBytes } 462 func (t *Time32Type) String() string { return "time32[" + t.Unit.String() + "]" } 463 func (t *Time32Type) Fingerprint() string { 464 return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit)) 465 } 466 467 func (Time32Type) Layout() DataTypeLayout { 468 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time32SizeBytes)}} 469 } 470 471 func (t *Time32Type) TimeUnit() TimeUnit { return t.Unit } 472 473 // Time64Type is encoded as a 64-bit signed integer, representing either microseconds or nanoseconds since midnight. 474 type Time64Type struct { 475 Unit TimeUnit 476 } 477 478 func (*Time64Type) ID() Type { return TIME64 } 479 func (*Time64Type) Name() string { return "time64" } 480 func (*Time64Type) BitWidth() int { return 64 } 481 func (*Time64Type) Bytes() int { return Int64SizeBytes } 482 func (t *Time64Type) String() string { return "time64[" + t.Unit.String() + "]" } 483 func (t *Time64Type) Fingerprint() string { 484 return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit)) 485 } 486 487 func (Time64Type) Layout() DataTypeLayout { 488 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time64SizeBytes)}} 489 } 490 491 func (t *Time64Type) TimeUnit() TimeUnit { return t.Unit } 492 493 // DurationType is encoded as a 64-bit signed integer, representing an amount 494 // of elapsed time without any relation to a calendar artifact. 495 type DurationType struct { 496 Unit TimeUnit 497 } 498 499 func (*DurationType) ID() Type { return DURATION } 500 func (*DurationType) Name() string { return "duration" } 501 func (*DurationType) BitWidth() int { return 64 } 502 func (*DurationType) Bytes() int { return Int64SizeBytes } 503 func (t *DurationType) String() string { return "duration[" + t.Unit.String() + "]" } 504 func (t *DurationType) Fingerprint() string { 505 return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit)) 506 } 507 508 func (DurationType) Layout() DataTypeLayout { 509 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DurationSizeBytes)}} 510 } 511 512 func (t *DurationType) TimeUnit() TimeUnit { return t.Unit } 513 514 // Float16Type represents a floating point value encoded with a 16-bit precision. 515 type Float16Type struct{} 516 517 func (t *Float16Type) ID() Type { return FLOAT16 } 518 func (t *Float16Type) Name() string { return "float16" } 519 func (t *Float16Type) String() string { return "float16" } 520 func (t *Float16Type) Fingerprint() string { return typeFingerprint(t) } 521 522 // BitWidth returns the number of bits required to store a single element of this data type in memory. 523 func (t *Float16Type) BitWidth() int { return 16 } 524 525 func (Float16Type) Bytes() int { return Float16SizeBytes } 526 527 func (Float16Type) Layout() DataTypeLayout { 528 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Float16SizeBytes)}} 529 } 530 531 type DecimalType interface { 532 DataType 533 GetPrecision() int32 534 GetScale() int32 535 } 536 537 func NewDecimalType(id Type, prec, scale int32) (DecimalType, error) { 538 switch id { 539 case DECIMAL128: 540 return &Decimal128Type{Precision: prec, Scale: scale}, nil 541 case DECIMAL256: 542 return &Decimal256Type{Precision: prec, Scale: scale}, nil 543 default: 544 return nil, fmt.Errorf("%w: must use DECIMAL128 or DECIMAL256 to create a DecimalType", ErrInvalid) 545 } 546 } 547 548 // Decimal128Type represents a fixed-size 128-bit decimal type. 549 type Decimal128Type struct { 550 Precision int32 551 Scale int32 552 } 553 554 func (*Decimal128Type) ID() Type { return DECIMAL128 } 555 func (*Decimal128Type) Name() string { return "decimal" } 556 func (*Decimal128Type) BitWidth() int { return 128 } 557 func (*Decimal128Type) Bytes() int { return Decimal128SizeBytes } 558 func (t *Decimal128Type) String() string { 559 return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale) 560 } 561 func (t *Decimal128Type) Fingerprint() string { 562 return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale) 563 } 564 func (t *Decimal128Type) GetPrecision() int32 { return t.Precision } 565 func (t *Decimal128Type) GetScale() int32 { return t.Scale } 566 567 func (Decimal128Type) Layout() DataTypeLayout { 568 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}} 569 } 570 571 // Decimal256Type represents a fixed-size 256-bit decimal type. 572 type Decimal256Type struct { 573 Precision int32 574 Scale int32 575 } 576 577 func (*Decimal256Type) ID() Type { return DECIMAL256 } 578 func (*Decimal256Type) Name() string { return "decimal256" } 579 func (*Decimal256Type) BitWidth() int { return 256 } 580 func (*Decimal256Type) Bytes() int { return Decimal256SizeBytes } 581 func (t *Decimal256Type) String() string { 582 return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale) 583 } 584 func (t *Decimal256Type) Fingerprint() string { 585 return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale) 586 } 587 func (t *Decimal256Type) GetPrecision() int32 { return t.Precision } 588 func (t *Decimal256Type) GetScale() int32 { return t.Scale } 589 590 func (Decimal256Type) Layout() DataTypeLayout { 591 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal256SizeBytes)}} 592 } 593 594 // MonthInterval represents a number of months. 595 type MonthInterval int32 596 597 func (m *MonthInterval) UnmarshalJSON(data []byte) error { 598 var val struct { 599 Months int32 `json:"months"` 600 } 601 if err := json.Unmarshal(data, &val); err != nil { 602 return err 603 } 604 605 *m = MonthInterval(val.Months) 606 return nil 607 } 608 609 func (m MonthInterval) MarshalJSON() ([]byte, error) { 610 return json.Marshal(struct { 611 Months int32 `json:"months"` 612 }{int32(m)}) 613 } 614 615 // MonthIntervalType is encoded as a 32-bit signed integer, 616 // representing a number of months. 617 type MonthIntervalType struct{} 618 619 func (*MonthIntervalType) ID() Type { return INTERVAL_MONTHS } 620 func (*MonthIntervalType) Name() string { return "month_interval" } 621 func (*MonthIntervalType) String() string { return "month_interval" } 622 func (*MonthIntervalType) Fingerprint() string { return typeIDFingerprint(INTERVAL_MONTHS) + "M" } 623 624 // BitWidth returns the number of bits required to store a single element of this data type in memory. 625 func (t *MonthIntervalType) BitWidth() int { return 32 } 626 627 func (MonthIntervalType) Bytes() int { return Int32SizeBytes } 628 func (MonthIntervalType) Layout() DataTypeLayout { 629 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthIntervalSizeBytes)}} 630 } 631 632 // DayTimeInterval represents a number of days and milliseconds (fraction of day). 633 type DayTimeInterval struct { 634 Days int32 `json:"days"` 635 Milliseconds int32 `json:"milliseconds"` 636 } 637 638 // DayTimeIntervalType is encoded as a pair of 32-bit signed integer, 639 // representing a number of days and milliseconds (fraction of day). 640 type DayTimeIntervalType struct{} 641 642 func (*DayTimeIntervalType) ID() Type { return INTERVAL_DAY_TIME } 643 func (*DayTimeIntervalType) Name() string { return "day_time_interval" } 644 func (*DayTimeIntervalType) String() string { return "day_time_interval" } 645 func (*DayTimeIntervalType) Fingerprint() string { return typeIDFingerprint(INTERVAL_DAY_TIME) + "d" } 646 647 // BitWidth returns the number of bits required to store a single element of this data type in memory. 648 func (t *DayTimeIntervalType) BitWidth() int { return 64 } 649 650 func (DayTimeIntervalType) Bytes() int { return DayTimeIntervalSizeBytes } 651 func (DayTimeIntervalType) Layout() DataTypeLayout { 652 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DayTimeIntervalSizeBytes)}} 653 } 654 655 // MonthDayNanoInterval represents a number of months, days and nanoseconds (fraction of day). 656 type MonthDayNanoInterval struct { 657 Months int32 `json:"months"` 658 Days int32 `json:"days"` 659 Nanoseconds int64 `json:"nanoseconds"` 660 } 661 662 // MonthDayNanoIntervalType is encoded as two signed 32-bit integers representing 663 // a number of months and a number of days, followed by a 64-bit integer representing 664 // the number of nanoseconds since midnight for fractions of a day. 665 type MonthDayNanoIntervalType struct{} 666 667 func (*MonthDayNanoIntervalType) ID() Type { return INTERVAL_MONTH_DAY_NANO } 668 func (*MonthDayNanoIntervalType) Name() string { return "month_day_nano_interval" } 669 func (*MonthDayNanoIntervalType) String() string { return "month_day_nano_interval" } 670 func (*MonthDayNanoIntervalType) Fingerprint() string { 671 return typeIDFingerprint(INTERVAL_MONTH_DAY_NANO) + "N" 672 } 673 674 // BitWidth returns the number of bits required to store a single element of this data type in memory. 675 func (*MonthDayNanoIntervalType) BitWidth() int { return 128 } 676 func (*MonthDayNanoIntervalType) Bytes() int { return MonthDayNanoIntervalSizeBytes } 677 func (MonthDayNanoIntervalType) Layout() DataTypeLayout { 678 return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthDayNanoIntervalSizeBytes)}} 679 } 680 681 type TimestampConvertOp int8 682 683 const ( 684 ConvDIVIDE = iota 685 ConvMULTIPLY 686 ) 687 688 var timestampConversion = [...][4]struct { 689 op TimestampConvertOp 690 factor int64 691 }{ 692 Nanosecond: { 693 Nanosecond: {ConvMULTIPLY, int64(time.Nanosecond)}, 694 Microsecond: {ConvDIVIDE, int64(time.Microsecond)}, 695 Millisecond: {ConvDIVIDE, int64(time.Millisecond)}, 696 Second: {ConvDIVIDE, int64(time.Second)}, 697 }, 698 Microsecond: { 699 Nanosecond: {ConvMULTIPLY, int64(time.Microsecond)}, 700 Microsecond: {ConvMULTIPLY, 1}, 701 Millisecond: {ConvDIVIDE, int64(time.Millisecond / time.Microsecond)}, 702 Second: {ConvDIVIDE, int64(time.Second / time.Microsecond)}, 703 }, 704 Millisecond: { 705 Nanosecond: {ConvMULTIPLY, int64(time.Millisecond)}, 706 Microsecond: {ConvMULTIPLY, int64(time.Millisecond / time.Microsecond)}, 707 Millisecond: {ConvMULTIPLY, 1}, 708 Second: {ConvDIVIDE, int64(time.Second / time.Millisecond)}, 709 }, 710 Second: { 711 Nanosecond: {ConvMULTIPLY, int64(time.Second)}, 712 Microsecond: {ConvMULTIPLY, int64(time.Second / time.Microsecond)}, 713 Millisecond: {ConvMULTIPLY, int64(time.Second / time.Millisecond)}, 714 Second: {ConvMULTIPLY, 1}, 715 }, 716 } 717 718 func GetTimestampConvert(in, out TimeUnit) (op TimestampConvertOp, factor int64) { 719 conv := timestampConversion[int(in)][int(out)] 720 return conv.op, conv.factor 721 } 722 723 func ConvertTimestampValue(in, out TimeUnit, value int64) int64 { 724 conv := timestampConversion[int(in)][int(out)] 725 switch conv.op { 726 case ConvMULTIPLY: 727 return value * conv.factor 728 case ConvDIVIDE: 729 return value / conv.factor 730 } 731 732 return 0 733 } 734 735 // DictionaryType represents categorical or dictionary-encoded in-memory data 736 // It contains a dictionary-encoded value type (any type) and an index type 737 // (any integer type). 738 type DictionaryType struct { 739 IndexType DataType 740 ValueType DataType 741 Ordered bool 742 } 743 744 func (*DictionaryType) ID() Type { return DICTIONARY } 745 func (*DictionaryType) Name() string { return "dictionary" } 746 func (d *DictionaryType) BitWidth() int { return d.IndexType.(FixedWidthDataType).BitWidth() } 747 func (d *DictionaryType) Bytes() int { return d.IndexType.(FixedWidthDataType).Bytes() } 748 func (d *DictionaryType) String() string { 749 return fmt.Sprintf("%s<values=%s, indices=%s, ordered=%t>", 750 d.Name(), d.ValueType, d.IndexType, d.Ordered) 751 } 752 func (d *DictionaryType) Fingerprint() string { 753 indexFingerprint := d.IndexType.Fingerprint() 754 valueFingerprint := d.ValueType.Fingerprint() 755 ordered := "1" 756 if !d.Ordered { 757 ordered = "0" 758 } 759 760 if len(valueFingerprint) > 0 { 761 return typeFingerprint(d) + indexFingerprint + valueFingerprint + ordered 762 } 763 return ordered 764 } 765 766 func (d *DictionaryType) Layout() DataTypeLayout { 767 layout := d.IndexType.Layout() 768 layout.HasDict = true 769 return layout 770 } 771 772 var ( 773 FixedWidthTypes = struct { 774 Boolean FixedWidthDataType 775 Date32 FixedWidthDataType 776 Date64 FixedWidthDataType 777 DayTimeInterval FixedWidthDataType 778 Duration_s FixedWidthDataType 779 Duration_ms FixedWidthDataType 780 Duration_us FixedWidthDataType 781 Duration_ns FixedWidthDataType 782 Float16 FixedWidthDataType 783 MonthInterval FixedWidthDataType 784 Time32s FixedWidthDataType 785 Time32ms FixedWidthDataType 786 Time64us FixedWidthDataType 787 Time64ns FixedWidthDataType 788 Timestamp_s FixedWidthDataType 789 Timestamp_ms FixedWidthDataType 790 Timestamp_us FixedWidthDataType 791 Timestamp_ns FixedWidthDataType 792 MonthDayNanoInterval FixedWidthDataType 793 }{ 794 Boolean: &BooleanType{}, 795 Date32: &Date32Type{}, 796 Date64: &Date64Type{}, 797 DayTimeInterval: &DayTimeIntervalType{}, 798 Duration_s: &DurationType{Unit: Second}, 799 Duration_ms: &DurationType{Unit: Millisecond}, 800 Duration_us: &DurationType{Unit: Microsecond}, 801 Duration_ns: &DurationType{Unit: Nanosecond}, 802 Float16: &Float16Type{}, 803 MonthInterval: &MonthIntervalType{}, 804 Time32s: &Time32Type{Unit: Second}, 805 Time32ms: &Time32Type{Unit: Millisecond}, 806 Time64us: &Time64Type{Unit: Microsecond}, 807 Time64ns: &Time64Type{Unit: Nanosecond}, 808 Timestamp_s: &TimestampType{Unit: Second, TimeZone: "UTC"}, 809 Timestamp_ms: &TimestampType{Unit: Millisecond, TimeZone: "UTC"}, 810 Timestamp_us: &TimestampType{Unit: Microsecond, TimeZone: "UTC"}, 811 Timestamp_ns: &TimestampType{Unit: Nanosecond, TimeZone: "UTC"}, 812 MonthDayNanoInterval: &MonthDayNanoIntervalType{}, 813 } 814 815 _ FixedWidthDataType = (*FixedSizeBinaryType)(nil) 816 )