github.com/apache/arrow/go/v16@v16.1.0/arrow/datatype_fixedwidth.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package arrow
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/apache/arrow/go/v16/internal/json"
    26  
    27  	"golang.org/x/xerrors"
    28  )
    29  
    30  type BooleanType struct{}
    31  
    32  func (t *BooleanType) ID() Type            { return BOOL }
    33  func (t *BooleanType) Name() string        { return "bool" }
    34  func (t *BooleanType) String() string      { return "bool" }
    35  func (t *BooleanType) Fingerprint() string { return typeFingerprint(t) }
    36  func (BooleanType) Bytes() int             { return 1 }
    37  
    38  // BitWidth returns the number of bits required to store a single element of this data type in memory.
    39  func (t *BooleanType) BitWidth() int { return 1 }
    40  
    41  func (BooleanType) Layout() DataTypeLayout {
    42  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecBitmap()}}
    43  }
    44  
    45  type FixedSizeBinaryType struct {
    46  	ByteWidth int
    47  }
    48  
    49  func (*FixedSizeBinaryType) ID() Type              { return FIXED_SIZE_BINARY }
    50  func (*FixedSizeBinaryType) Name() string          { return "fixed_size_binary" }
    51  func (t *FixedSizeBinaryType) BitWidth() int       { return 8 * t.ByteWidth }
    52  func (t *FixedSizeBinaryType) Bytes() int          { return t.ByteWidth }
    53  func (t *FixedSizeBinaryType) Fingerprint() string { return typeFingerprint(t) }
    54  func (t *FixedSizeBinaryType) String() string {
    55  	return "fixed_size_binary[" + strconv.Itoa(t.ByteWidth) + "]"
    56  }
    57  func (t *FixedSizeBinaryType) Layout() DataTypeLayout {
    58  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(t.ByteWidth)}}
    59  }
    60  
    61  type (
    62  	Timestamp int64
    63  	Time32    int32
    64  	Time64    int64
    65  	TimeUnit  int
    66  	Date32    int32
    67  	Date64    int64
    68  	Duration  int64
    69  )
    70  
    71  // Date32FromTime returns a Date32 value from a time object
    72  func Date32FromTime(t time.Time) Date32 {
    73  	return Date32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds()))
    74  }
    75  
    76  func (d Date32) ToTime() time.Time {
    77  	return time.Unix(0, 0).UTC().AddDate(0, 0, int(d))
    78  }
    79  
    80  func (d Date32) FormattedString() string {
    81  	return d.ToTime().Format("2006-01-02")
    82  }
    83  
    84  // Date64FromTime returns a Date64 value from a time object
    85  func Date64FromTime(t time.Time) Date64 {
    86  	// truncate to the start of the day to get the correct value
    87  	t = t.Truncate(24 * time.Hour)
    88  	return Date64(t.Unix()*1e3 + int64(t.Nanosecond())/1e6)
    89  }
    90  
    91  func (d Date64) ToTime() time.Time {
    92  	days := int(int64(d) / (time.Hour * 24).Milliseconds())
    93  	return time.Unix(0, 0).UTC().AddDate(0, 0, days)
    94  }
    95  
    96  func (d Date64) FormattedString() string {
    97  	return d.ToTime().Format("2006-01-02")
    98  }
    99  
   100  // TimestampFromStringInLocation is like TimestampFromString, but treats the time instant
   101  // as if it were in the provided timezone before converting to UTC for internal representation.
   102  func TimestampFromStringInLocation(val string, unit TimeUnit, loc *time.Location) (Timestamp, bool, error) {
   103  	if len(val) < 10 {
   104  		return 0, false, fmt.Errorf("%w: invalid timestamp string", ErrInvalid)
   105  	}
   106  
   107  	var (
   108  		format         = "2006-01-02"
   109  		zoneFmt        string
   110  		lenWithoutZone = len(val)
   111  	)
   112  
   113  	if lenWithoutZone > 10 {
   114  		switch {
   115  		case val[len(val)-1] == 'Z':
   116  			zoneFmt = "Z"
   117  			lenWithoutZone--
   118  		case val[len(val)-3] == '+' || val[len(val)-3] == '-':
   119  			zoneFmt = "-07"
   120  			lenWithoutZone -= 3
   121  		case val[len(val)-5] == '+' || val[len(val)-5] == '-':
   122  			zoneFmt = "-0700"
   123  			lenWithoutZone -= 5
   124  		case val[len(val)-6] == '+' || val[len(val)-6] == '-':
   125  			zoneFmt = "-07:00"
   126  			lenWithoutZone -= 6
   127  		}
   128  	}
   129  
   130  	switch {
   131  	case lenWithoutZone == 13:
   132  		format += string(val[10]) + "15"
   133  	case lenWithoutZone == 16:
   134  		format += string(val[10]) + "15:04"
   135  	case lenWithoutZone >= 19:
   136  		format += string(val[10]) + "15:04:05.999999999"
   137  	}
   138  
   139  	// error if we're truncating precision
   140  	// don't need a case for nano as time.Parse will already error if
   141  	// more than nanosecond precision is provided
   142  	switch {
   143  	case unit == Second && lenWithoutZone > 19:
   144  		return 0, zoneFmt != "", xerrors.New("provided more than second precision for timestamp[s]")
   145  	case unit == Millisecond && lenWithoutZone > 23:
   146  		return 0, zoneFmt != "", xerrors.New("provided more than millisecond precision for timestamp[ms]")
   147  	case unit == Microsecond && lenWithoutZone > 26:
   148  		return 0, zoneFmt != "", xerrors.New("provided more than microsecond precision for timestamp[us]")
   149  	}
   150  
   151  	format += zoneFmt
   152  	out, err := time.Parse(format, val)
   153  	if err != nil {
   154  		return 0, zoneFmt != "", fmt.Errorf("%w: %s", ErrInvalid, err)
   155  	}
   156  	if loc != time.UTC {
   157  		// convert to UTC by putting the same time instant in the desired location
   158  		// before converting to UTC
   159  		out = out.In(loc).UTC()
   160  	}
   161  
   162  	ts, err := TimestampFromTime(out, unit)
   163  	return ts, zoneFmt != "", err
   164  }
   165  
   166  // TimestampFromString parses a string and returns a timestamp for the given unit
   167  // level.
   168  //
   169  // The timestamp should be in one of the following forms, [T] can be either T
   170  // or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of
   171  // fractions of a second.
   172  //
   173  //	YYYY-MM-DD
   174  //	YYYY-MM-DD[T]HH
   175  //	YYYY-MM-DD[T]HH:MM
   176  //	YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz]
   177  //
   178  // You can also optionally have an ending Z to indicate UTC or indicate a specific
   179  // timezone using ±HH, ±HHMM or ±HH:MM at the end of the string.
   180  func TimestampFromString(val string, unit TimeUnit) (Timestamp, error) {
   181  	tm, _, err := TimestampFromStringInLocation(val, unit, time.UTC)
   182  	return tm, err
   183  }
   184  
   185  func (t Timestamp) ToTime(unit TimeUnit) time.Time {
   186  	switch unit {
   187  	case Second:
   188  		return time.Unix(int64(t), 0).UTC()
   189  	case Millisecond:
   190  		return time.UnixMilli(int64(t)).UTC()
   191  	case Microsecond:
   192  		return time.UnixMicro(int64(t)).UTC()
   193  	default:
   194  		return time.Unix(0, int64(t)).UTC()
   195  	}
   196  }
   197  
   198  // TimestampFromTime allows converting time.Time to Timestamp
   199  func TimestampFromTime(val time.Time, unit TimeUnit) (Timestamp, error) {
   200  	switch unit {
   201  	case Second:
   202  		return Timestamp(val.Unix()), nil
   203  	case Millisecond:
   204  		return Timestamp(val.Unix()*1e3 + int64(val.Nanosecond())/1e6), nil
   205  	case Microsecond:
   206  		return Timestamp(val.Unix()*1e6 + int64(val.Nanosecond())/1e3), nil
   207  	case Nanosecond:
   208  		return Timestamp(val.UnixNano()), nil
   209  	default:
   210  		return 0, fmt.Errorf("%w: unexpected timestamp unit: %s", ErrInvalid, unit)
   211  	}
   212  }
   213  
   214  // Time32FromString parses a string to return a Time32 value in the given unit,
   215  // unit needs to be only seconds or milliseconds and the string should be in the
   216  // form of HH:MM or HH:MM:SS[.zzz] where the fractions of a second are optional.
   217  func Time32FromString(val string, unit TimeUnit) (Time32, error) {
   218  	switch unit {
   219  	case Second:
   220  		if len(val) > 8 {
   221  			return 0, xerrors.New("cannot convert larger than second precision to time32s")
   222  		}
   223  	case Millisecond:
   224  		if len(val) > 12 {
   225  			return 0, xerrors.New("cannot convert larger than millisecond precision to time32ms")
   226  		}
   227  	case Microsecond, Nanosecond:
   228  		return 0, xerrors.New("time32 can only be seconds or milliseconds")
   229  	}
   230  
   231  	var (
   232  		out time.Time
   233  		err error
   234  	)
   235  	switch {
   236  	case len(val) == 5:
   237  		out, err = time.Parse("15:04", val)
   238  	default:
   239  		out, err = time.Parse("15:04:05.999", val)
   240  	}
   241  	if err != nil {
   242  		return 0, err
   243  	}
   244  	t := out.Sub(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC))
   245  	if unit == Second {
   246  		return Time32(t.Seconds()), nil
   247  	}
   248  	return Time32(t.Milliseconds()), nil
   249  }
   250  
   251  func (t Time32) ToTime(unit TimeUnit) time.Time {
   252  	return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC()
   253  }
   254  
   255  func (t Time32) FormattedString(unit TimeUnit) string {
   256  	const baseFmt = "15:04:05"
   257  	tm := t.ToTime(unit)
   258  	switch unit {
   259  	case Second:
   260  		return tm.Format(baseFmt)
   261  	case Millisecond:
   262  		return tm.Format(baseFmt + ".000")
   263  	}
   264  	return ""
   265  }
   266  
   267  // Time64FromString parses a string to return a Time64 value in the given unit,
   268  // unit needs to be only microseconds or nanoseconds and the string should be in the
   269  // form of HH:MM or HH:MM:SS[.zzzzzzzzz] where the fractions of a second are optional.
   270  func Time64FromString(val string, unit TimeUnit) (Time64, error) {
   271  	// don't need to check length for nanoseconds as Parse will already error
   272  	// if more than 9 digits are provided for the fractional second
   273  	switch unit {
   274  	case Microsecond:
   275  		if len(val) > 15 {
   276  			return 0, xerrors.New("cannot convert larger than microsecond precision to time64us")
   277  		}
   278  	case Second, Millisecond:
   279  		return 0, xerrors.New("time64 should only be microseconds or nanoseconds")
   280  	}
   281  
   282  	var (
   283  		out time.Time
   284  		err error
   285  	)
   286  	switch {
   287  	case len(val) == 5:
   288  		out, err = time.Parse("15:04", val)
   289  	default:
   290  		out, err = time.Parse("15:04:05.999999999", val)
   291  	}
   292  	if err != nil {
   293  		return 0, err
   294  	}
   295  	t := out.Sub(time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC))
   296  	if unit == Microsecond {
   297  		return Time64(t.Microseconds()), nil
   298  	}
   299  	return Time64(t.Nanoseconds()), nil
   300  }
   301  
   302  func (t Time64) ToTime(unit TimeUnit) time.Time {
   303  	return time.Unix(0, int64(t)*int64(unit.Multiplier())).UTC()
   304  }
   305  
   306  func (t Time64) FormattedString(unit TimeUnit) string {
   307  	const baseFmt = "15:04:05.000000"
   308  	tm := t.ToTime(unit)
   309  	switch unit {
   310  	case Microsecond:
   311  		return tm.Format(baseFmt)
   312  	case Nanosecond:
   313  		return tm.Format(baseFmt + "000")
   314  	}
   315  	return ""
   316  }
   317  
   318  const (
   319  	Second TimeUnit = iota
   320  	Millisecond
   321  	Microsecond
   322  	Nanosecond
   323  )
   324  
   325  var TimeUnitValues = []TimeUnit{Second, Millisecond, Microsecond, Nanosecond}
   326  
   327  // Multiplier returns a time.Duration value to multiply by in order to
   328  // convert the value into nanoseconds
   329  func (u TimeUnit) Multiplier() time.Duration {
   330  	return [...]time.Duration{time.Second, time.Millisecond, time.Microsecond, time.Nanosecond}[uint(u)&3]
   331  }
   332  
   333  func (u TimeUnit) String() string { return [...]string{"s", "ms", "us", "ns"}[uint(u)&3] }
   334  
// TemporalWithUnit is a fixed-width data type that is parameterized by a
// TimeUnit and exposes that unit through TimeUnit().
type TemporalWithUnit interface {
	FixedWidthDataType
	TimeUnit() TimeUnit
}
   339  
// TimestampType is encoded as a 64-bit signed integer since the UNIX epoch (1970-01-01T00:00:00Z).
// The zero-value has a unit of seconds and is time zone neutral (empty TimeZone). In Arrow
// semantics, time zone neutral does not represent a physical point in time, but rather a
// "wall clock" time that only has meaning within the context that produced it. In Go, time.Time
// can only represent instants; there is no notion of "wall clock" time. Therefore, time zone
// neutral timestamps are represented as UTC per Go conventions even though the Arrow type itself
// has no time zone.
type TimestampType struct {
	// Unit is the resolution of the stored values.
	Unit TimeUnit
	// TimeZone is a tzdata name or a ±HH:MM offset; see GetZone for how it
	// is resolved. Empty means time zone neutral.
	TimeZone string

	// loc caches the location resolved by GetZone; nil until first resolved
	// or after ClearCachedLocation.
	loc *time.Location
	// mx guards loc.
	mx sync.RWMutex
}
   353  
   354  func (*TimestampType) ID() Type     { return TIMESTAMP }
   355  func (*TimestampType) Name() string { return "timestamp" }
   356  func (t *TimestampType) String() string {
   357  	switch len(t.TimeZone) {
   358  	case 0:
   359  		return "timestamp[" + t.Unit.String() + "]"
   360  	default:
   361  		return "timestamp[" + t.Unit.String() + ", tz=" + t.TimeZone + "]"
   362  	}
   363  }
   364  
   365  func (t *TimestampType) Fingerprint() string {
   366  	return fmt.Sprintf("%s%d:%s", typeFingerprint(t)+string(timeUnitFingerprint(t.Unit)), len(t.TimeZone), t.TimeZone)
   367  }
   368  
   369  // BitWidth returns the number of bits required to store a single element of this data type in memory.
   370  func (*TimestampType) BitWidth() int { return 64 }
   371  
   372  func (*TimestampType) Bytes() int { return Int64SizeBytes }
   373  
   374  func (*TimestampType) Layout() DataTypeLayout {
   375  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(TimestampSizeBytes)}}
   376  }
   377  
   378  func (t *TimestampType) TimeUnit() TimeUnit { return t.Unit }
   379  
   380  // ClearCachedLocation clears the cached time.Location object in the type.
   381  // This should be called if you change the value of the TimeZone after having
   382  // potentially called GetZone.
   383  func (t *TimestampType) ClearCachedLocation() {
   384  	t.mx.Lock()
   385  	defer t.mx.Unlock()
   386  	t.loc = nil
   387  }
   388  
// GetZone returns a *time.Location that represents the current TimeZone member
// of the TimestampType. If it is "", "UTC", or "utc", you'll get time.UTC.
// Otherwise it must either be a valid tzdata string such as "America/New_York"
// or of the format +HH:MM or -HH:MM indicating an absolute offset.
//
// The location object will be cached in the TimestampType for subsequent calls
// so if you change the value of TimeZone after calling this, make sure to call
// ClearCachedLocation.
//
// If TimeZone can be resolved neither way, time.UTC is returned together with
// a non-nil error and nothing is cached.
func (t *TimestampType) GetZone() (*time.Location, error) {
	// fast path: return the cached location while holding only the read lock
	t.mx.RLock()
	if t.loc != nil {
		defer t.mx.RUnlock()
		return t.loc, nil
	}

	// nothing cached yet: give up the read lock and take the write lock so
	// the resolved location can be stored
	t.mx.RUnlock()
	t.mx.Lock()
	defer t.mx.Unlock()
	// in case GetZone() was called in between releasing the read lock and
	// getting the write lock
	if t.loc != nil {
		return t.loc, nil
	}
	// the TimeZone string is allowed to be either a valid tzdata string
	// such as "America/New_York" or an absolute offset of the form -XX:XX
	// or +XX:XX
	//
	// As such we have two methods we can try, first we'll try LoadLocation
	// and if that fails, we'll test for an absolute offset.
	if t.TimeZone == "" || t.TimeZone == "UTC" || t.TimeZone == "utc" {
		t.loc = time.UTC
		return time.UTC, nil
	}

	if loc, err := time.LoadLocation(t.TimeZone); err == nil {
		t.loc = loc
		return loc, err
	}

	// at this point we know that the timezone isn't empty, and didn't match
	// anything in the tzdata names. So either it's an absolute offset
	// or it's invalid.
	timetz, err := time.Parse("-07:00", t.TimeZone)
	if err != nil {
		return time.UTC, fmt.Errorf("could not find timezone location for '%s'", t.TimeZone)
	}

	// parsing with a zone-only layout yields a time carrying that offset;
	// build a fixed zone from it, named after the original string
	_, offset := timetz.Zone()
	t.loc = time.FixedZone(t.TimeZone, offset)
	return t.loc, nil
}
   440  
   441  // GetToTimeFunc returns a function for converting an arrow.Timestamp value into a
   442  // time.Time object with proper TimeZone and precision. If the TimeZone is invalid
   443  // this will return an error. It calls GetZone to get the timezone for consistency.
   444  func (t *TimestampType) GetToTimeFunc() (func(Timestamp) time.Time, error) {
   445  	tz, err := t.GetZone()
   446  	if err != nil {
   447  		return nil, err
   448  	}
   449  
   450  	return func(v Timestamp) time.Time { return v.ToTime(t.Unit).In(tz) }, nil
   451  }
   452  
   453  // Time32Type is encoded as a 32-bit signed integer, representing either seconds or milliseconds since midnight.
   454  type Time32Type struct {
   455  	Unit TimeUnit
   456  }
   457  
   458  func (*Time32Type) ID() Type         { return TIME32 }
   459  func (*Time32Type) Name() string     { return "time32" }
   460  func (*Time32Type) BitWidth() int    { return 32 }
   461  func (*Time32Type) Bytes() int       { return Int32SizeBytes }
   462  func (t *Time32Type) String() string { return "time32[" + t.Unit.String() + "]" }
   463  func (t *Time32Type) Fingerprint() string {
   464  	return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
   465  }
   466  
   467  func (Time32Type) Layout() DataTypeLayout {
   468  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time32SizeBytes)}}
   469  }
   470  
   471  func (t *Time32Type) TimeUnit() TimeUnit { return t.Unit }
   472  
   473  // Time64Type is encoded as a 64-bit signed integer, representing either microseconds or nanoseconds since midnight.
   474  type Time64Type struct {
   475  	Unit TimeUnit
   476  }
   477  
   478  func (*Time64Type) ID() Type         { return TIME64 }
   479  func (*Time64Type) Name() string     { return "time64" }
   480  func (*Time64Type) BitWidth() int    { return 64 }
   481  func (*Time64Type) Bytes() int       { return Int64SizeBytes }
   482  func (t *Time64Type) String() string { return "time64[" + t.Unit.String() + "]" }
   483  func (t *Time64Type) Fingerprint() string {
   484  	return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
   485  }
   486  
   487  func (Time64Type) Layout() DataTypeLayout {
   488  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time64SizeBytes)}}
   489  }
   490  
   491  func (t *Time64Type) TimeUnit() TimeUnit { return t.Unit }
   492  
   493  // DurationType is encoded as a 64-bit signed integer, representing an amount
   494  // of elapsed time without any relation to a calendar artifact.
   495  type DurationType struct {
   496  	Unit TimeUnit
   497  }
   498  
   499  func (*DurationType) ID() Type         { return DURATION }
   500  func (*DurationType) Name() string     { return "duration" }
   501  func (*DurationType) BitWidth() int    { return 64 }
   502  func (*DurationType) Bytes() int       { return Int64SizeBytes }
   503  func (t *DurationType) String() string { return "duration[" + t.Unit.String() + "]" }
   504  func (t *DurationType) Fingerprint() string {
   505  	return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
   506  }
   507  
   508  func (DurationType) Layout() DataTypeLayout {
   509  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DurationSizeBytes)}}
   510  }
   511  
   512  func (t *DurationType) TimeUnit() TimeUnit { return t.Unit }
   513  
   514  // Float16Type represents a floating point value encoded with a 16-bit precision.
   515  type Float16Type struct{}
   516  
   517  func (t *Float16Type) ID() Type            { return FLOAT16 }
   518  func (t *Float16Type) Name() string        { return "float16" }
   519  func (t *Float16Type) String() string      { return "float16" }
   520  func (t *Float16Type) Fingerprint() string { return typeFingerprint(t) }
   521  
   522  // BitWidth returns the number of bits required to store a single element of this data type in memory.
   523  func (t *Float16Type) BitWidth() int { return 16 }
   524  
   525  func (Float16Type) Bytes() int { return Float16SizeBytes }
   526  
   527  func (Float16Type) Layout() DataTypeLayout {
   528  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Float16SizeBytes)}}
   529  }
   530  
// DecimalType is a data type that carries a decimal precision and scale. In
// this file it is implemented by *Decimal128Type and *Decimal256Type.
type DecimalType interface {
	DataType
	// GetPrecision returns the precision of the decimal type.
	GetPrecision() int32
	// GetScale returns the scale of the decimal type.
	GetScale() int32
}
   536  
   537  func NewDecimalType(id Type, prec, scale int32) (DecimalType, error) {
   538  	switch id {
   539  	case DECIMAL128:
   540  		return &Decimal128Type{Precision: prec, Scale: scale}, nil
   541  	case DECIMAL256:
   542  		return &Decimal256Type{Precision: prec, Scale: scale}, nil
   543  	default:
   544  		return nil, fmt.Errorf("%w: must use DECIMAL128 or DECIMAL256 to create a DecimalType", ErrInvalid)
   545  	}
   546  }
   547  
   548  // Decimal128Type represents a fixed-size 128-bit decimal type.
   549  type Decimal128Type struct {
   550  	Precision int32
   551  	Scale     int32
   552  }
   553  
   554  func (*Decimal128Type) ID() Type      { return DECIMAL128 }
   555  func (*Decimal128Type) Name() string  { return "decimal" }
   556  func (*Decimal128Type) BitWidth() int { return 128 }
   557  func (*Decimal128Type) Bytes() int    { return Decimal128SizeBytes }
   558  func (t *Decimal128Type) String() string {
   559  	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
   560  }
   561  func (t *Decimal128Type) Fingerprint() string {
   562  	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
   563  }
   564  func (t *Decimal128Type) GetPrecision() int32 { return t.Precision }
   565  func (t *Decimal128Type) GetScale() int32     { return t.Scale }
   566  
   567  func (Decimal128Type) Layout() DataTypeLayout {
   568  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}}
   569  }
   570  
   571  // Decimal256Type represents a fixed-size 256-bit decimal type.
   572  type Decimal256Type struct {
   573  	Precision int32
   574  	Scale     int32
   575  }
   576  
   577  func (*Decimal256Type) ID() Type      { return DECIMAL256 }
   578  func (*Decimal256Type) Name() string  { return "decimal256" }
   579  func (*Decimal256Type) BitWidth() int { return 256 }
   580  func (*Decimal256Type) Bytes() int    { return Decimal256SizeBytes }
   581  func (t *Decimal256Type) String() string {
   582  	return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale)
   583  }
   584  func (t *Decimal256Type) Fingerprint() string {
   585  	return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
   586  }
   587  func (t *Decimal256Type) GetPrecision() int32 { return t.Precision }
   588  func (t *Decimal256Type) GetScale() int32     { return t.Scale }
   589  
   590  func (Decimal256Type) Layout() DataTypeLayout {
   591  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal256SizeBytes)}}
   592  }
   593  
   594  // MonthInterval represents a number of months.
   595  type MonthInterval int32
   596  
   597  func (m *MonthInterval) UnmarshalJSON(data []byte) error {
   598  	var val struct {
   599  		Months int32 `json:"months"`
   600  	}
   601  	if err := json.Unmarshal(data, &val); err != nil {
   602  		return err
   603  	}
   604  
   605  	*m = MonthInterval(val.Months)
   606  	return nil
   607  }
   608  
   609  func (m MonthInterval) MarshalJSON() ([]byte, error) {
   610  	return json.Marshal(struct {
   611  		Months int32 `json:"months"`
   612  	}{int32(m)})
   613  }
   614  
   615  // MonthIntervalType is encoded as a 32-bit signed integer,
   616  // representing a number of months.
   617  type MonthIntervalType struct{}
   618  
   619  func (*MonthIntervalType) ID() Type            { return INTERVAL_MONTHS }
   620  func (*MonthIntervalType) Name() string        { return "month_interval" }
   621  func (*MonthIntervalType) String() string      { return "month_interval" }
   622  func (*MonthIntervalType) Fingerprint() string { return typeIDFingerprint(INTERVAL_MONTHS) + "M" }
   623  
   624  // BitWidth returns the number of bits required to store a single element of this data type in memory.
   625  func (t *MonthIntervalType) BitWidth() int { return 32 }
   626  
   627  func (MonthIntervalType) Bytes() int { return Int32SizeBytes }
   628  func (MonthIntervalType) Layout() DataTypeLayout {
   629  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthIntervalSizeBytes)}}
   630  }
   631  
   632  // DayTimeInterval represents a number of days and milliseconds (fraction of day).
   633  type DayTimeInterval struct {
   634  	Days         int32 `json:"days"`
   635  	Milliseconds int32 `json:"milliseconds"`
   636  }
   637  
   638  // DayTimeIntervalType is encoded as a pair of 32-bit signed integer,
   639  // representing a number of days and milliseconds (fraction of day).
   640  type DayTimeIntervalType struct{}
   641  
   642  func (*DayTimeIntervalType) ID() Type            { return INTERVAL_DAY_TIME }
   643  func (*DayTimeIntervalType) Name() string        { return "day_time_interval" }
   644  func (*DayTimeIntervalType) String() string      { return "day_time_interval" }
   645  func (*DayTimeIntervalType) Fingerprint() string { return typeIDFingerprint(INTERVAL_DAY_TIME) + "d" }
   646  
   647  // BitWidth returns the number of bits required to store a single element of this data type in memory.
   648  func (t *DayTimeIntervalType) BitWidth() int { return 64 }
   649  
   650  func (DayTimeIntervalType) Bytes() int { return DayTimeIntervalSizeBytes }
   651  func (DayTimeIntervalType) Layout() DataTypeLayout {
   652  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DayTimeIntervalSizeBytes)}}
   653  }
   654  
   655  // MonthDayNanoInterval represents a number of months, days and nanoseconds (fraction of day).
   656  type MonthDayNanoInterval struct {
   657  	Months      int32 `json:"months"`
   658  	Days        int32 `json:"days"`
   659  	Nanoseconds int64 `json:"nanoseconds"`
   660  }
   661  
   662  // MonthDayNanoIntervalType is encoded as two signed 32-bit integers representing
   663  // a number of months and a number of days, followed by a 64-bit integer representing
   664  // the number of nanoseconds since midnight for fractions of a day.
   665  type MonthDayNanoIntervalType struct{}
   666  
   667  func (*MonthDayNanoIntervalType) ID() Type       { return INTERVAL_MONTH_DAY_NANO }
   668  func (*MonthDayNanoIntervalType) Name() string   { return "month_day_nano_interval" }
   669  func (*MonthDayNanoIntervalType) String() string { return "month_day_nano_interval" }
   670  func (*MonthDayNanoIntervalType) Fingerprint() string {
   671  	return typeIDFingerprint(INTERVAL_MONTH_DAY_NANO) + "N"
   672  }
   673  
   674  // BitWidth returns the number of bits required to store a single element of this data type in memory.
   675  func (*MonthDayNanoIntervalType) BitWidth() int { return 128 }
   676  func (*MonthDayNanoIntervalType) Bytes() int    { return MonthDayNanoIntervalSizeBytes }
   677  func (MonthDayNanoIntervalType) Layout() DataTypeLayout {
   678  	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthDayNanoIntervalSizeBytes)}}
   679  }
   680  
// TimestampConvertOp identifies the arithmetic operation used to convert a
// temporal value from one TimeUnit to another.
type TimestampConvertOp int8

// Conversion operations. Note that the constants are declared without an
// explicit type, so they are untyped integer constants (0 and 1).
const (
	// ConvDIVIDE means the value is divided by the conversion factor.
	ConvDIVIDE = iota
	// ConvMULTIPLY means the value is multiplied by the conversion factor.
	ConvMULTIPLY
)
   687  
// timestampConversion is the unit conversion table. GetTimestampConvert and
// ConvertTimestampValue index it as timestampConversion[in][out], so the
// outer key of each entry below is the unit converted from and the inner key
// is the unit converted to. Each entry holds the operation to apply and the
// factor to apply it with.
var timestampConversion = [...][4]struct {
	op     TimestampConvertOp
	factor int64
}{
	Nanosecond: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Nanosecond)},
		Microsecond: {ConvDIVIDE, int64(time.Microsecond)},
		Millisecond: {ConvDIVIDE, int64(time.Millisecond)},
		Second:      {ConvDIVIDE, int64(time.Second)},
	},
	Microsecond: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Microsecond)},
		Microsecond: {ConvMULTIPLY, 1},
		Millisecond: {ConvDIVIDE, int64(time.Millisecond / time.Microsecond)},
		Second:      {ConvDIVIDE, int64(time.Second / time.Microsecond)},
	},
	Millisecond: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Millisecond)},
		Microsecond: {ConvMULTIPLY, int64(time.Millisecond / time.Microsecond)},
		Millisecond: {ConvMULTIPLY, 1},
		Second:      {ConvDIVIDE, int64(time.Second / time.Millisecond)},
	},
	Second: {
		Nanosecond:  {ConvMULTIPLY, int64(time.Second)},
		Microsecond: {ConvMULTIPLY, int64(time.Second / time.Microsecond)},
		Millisecond: {ConvMULTIPLY, int64(time.Second / time.Millisecond)},
		Second:      {ConvMULTIPLY, 1},
	},
}
   717  
   718  func GetTimestampConvert(in, out TimeUnit) (op TimestampConvertOp, factor int64) {
   719  	conv := timestampConversion[int(in)][int(out)]
   720  	return conv.op, conv.factor
   721  }
   722  
   723  func ConvertTimestampValue(in, out TimeUnit, value int64) int64 {
   724  	conv := timestampConversion[int(in)][int(out)]
   725  	switch conv.op {
   726  	case ConvMULTIPLY:
   727  		return value * conv.factor
   728  	case ConvDIVIDE:
   729  		return value / conv.factor
   730  	}
   731  
   732  	return 0
   733  }
   734  
   735  // DictionaryType represents categorical or dictionary-encoded in-memory data
   736  // It contains a dictionary-encoded value type (any type) and an index type
   737  // (any integer type).
   738  type DictionaryType struct {
   739  	IndexType DataType
   740  	ValueType DataType
   741  	Ordered   bool
   742  }
   743  
   744  func (*DictionaryType) ID() Type        { return DICTIONARY }
   745  func (*DictionaryType) Name() string    { return "dictionary" }
   746  func (d *DictionaryType) BitWidth() int { return d.IndexType.(FixedWidthDataType).BitWidth() }
   747  func (d *DictionaryType) Bytes() int    { return d.IndexType.(FixedWidthDataType).Bytes() }
   748  func (d *DictionaryType) String() string {
   749  	return fmt.Sprintf("%s<values=%s, indices=%s, ordered=%t>",
   750  		d.Name(), d.ValueType, d.IndexType, d.Ordered)
   751  }
   752  func (d *DictionaryType) Fingerprint() string {
   753  	indexFingerprint := d.IndexType.Fingerprint()
   754  	valueFingerprint := d.ValueType.Fingerprint()
   755  	ordered := "1"
   756  	if !d.Ordered {
   757  		ordered = "0"
   758  	}
   759  
   760  	if len(valueFingerprint) > 0 {
   761  		return typeFingerprint(d) + indexFingerprint + valueFingerprint + ordered
   762  	}
   763  	return ordered
   764  }
   765  
   766  func (d *DictionaryType) Layout() DataTypeLayout {
   767  	layout := d.IndexType.Layout()
   768  	layout.HasDict = true
   769  	return layout
   770  }
   771  
var (
	// FixedWidthTypes holds one shared instance of each fixed-width data type
	// that needs no parameters beyond a time unit. The unit-parameterized
	// types get one field per unit, named with the unit as suffix. The
	// Timestamp_* instances are created with TimeZone set to "UTC".
	FixedWidthTypes = struct {
		Boolean              FixedWidthDataType
		Date32               FixedWidthDataType
		Date64               FixedWidthDataType
		DayTimeInterval      FixedWidthDataType
		Duration_s           FixedWidthDataType
		Duration_ms          FixedWidthDataType
		Duration_us          FixedWidthDataType
		Duration_ns          FixedWidthDataType
		Float16              FixedWidthDataType
		MonthInterval        FixedWidthDataType
		Time32s              FixedWidthDataType
		Time32ms             FixedWidthDataType
		Time64us             FixedWidthDataType
		Time64ns             FixedWidthDataType
		Timestamp_s          FixedWidthDataType
		Timestamp_ms         FixedWidthDataType
		Timestamp_us         FixedWidthDataType
		Timestamp_ns         FixedWidthDataType
		MonthDayNanoInterval FixedWidthDataType
	}{
		Boolean:              &BooleanType{},
		Date32:               &Date32Type{},
		Date64:               &Date64Type{},
		DayTimeInterval:      &DayTimeIntervalType{},
		Duration_s:           &DurationType{Unit: Second},
		Duration_ms:          &DurationType{Unit: Millisecond},
		Duration_us:          &DurationType{Unit: Microsecond},
		Duration_ns:          &DurationType{Unit: Nanosecond},
		Float16:              &Float16Type{},
		MonthInterval:        &MonthIntervalType{},
		Time32s:              &Time32Type{Unit: Second},
		Time32ms:             &Time32Type{Unit: Millisecond},
		Time64us:             &Time64Type{Unit: Microsecond},
		Time64ns:             &Time64Type{Unit: Nanosecond},
		Timestamp_s:          &TimestampType{Unit: Second, TimeZone: "UTC"},
		Timestamp_ms:         &TimestampType{Unit: Millisecond, TimeZone: "UTC"},
		Timestamp_us:         &TimestampType{Unit: Microsecond, TimeZone: "UTC"},
		Timestamp_ns:         &TimestampType{Unit: Nanosecond, TimeZone: "UTC"},
		MonthDayNanoInterval: &MonthDayNanoIntervalType{},
	}

	// compile-time check that *FixedSizeBinaryType implements FixedWidthDataType
	_ FixedWidthDataType = (*FixedSizeBinaryType)(nil)
)