github.com/parquet-go/parquet-go@v0.20.0/type.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"math/bits"
     8  	"reflect"
     9  	"time"
    10  	"unsafe"
    11  
    12  	"github.com/parquet-go/parquet-go/deprecated"
    13  	"github.com/parquet-go/parquet-go/encoding"
    14  	"github.com/parquet-go/parquet-go/format"
    15  	"github.com/parquet-go/parquet-go/internal/unsafecast"
    16  )
    17  
// Kind is an enumeration type representing the physical types supported by the
// parquet type system.
//
// Each Kind constant declared in this file is the conversion of the
// format.Type constant of the same name, and String converts the Kind back to
// a format.Type to obtain its name.
type Kind int8
    21  
    22  const (
    23  	Boolean           Kind = Kind(format.Boolean)
    24  	Int32             Kind = Kind(format.Int32)
    25  	Int64             Kind = Kind(format.Int64)
    26  	Int96             Kind = Kind(format.Int96)
    27  	Float             Kind = Kind(format.Float)
    28  	Double            Kind = Kind(format.Double)
    29  	ByteArray         Kind = Kind(format.ByteArray)
    30  	FixedLenByteArray Kind = Kind(format.FixedLenByteArray)
    31  )
    32  
    33  // String returns a human-readable representation of the physical type.
    34  func (k Kind) String() string { return format.Type(k).String() }
    35  
    36  // Value constructs a value from k and v.
    37  //
    38  // The method panics if the data is not a valid representation of the value
    39  // kind; for example, if the kind is Int32 but the data is not 4 bytes long.
    40  func (k Kind) Value(v []byte) Value {
    41  	x, err := parseValue(k, v)
    42  	if err != nil {
    43  		panic(err)
    44  	}
    45  	return x
    46  }
    47  
// The Type interface represents logical types of the parquet type system.
//
// Types are immutable and therefore safe to access from multiple goroutines.
type Type interface {
	// Returns a human-readable representation of the parquet type.
	String() string

	// Returns the Kind value representing the underlying physical type.
	//
	// The method panics if it is called on a group type.
	Kind() Kind

	// For integer and floating point physical types, the method returns the
	// size of values in bits.
	//
	// For fixed-length byte arrays, the method returns the size of elements
	// in bytes.
	//
	// For other types, the value is zero.
	Length() int

	// Returns an estimation of the number of bytes required to hold the given
	// number of values of this type in memory.
	//
	// The method returns zero for group types.
	EstimateSize(numValues int) int

	// Returns an estimation of the number of values of this type that can be
	// held in the given byte size.
	//
	// The method returns zero for group types.
	EstimateNumValues(size int) int

	// Compares two values and returns a negative integer if a < b, positive if
	// a > b, or zero if a == b.
	//
	// The values' Kind must match the type, otherwise the result is undefined.
	//
	// The method panics if it is called on a group type.
	Compare(a, b Value) int

	// ColumnOrder returns the type's column order. For group types, this method
	// returns nil.
	//
	// The order describes the comparison logic implemented by the Compare
	// method.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	ColumnOrder() *format.ColumnOrder

	// Returns the physical type as a *format.Type value. For group types, this
	// method returns nil.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	PhysicalType() *format.Type

	// Returns the logical type as a *format.LogicalType value. When the logical
	// type is unknown, the method returns nil.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	LogicalType() *format.LogicalType

	// Returns the logical type's equivalent converted type. When there is
	// no equivalent converted type, the method returns nil.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	ConvertedType() *deprecated.ConvertedType

	// Creates a column indexer for values of this type.
	//
	// The size limit is a hint to the column indexer that it is allowed to
	// truncate the page boundaries to the given size. Only BYTE_ARRAY and
	// FIXED_LEN_BYTE_ARRAY types currently take this value into account.
	//
	// A value of zero or less means no limits.
	//
	// The method panics if it is called on a group type.
	NewColumnIndexer(sizeLimit int) ColumnIndexer

	// Creates a row group buffer column for values of this type.
	//
	// Column buffers are created using the index of the column they are
	// accumulating values in memory for (relative to the parent schema),
	// and the size of their memory buffer.
	//
	// The application may give an estimate of the number of values it expects
	// to write to the buffer as second argument. This estimate helps set the
	// initial buffer capacity but is not a hard limit, the underlying memory
	// buffer will grow as needed to allow more values to be written. Programs
	// may use the Size method of the column buffer (or the parent row group,
	// when relevant) to determine how many bytes are being used, and perform a
	// flush of the buffers to a storage layer.
	//
	// The method panics if it is called on a group type.
	NewColumnBuffer(columnIndex, numValues int) ColumnBuffer

	// Creates a dictionary holding values of this type.
	//
	// The dictionary retains the data buffer, it does not make a copy of it.
	// If the application needs to share ownership of the memory buffer, it must
	// ensure that it will not be modified while the page is in use, or it must
	// make a copy of it prior to creating the dictionary.
	//
	// The method panics if the data type does not correspond to the parquet
	// type it is called on.
	NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary

	// Creates a page belonging to a column at the given index, backed by the
	// data buffer.
	//
	// The page retains the data buffer, it does not make a copy of it. If the
	// application needs to share ownership of the memory buffer, it must ensure
	// that it will not be modified while the page is in use, or it must make a
	// copy of it prior to creating the page.
	//
	// The method panics if the data type does not correspond to the parquet
	// type it is called on.
	NewPage(columnIndex, numValues int, data encoding.Values) Page

	// Creates an encoding.Values instance backed by the given buffers.
	//
	// The offsets are only used by BYTE_ARRAY types, where they represent the
	// positions of each variable length value in the values buffer.
	//
	// The following expression creates an empty instance for any type:
	//
	//		values := typ.NewValues(nil, nil)
	//
	// The method panics if it is called on group types.
	NewValues(values []byte, offsets []uint32) encoding.Values

	// Assuming the src buffer contains PLAIN encoded values of the type it is
	// called on, applies the given encoding and produces the output to the dst
	// buffer passed as first argument by dispatching the call to one of the
	// encoding methods.
	Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error)

	// Assuming the src buffer contains values encoded in the given encoding,
	// decodes the input and produces the decoded values into the dst output
	// buffer passed as first argument by dispatching the call to one of the
	// encoding methods.
	Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error)

	// Returns an estimation of the output size after decoding the values held
	// in the src buffer with the given encoding.
	//
	// For most types, this is similar to calling EstimateSize with the known
	// number of encoded values. For variable size types, using this method may
	// provide a more precise result since it can inspect the input buffer.
	EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int

	// Assigns a Parquet value to a Go value. Returns an error if assignment is
	// not possible. The source Value must be an expected logical type for the
	// receiver. This can be accomplished using ConvertValue.
	AssignValue(dst reflect.Value, src Value) error

	// Converts a Parquet Value of the given Type into a Parquet Value that is
	// compatible with the receiver. The returned Value is suitable to be passed
	// to AssignValue.
	ConvertValue(val Value, typ Type) (Value, error)
}
   216  
   217  var (
   218  	BooleanType   Type = booleanType{}
   219  	Int32Type     Type = int32Type{}
   220  	Int64Type     Type = int64Type{}
   221  	Int96Type     Type = int96Type{}
   222  	FloatType     Type = floatType{}
   223  	DoubleType    Type = doubleType{}
   224  	ByteArrayType Type = byteArrayType{}
   225  )
   226  
// In the current parquet version supported by this library, only type-defined
// orders are supported.
//
// The ColumnOrder methods of the types declared in this file return a pointer
// to this single shared value, so it must never be mutated.
var typeDefinedColumnOrder = format.ColumnOrder{
	TypeOrder: new(format.TypeDefinedOrder),
}
   232  
   233  var physicalTypes = [...]format.Type{
   234  	0: format.Boolean,
   235  	1: format.Int32,
   236  	2: format.Int64,
   237  	3: format.Int96,
   238  	4: format.Float,
   239  	5: format.Double,
   240  	6: format.ByteArray,
   241  	7: format.FixedLenByteArray,
   242  }
   243  
   244  var convertedTypes = [...]deprecated.ConvertedType{
   245  	0:  deprecated.UTF8,
   246  	1:  deprecated.Map,
   247  	2:  deprecated.MapKeyValue,
   248  	3:  deprecated.List,
   249  	4:  deprecated.Enum,
   250  	5:  deprecated.Decimal,
   251  	6:  deprecated.Date,
   252  	7:  deprecated.TimeMillis,
   253  	8:  deprecated.TimeMicros,
   254  	9:  deprecated.TimestampMillis,
   255  	10: deprecated.TimestampMicros,
   256  	11: deprecated.Uint8,
   257  	12: deprecated.Uint16,
   258  	13: deprecated.Uint32,
   259  	14: deprecated.Uint64,
   260  	15: deprecated.Int8,
   261  	16: deprecated.Int16,
   262  	17: deprecated.Int32,
   263  	18: deprecated.Int64,
   264  	19: deprecated.Json,
   265  	20: deprecated.Bson,
   266  	21: deprecated.Interval,
   267  }
   268  
   269  type booleanType struct{}
   270  
   271  func (t booleanType) String() string                           { return "BOOLEAN" }
   272  func (t booleanType) Kind() Kind                               { return Boolean }
   273  func (t booleanType) Length() int                              { return 1 }
   274  func (t booleanType) EstimateSize(n int) int                   { return (n + 7) / 8 }
   275  func (t booleanType) EstimateNumValues(n int) int              { return 8 * n }
   276  func (t booleanType) Compare(a, b Value) int                   { return compareBool(a.boolean(), b.boolean()) }
   277  func (t booleanType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   278  func (t booleanType) LogicalType() *format.LogicalType         { return nil }
   279  func (t booleanType) ConvertedType() *deprecated.ConvertedType { return nil }
   280  func (t booleanType) PhysicalType() *format.Type               { return &physicalTypes[Boolean] }
   281  
   282  func (t booleanType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   283  	return newBooleanColumnIndexer()
   284  }
   285  
   286  func (t booleanType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   287  	return newBooleanColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   288  }
   289  
   290  func (t booleanType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
   291  	return newBooleanDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   292  }
   293  
   294  func (t booleanType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
   295  	return newBooleanPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   296  }
   297  
   298  func (t booleanType) NewValues(values []byte, _ []uint32) encoding.Values {
   299  	return encoding.BooleanValues(values)
   300  }
   301  
   302  func (t booleanType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
   303  	return encoding.EncodeBoolean(dst, src, enc)
   304  }
   305  
   306  func (t booleanType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
   307  	return encoding.DecodeBoolean(dst, src, enc)
   308  }
   309  
   310  func (t booleanType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
   311  	return t.EstimateSize(numValues)
   312  }
   313  
   314  func (t booleanType) AssignValue(dst reflect.Value, src Value) error {
   315  	v := src.boolean()
   316  	switch dst.Kind() {
   317  	case reflect.Bool:
   318  		dst.SetBool(v)
   319  	default:
   320  		dst.Set(reflect.ValueOf(v))
   321  	}
   322  	return nil
   323  }
   324  
   325  func (t booleanType) ConvertValue(val Value, typ Type) (Value, error) {
   326  	switch typ.(type) {
   327  	case *stringType:
   328  		return convertStringToBoolean(val)
   329  	}
   330  	switch typ.Kind() {
   331  	case Boolean:
   332  		return val, nil
   333  	case Int32:
   334  		return convertInt32ToBoolean(val)
   335  	case Int64:
   336  		return convertInt64ToBoolean(val)
   337  	case Int96:
   338  		return convertInt96ToBoolean(val)
   339  	case Float:
   340  		return convertFloatToBoolean(val)
   341  	case Double:
   342  		return convertDoubleToBoolean(val)
   343  	case ByteArray, FixedLenByteArray:
   344  		return convertByteArrayToBoolean(val)
   345  	default:
   346  		return makeValueKind(Boolean), nil
   347  	}
   348  }
   349  
   350  type int32Type struct{}
   351  
   352  func (t int32Type) String() string                           { return "INT32" }
   353  func (t int32Type) Kind() Kind                               { return Int32 }
   354  func (t int32Type) Length() int                              { return 32 }
   355  func (t int32Type) EstimateSize(n int) int                   { return 4 * n }
   356  func (t int32Type) EstimateNumValues(n int) int              { return n / 4 }
   357  func (t int32Type) Compare(a, b Value) int                   { return compareInt32(a.int32(), b.int32()) }
   358  func (t int32Type) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   359  func (t int32Type) LogicalType() *format.LogicalType         { return nil }
   360  func (t int32Type) ConvertedType() *deprecated.ConvertedType { return nil }
   361  func (t int32Type) PhysicalType() *format.Type               { return &physicalTypes[Int32] }
   362  
   363  func (t int32Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   364  	return newInt32ColumnIndexer()
   365  }
   366  
   367  func (t int32Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   368  	return newInt32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   369  }
   370  
   371  func (t int32Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
   372  	return newInt32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   373  }
   374  
   375  func (t int32Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
   376  	return newInt32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   377  }
   378  
   379  func (t int32Type) NewValues(values []byte, _ []uint32) encoding.Values {
   380  	return encoding.Int32ValuesFromBytes(values)
   381  }
   382  
   383  func (t int32Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
   384  	return encoding.EncodeInt32(dst, src, enc)
   385  }
   386  
   387  func (t int32Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
   388  	return encoding.DecodeInt32(dst, src, enc)
   389  }
   390  
   391  func (t int32Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
   392  	return t.EstimateSize(numValues)
   393  }
   394  
   395  func (t int32Type) AssignValue(dst reflect.Value, src Value) error {
   396  	v := src.int32()
   397  	switch dst.Kind() {
   398  	case reflect.Int8, reflect.Int16, reflect.Int32:
   399  		dst.SetInt(int64(v))
   400  	case reflect.Uint8, reflect.Uint16, reflect.Uint32:
   401  		dst.SetUint(uint64(v))
   402  	default:
   403  		dst.Set(reflect.ValueOf(v))
   404  	}
   405  	return nil
   406  }
   407  
   408  func (t int32Type) ConvertValue(val Value, typ Type) (Value, error) {
   409  	switch typ.(type) {
   410  	case *stringType:
   411  		return convertStringToInt32(val)
   412  	}
   413  	switch typ.Kind() {
   414  	case Boolean:
   415  		return convertBooleanToInt32(val)
   416  	case Int32:
   417  		return val, nil
   418  	case Int64:
   419  		return convertInt64ToInt32(val)
   420  	case Int96:
   421  		return convertInt96ToInt32(val)
   422  	case Float:
   423  		return convertFloatToInt32(val)
   424  	case Double:
   425  		return convertDoubleToInt32(val)
   426  	case ByteArray, FixedLenByteArray:
   427  		return convertByteArrayToInt32(val)
   428  	default:
   429  		return makeValueKind(Int32), nil
   430  	}
   431  }
   432  
   433  type int64Type struct{}
   434  
   435  func (t int64Type) String() string                           { return "INT64" }
   436  func (t int64Type) Kind() Kind                               { return Int64 }
   437  func (t int64Type) Length() int                              { return 64 }
   438  func (t int64Type) EstimateSize(n int) int                   { return 8 * n }
   439  func (t int64Type) EstimateNumValues(n int) int              { return n / 8 }
   440  func (t int64Type) Compare(a, b Value) int                   { return compareInt64(a.int64(), b.int64()) }
   441  func (t int64Type) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   442  func (t int64Type) LogicalType() *format.LogicalType         { return nil }
   443  func (t int64Type) ConvertedType() *deprecated.ConvertedType { return nil }
   444  func (t int64Type) PhysicalType() *format.Type               { return &physicalTypes[Int64] }
   445  
   446  func (t int64Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   447  	return newInt64ColumnIndexer()
   448  }
   449  
   450  func (t int64Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   451  	return newInt64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   452  }
   453  
   454  func (t int64Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
   455  	return newInt64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   456  }
   457  
   458  func (t int64Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
   459  	return newInt64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   460  }
   461  
   462  func (t int64Type) NewValues(values []byte, _ []uint32) encoding.Values {
   463  	return encoding.Int64ValuesFromBytes(values)
   464  }
   465  
   466  func (t int64Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
   467  	return encoding.EncodeInt64(dst, src, enc)
   468  }
   469  
   470  func (t int64Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
   471  	return encoding.DecodeInt64(dst, src, enc)
   472  }
   473  
   474  func (t int64Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
   475  	return t.EstimateSize(numValues)
   476  }
   477  
   478  func (t int64Type) AssignValue(dst reflect.Value, src Value) error {
   479  	v := src.int64()
   480  	switch dst.Kind() {
   481  	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
   482  		dst.SetInt(v)
   483  	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   484  		dst.SetUint(uint64(v))
   485  	default:
   486  		dst.Set(reflect.ValueOf(v))
   487  	}
   488  	return nil
   489  }
   490  
   491  func (t int64Type) ConvertValue(val Value, typ Type) (Value, error) {
   492  	switch typ.(type) {
   493  	case *stringType:
   494  		return convertStringToInt64(val)
   495  	}
   496  	switch typ.Kind() {
   497  	case Boolean:
   498  		return convertBooleanToInt64(val)
   499  	case Int32:
   500  		return convertInt32ToInt64(val)
   501  	case Int64:
   502  		return val, nil
   503  	case Int96:
   504  		return convertInt96ToInt64(val)
   505  	case Float:
   506  		return convertFloatToInt64(val)
   507  	case Double:
   508  		return convertDoubleToInt64(val)
   509  	case ByteArray, FixedLenByteArray:
   510  		return convertByteArrayToInt64(val)
   511  	default:
   512  		return makeValueKind(Int64), nil
   513  	}
   514  }
   515  
   516  type int96Type struct{}
   517  
   518  func (t int96Type) String() string { return "INT96" }
   519  
   520  func (t int96Type) Kind() Kind                               { return Int96 }
   521  func (t int96Type) Length() int                              { return 96 }
   522  func (t int96Type) EstimateSize(n int) int                   { return 12 * n }
   523  func (t int96Type) EstimateNumValues(n int) int              { return n / 12 }
   524  func (t int96Type) Compare(a, b Value) int                   { return compareInt96(a.int96(), b.int96()) }
   525  func (t int96Type) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   526  func (t int96Type) LogicalType() *format.LogicalType         { return nil }
   527  func (t int96Type) ConvertedType() *deprecated.ConvertedType { return nil }
   528  func (t int96Type) PhysicalType() *format.Type               { return &physicalTypes[Int96] }
   529  
   530  func (t int96Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   531  	return newInt96ColumnIndexer()
   532  }
   533  
   534  func (t int96Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   535  	return newInt96ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   536  }
   537  
   538  func (t int96Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
   539  	return newInt96Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   540  }
   541  
   542  func (t int96Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
   543  	return newInt96Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   544  }
   545  
   546  func (t int96Type) NewValues(values []byte, _ []uint32) encoding.Values {
   547  	return encoding.Int96ValuesFromBytes(values)
   548  }
   549  
   550  func (t int96Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
   551  	return encoding.EncodeInt96(dst, src, enc)
   552  }
   553  
   554  func (t int96Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
   555  	return encoding.DecodeInt96(dst, src, enc)
   556  }
   557  
   558  func (t int96Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
   559  	return t.EstimateSize(numValues)
   560  }
   561  
   562  func (t int96Type) AssignValue(dst reflect.Value, src Value) error {
   563  	v := src.Int96()
   564  	dst.Set(reflect.ValueOf(v))
   565  	return nil
   566  }
   567  
   568  func (t int96Type) ConvertValue(val Value, typ Type) (Value, error) {
   569  	switch typ.(type) {
   570  	case *stringType:
   571  		return convertStringToInt96(val)
   572  	}
   573  	switch typ.Kind() {
   574  	case Boolean:
   575  		return convertBooleanToInt96(val)
   576  	case Int32:
   577  		return convertInt32ToInt96(val)
   578  	case Int64:
   579  		return convertInt64ToInt96(val)
   580  	case Int96:
   581  		return val, nil
   582  	case Float:
   583  		return convertFloatToInt96(val)
   584  	case Double:
   585  		return convertDoubleToInt96(val)
   586  	case ByteArray, FixedLenByteArray:
   587  		return convertByteArrayToInt96(val)
   588  	default:
   589  		return makeValueKind(Int96), nil
   590  	}
   591  }
   592  
   593  type floatType struct{}
   594  
   595  func (t floatType) String() string                           { return "FLOAT" }
   596  func (t floatType) Kind() Kind                               { return Float }
   597  func (t floatType) Length() int                              { return 32 }
   598  func (t floatType) EstimateSize(n int) int                   { return 4 * n }
   599  func (t floatType) EstimateNumValues(n int) int              { return n / 4 }
   600  func (t floatType) Compare(a, b Value) int                   { return compareFloat32(a.float(), b.float()) }
   601  func (t floatType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   602  func (t floatType) LogicalType() *format.LogicalType         { return nil }
   603  func (t floatType) ConvertedType() *deprecated.ConvertedType { return nil }
   604  func (t floatType) PhysicalType() *format.Type               { return &physicalTypes[Float] }
   605  
   606  func (t floatType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   607  	return newFloatColumnIndexer()
   608  }
   609  
   610  func (t floatType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   611  	return newFloatColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   612  }
   613  
   614  func (t floatType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
   615  	return newFloatDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   616  }
   617  
   618  func (t floatType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
   619  	return newFloatPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   620  }
   621  
   622  func (t floatType) NewValues(values []byte, _ []uint32) encoding.Values {
   623  	return encoding.FloatValuesFromBytes(values)
   624  }
   625  
   626  func (t floatType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
   627  	return encoding.EncodeFloat(dst, src, enc)
   628  }
   629  
   630  func (t floatType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
   631  	return encoding.DecodeFloat(dst, src, enc)
   632  }
   633  
   634  func (t floatType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
   635  	return t.EstimateSize(numValues)
   636  }
   637  
   638  func (t floatType) AssignValue(dst reflect.Value, src Value) error {
   639  	v := src.float()
   640  	switch dst.Kind() {
   641  	case reflect.Float32, reflect.Float64:
   642  		dst.SetFloat(float64(v))
   643  	default:
   644  		dst.Set(reflect.ValueOf(v))
   645  	}
   646  	return nil
   647  }
   648  
   649  func (t floatType) ConvertValue(val Value, typ Type) (Value, error) {
   650  	switch typ.(type) {
   651  	case *stringType:
   652  		return convertStringToFloat(val)
   653  	}
   654  	switch typ.Kind() {
   655  	case Boolean:
   656  		return convertBooleanToFloat(val)
   657  	case Int32:
   658  		return convertInt32ToFloat(val)
   659  	case Int64:
   660  		return convertInt64ToFloat(val)
   661  	case Int96:
   662  		return convertInt96ToFloat(val)
   663  	case Float:
   664  		return val, nil
   665  	case Double:
   666  		return convertDoubleToFloat(val)
   667  	case ByteArray, FixedLenByteArray:
   668  		return convertByteArrayToFloat(val)
   669  	default:
   670  		return makeValueKind(Float), nil
   671  	}
   672  }
   673  
   674  type doubleType struct{}
   675  
   676  func (t doubleType) String() string                           { return "DOUBLE" }
   677  func (t doubleType) Kind() Kind                               { return Double }
   678  func (t doubleType) Length() int                              { return 64 }
   679  func (t doubleType) EstimateSize(n int) int                   { return 8 * n }
   680  func (t doubleType) EstimateNumValues(n int) int              { return n / 8 }
   681  func (t doubleType) Compare(a, b Value) int                   { return compareFloat64(a.double(), b.double()) }
   682  func (t doubleType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   683  func (t doubleType) LogicalType() *format.LogicalType         { return nil }
   684  func (t doubleType) ConvertedType() *deprecated.ConvertedType { return nil }
   685  func (t doubleType) PhysicalType() *format.Type               { return &physicalTypes[Double] }
   686  
// NewColumnIndexer returns a new column indexer for DOUBLE values. The
// sizeLimit argument is not used by this implementation.
func (t doubleType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newDoubleColumnIndexer()
}

// NewColumnBuffer returns a new column buffer of DOUBLE values bound to the
// column at columnIndex; numValues is forwarded through makeNumValues.
func (t doubleType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newDoubleColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewDictionary returns a new dictionary of DOUBLE values for the column at
// columnIndex, built from numValues and data.
func (t doubleType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newDoubleDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewPage returns a new page of DOUBLE values for the column at columnIndex,
// built from numValues and data.
func (t doubleType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newDoublePage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewValues wraps the raw bytes in values as DOUBLE encoding.Values. The
// offsets argument is ignored.
func (t doubleType) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.DoubleValuesFromBytes(values)
}

// Encode delegates to encoding.EncodeDouble to encode src with enc, passing
// dst along as the destination buffer.
func (t doubleType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeDouble(dst, src, enc)
}

// Decode delegates to encoding.DecodeDouble to decode src with enc, passing
// dst along as the destination values.
func (t doubleType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeDouble(dst, src, enc)
}

// EstimateDecodeSize returns t.EstimateSize(numValues); the src and enc
// arguments do not influence the estimate.
func (t doubleType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}
   718  
   719  func (t doubleType) AssignValue(dst reflect.Value, src Value) error {
   720  	v := src.double()
   721  	switch dst.Kind() {
   722  	case reflect.Float32, reflect.Float64:
   723  		dst.SetFloat(v)
   724  	default:
   725  		dst.Set(reflect.ValueOf(v))
   726  	}
   727  	return nil
   728  }
   729  
   730  func (t doubleType) ConvertValue(val Value, typ Type) (Value, error) {
   731  	switch typ.(type) {
   732  	case *stringType:
   733  		return convertStringToDouble(val)
   734  	}
   735  	switch typ.Kind() {
   736  	case Boolean:
   737  		return convertBooleanToDouble(val)
   738  	case Int32:
   739  		return convertInt32ToDouble(val)
   740  	case Int64:
   741  		return convertInt64ToDouble(val)
   742  	case Int96:
   743  		return convertInt96ToDouble(val)
   744  	case Float:
   745  		return convertFloatToDouble(val)
   746  	case Double:
   747  		return val, nil
   748  	case ByteArray, FixedLenByteArray:
   749  		return convertByteArrayToDouble(val)
   750  	default:
   751  		return makeValueKind(Double), nil
   752  	}
   753  }
   754  
   755  type byteArrayType struct{}
   756  
   757  func (t byteArrayType) String() string                           { return "BYTE_ARRAY" }
   758  func (t byteArrayType) Kind() Kind                               { return ByteArray }
   759  func (t byteArrayType) Length() int                              { return 0 }
   760  func (t byteArrayType) EstimateSize(n int) int                   { return estimatedSizeOfByteArrayValues * n }
   761  func (t byteArrayType) EstimateNumValues(n int) int              { return n / estimatedSizeOfByteArrayValues }
   762  func (t byteArrayType) Compare(a, b Value) int                   { return bytes.Compare(a.byteArray(), b.byteArray()) }
   763  func (t byteArrayType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   764  func (t byteArrayType) LogicalType() *format.LogicalType         { return nil }
   765  func (t byteArrayType) ConvertedType() *deprecated.ConvertedType { return nil }
   766  func (t byteArrayType) PhysicalType() *format.Type               { return &physicalTypes[ByteArray] }
   767  
// NewColumnIndexer returns a new byte array column indexer configured with
// sizeLimit.
func (t byteArrayType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newByteArrayColumnIndexer(sizeLimit)
}

// NewColumnBuffer returns a new column buffer of byte array values bound to
// the column at columnIndex; numValues is forwarded through makeNumValues.
func (t byteArrayType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewDictionary returns a new dictionary of byte array values for the column
// at columnIndex, built from numValues and data.
func (t byteArrayType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewPage returns a new page of byte array values for the column at
// columnIndex, built from numValues and data.
func (t byteArrayType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewValues wraps values and offsets as byte array encoding.Values.
func (t byteArrayType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return encoding.ByteArrayValues(values, offsets)
}

// Encode delegates to encoding.EncodeByteArray to encode src with enc,
// passing dst along as the destination buffer.
func (t byteArrayType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeByteArray(dst, src, enc)
}

// Decode delegates to encoding.DecodeByteArray to decode src with enc,
// passing dst along as the destination values.
func (t byteArrayType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeByteArray(dst, src, enc)
}

// EstimateDecodeSize asks the encoding for its byte array decode size
// estimate of src; numValues does not influence the result.
func (t byteArrayType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return enc.EstimateDecodeByteArraySize(src)
}
   799  
   800  func (t byteArrayType) AssignValue(dst reflect.Value, src Value) error {
   801  	v := src.byteArray()
   802  	switch dst.Kind() {
   803  	case reflect.String:
   804  		dst.SetString(string(v))
   805  	case reflect.Slice:
   806  		dst.SetBytes(copyBytes(v))
   807  	default:
   808  		val := reflect.ValueOf(string(v))
   809  		dst.Set(val)
   810  	}
   811  	return nil
   812  }
   813  
   814  func (t byteArrayType) ConvertValue(val Value, typ Type) (Value, error) {
   815  	switch typ.Kind() {
   816  	case Boolean:
   817  		return convertBooleanToByteArray(val)
   818  	case Int32:
   819  		return convertInt32ToByteArray(val)
   820  	case Int64:
   821  		return convertInt64ToByteArray(val)
   822  	case Int96:
   823  		return convertInt96ToByteArray(val)
   824  	case Float:
   825  		return convertFloatToByteArray(val)
   826  	case Double:
   827  		return convertDoubleToByteArray(val)
   828  	case ByteArray, FixedLenByteArray:
   829  		return val, nil
   830  	default:
   831  		return makeValueKind(ByteArray), nil
   832  	}
   833  }
   834  
// fixedLenByteArrayType is the implementation of the FIXED_LEN_BYTE_ARRAY
// physical type; length is the size of each value in bytes.
type fixedLenByteArrayType struct{ length int }

// String returns "FIXED_LEN_BYTE_ARRAY(N)" where N is the configured length.
func (t fixedLenByteArrayType) String() string {
	return fmt.Sprintf("FIXED_LEN_BYTE_ARRAY(%d)", t.length)
}

// Kind returns FixedLenByteArray.
func (t fixedLenByteArrayType) Kind() Kind { return FixedLenByteArray }

// Length returns the configured length of the values.
func (t fixedLenByteArrayType) Length() int { return t.length }

// EstimateSize returns the size estimate for n values, counting t.length
// bytes per value.
func (t fixedLenByteArrayType) EstimateSize(n int) int { return t.length * n }

// EstimateNumValues returns the estimated number of values held in n bytes,
// counting t.length bytes per value.
//
// NOTE(review): the division panics when t.length is zero — confirm that
// zero-length types are never constructed.
func (t fixedLenByteArrayType) EstimateNumValues(n int) int { return n / t.length }

// Compare orders a and b lexicographically by their bytes.
func (t fixedLenByteArrayType) Compare(a, b Value) int {
	return bytes.Compare(a.byteArray(), b.byteArray())
}

// ColumnOrder returns the shared type-defined column order.
func (t fixedLenByteArrayType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

// LogicalType returns nil: no logical type is attached to this type.
func (t fixedLenByteArrayType) LogicalType() *format.LogicalType { return nil }

// ConvertedType returns nil: no converted type is attached to this type.
func (t fixedLenByteArrayType) ConvertedType() *deprecated.ConvertedType { return nil }

// PhysicalType returns the entry of physicalTypes for the FixedLenByteArray
// kind.
func (t fixedLenByteArrayType) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }
   860  
// NewColumnIndexer returns a new fixed-length byte array column indexer
// configured with the type's length and sizeLimit.
func (t fixedLenByteArrayType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newFixedLenByteArrayColumnIndexer(t.length, sizeLimit)
}

// NewColumnBuffer returns a new column buffer of fixed-length byte array
// values bound to the column at columnIndex; numValues is forwarded through
// makeNumValues.
func (t fixedLenByteArrayType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newFixedLenByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewDictionary returns a new dictionary of fixed-length byte array values for
// the column at columnIndex, built from numValues and data.
func (t fixedLenByteArrayType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newFixedLenByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewPage returns a new page of fixed-length byte array values for the column
// at columnIndex, built from numValues and data.
func (t fixedLenByteArrayType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newFixedLenByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewValues wraps the raw bytes in values as fixed-length byte array
// encoding.Values of size t.length. The offsets argument is ignored.
func (t fixedLenByteArrayType) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.FixedLenByteArrayValues(values, t.length)
}

// Encode delegates to encoding.EncodeFixedLenByteArray to encode src with enc,
// passing dst along as the destination buffer.
func (t fixedLenByteArrayType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeFixedLenByteArray(dst, src, enc)
}

// Decode delegates to encoding.DecodeFixedLenByteArray to decode src with enc,
// passing dst along as the destination values.
func (t fixedLenByteArrayType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeFixedLenByteArray(dst, src, enc)
}

// EstimateDecodeSize returns t.EstimateSize(numValues); the src and enc
// arguments do not influence the estimate.
func (t fixedLenByteArrayType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}
   892  
   893  func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error {
   894  	v := src.byteArray()
   895  	switch dst.Kind() {
   896  	case reflect.Array:
   897  		if dst.Type().Elem().Kind() == reflect.Uint8 && dst.Len() == len(v) {
   898  			// This code could be implemented as a call to reflect.Copy but
   899  			// it would require creating a reflect.Value from v which causes
   900  			// the heap allocation to pack the []byte value. To avoid this
   901  			// overhead we instead convert the reflect.Value holding the
   902  			// destination array into a byte slice which allows us to use
   903  			// a more efficient call to copy.
   904  			d := unsafe.Slice((*byte)(unsafecast.PointerOfValue(dst)), len(v))
   905  			copy(d, v)
   906  			return nil
   907  		}
   908  	case reflect.Slice:
   909  		dst.SetBytes(copyBytes(v))
   910  		return nil
   911  	}
   912  
   913  	val := reflect.ValueOf(copyBytes(v))
   914  	dst.Set(val)
   915  	return nil
   916  }
   917  
   918  func (t fixedLenByteArrayType) ConvertValue(val Value, typ Type) (Value, error) {
   919  	switch typ.(type) {
   920  	case *stringType:
   921  		return convertStringToFixedLenByteArray(val, t.length)
   922  	}
   923  	switch typ.Kind() {
   924  	case Boolean:
   925  		return convertBooleanToFixedLenByteArray(val, t.length)
   926  	case Int32:
   927  		return convertInt32ToFixedLenByteArray(val, t.length)
   928  	case Int64:
   929  		return convertInt64ToFixedLenByteArray(val, t.length)
   930  	case Int96:
   931  		return convertInt96ToFixedLenByteArray(val, t.length)
   932  	case Float:
   933  		return convertFloatToFixedLenByteArray(val, t.length)
   934  	case Double:
   935  		return convertDoubleToFixedLenByteArray(val, t.length)
   936  	case ByteArray, FixedLenByteArray:
   937  		return convertByteArrayToFixedLenByteArray(val, t.length)
   938  	default:
   939  		return makeValueBytes(FixedLenByteArray, make([]byte, t.length)), nil
   940  	}
   941  }
   942  
// uint32Type embeds int32Type and overrides the methods below so that
// comparison and the indexer, buffer, dictionary, and page constructors use
// their uint32 variants.
type uint32Type struct{ int32Type }

// Compare orders a and b by comparing their uint32 representations.
func (t uint32Type) Compare(a, b Value) int {
	return compareUint32(a.uint32(), b.uint32())
}

// NewColumnIndexer returns a new uint32 column indexer. The sizeLimit
// argument is not used by this implementation.
func (t uint32Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newUint32ColumnIndexer()
}

// NewColumnBuffer returns a new column buffer of uint32 values bound to the
// column at columnIndex; numValues is forwarded through makeNumValues.
func (t uint32Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newUint32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewDictionary returns a new dictionary of uint32 values for the column at
// columnIndex, built from numValues and data.
func (t uint32Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newUint32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewPage returns a new page of uint32 values for the column at columnIndex,
// built from numValues and data.
func (t uint32Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newUint32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}
   964  
// uint64Type embeds int64Type and overrides the methods below so that
// comparison and the indexer, buffer, dictionary, and page constructors use
// their uint64 variants.
type uint64Type struct{ int64Type }

// Compare orders a and b by comparing their uint64 representations.
func (t uint64Type) Compare(a, b Value) int {
	return compareUint64(a.uint64(), b.uint64())
}

// NewColumnIndexer returns a new uint64 column indexer. The sizeLimit
// argument is not used by this implementation.
func (t uint64Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newUint64ColumnIndexer()
}

// NewColumnBuffer returns a new column buffer of uint64 values bound to the
// column at columnIndex; numValues is forwarded through makeNumValues.
func (t uint64Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newUint64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewDictionary returns a new dictionary of uint64 values for the column at
// columnIndex, built from numValues and data.
func (t uint64Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newUint64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewPage returns a new page of uint64 values for the column at columnIndex,
// built from numValues and data.
func (t uint64Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newUint64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}
   986  
// be128Type is the type of BE128 values.
//
// BE128 stands for "big-endian 128 bits". This type is used as a special case
// for fixed-length byte arrays of 16 bytes, which are commonly used to
// represent columns of random unique identifiers such as UUIDs.
//
// Comparisons of BE128 values use the natural byte order, the zeroth byte is
// the most significant byte.
//
// The special case is intended to provide optimizations based on the knowledge
// that the values are 16 bytes long. Stronger type checking can also be applied
// by the compiler when using [16]byte values rather than []byte, reducing the
// risk of errors on these common code paths.
type be128Type struct{}

// String returns "FIXED_LEN_BYTE_ARRAY(16)".
func (t be128Type) String() string { return "FIXED_LEN_BYTE_ARRAY(16)" }

// Kind returns FixedLenByteArray.
func (t be128Type) Kind() Kind { return FixedLenByteArray }

// Length returns 16, the size of each value in bytes.
func (t be128Type) Length() int { return 16 }

// EstimateSize returns the size estimate for n values, counting 16 bytes per
// value.
func (t be128Type) EstimateSize(n int) int { return 16 * n }

// EstimateNumValues returns the estimated number of values held in n bytes,
// counting 16 bytes per value.
func (t be128Type) EstimateNumValues(n int) int { return n / 16 }

// Compare orders a and b by comparing their BE128 representations.
func (t be128Type) Compare(a, b Value) int { return compareBE128(a.be128(), b.be128()) }

// ColumnOrder returns the shared type-defined column order.
func (t be128Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

// LogicalType returns nil: no logical type is attached to be128Type.
func (t be128Type) LogicalType() *format.LogicalType { return nil }

// ConvertedType returns nil: no converted type is attached to be128Type.
func (t be128Type) ConvertedType() *deprecated.ConvertedType { return nil }

// PhysicalType returns the entry of physicalTypes for the FixedLenByteArray
// kind.
func (t be128Type) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }
  1019  
// NewColumnIndexer returns a new BE128 column indexer. The sizeLimit argument
// is not used by this implementation.
func (t be128Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newBE128ColumnIndexer()
}

// NewColumnBuffer returns a new column buffer of BE128 values bound to the
// column at columnIndex; numValues is forwarded through makeNumValues.
func (t be128Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newBE128ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewDictionary returns a new dictionary of BE128 values for the column at
// columnIndex, built from numValues and data.
func (t be128Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newBE128Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewPage returns a new page of BE128 values for the column at columnIndex,
// built from numValues and data.
func (t be128Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newBE128Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewValues wraps the raw bytes in values as fixed-length byte array
// encoding.Values of size 16. The offsets argument is ignored.
func (t be128Type) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.FixedLenByteArrayValues(values, 16)
}

// Encode delegates to encoding.EncodeFixedLenByteArray to encode src with enc,
// passing dst along as the destination buffer.
func (t be128Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeFixedLenByteArray(dst, src, enc)
}

// Decode delegates to encoding.DecodeFixedLenByteArray to decode src with enc,
// passing dst along as the destination values.
func (t be128Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeFixedLenByteArray(dst, src, enc)
}

// EstimateDecodeSize returns t.EstimateSize(numValues); the src and enc
// arguments do not influence the estimate.
func (t be128Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

// AssignValue delegates to the generic fixed-length byte array type of
// length 16.
func (t be128Type) AssignValue(dst reflect.Value, src Value) error {
	return fixedLenByteArrayType{length: 16}.AssignValue(dst, src)
}

// ConvertValue delegates to the generic fixed-length byte array type of
// length 16.
func (t be128Type) ConvertValue(val Value, typ Type) (Value, error) {
	return fixedLenByteArrayType{length: 16}.ConvertValue(val, typ)
}
  1059  
  1060  // FixedLenByteArrayType constructs a type for fixed-length values of the given
  1061  // size (in bytes).
  1062  func FixedLenByteArrayType(length int) Type {
  1063  	switch length {
  1064  	case 16:
  1065  		return be128Type{}
  1066  	default:
  1067  		return fixedLenByteArrayType{length: length}
  1068  	}
  1069  }
  1070  
  1071  // Int constructs a leaf node of signed integer logical type of the given bit
  1072  // width.
  1073  //
  1074  // The bit width must be one of 8, 16, 32, 64, or the function will panic.
  1075  func Int(bitWidth int) Node {
  1076  	return Leaf(integerType(bitWidth, &signedIntTypes))
  1077  }
  1078  
  1079  // Uint constructs a leaf node of unsigned integer logical type of the given
  1080  // bit width.
  1081  //
  1082  // The bit width must be one of 8, 16, 32, 64, or the function will panic.
  1083  func Uint(bitWidth int) Node {
  1084  	return Leaf(integerType(bitWidth, &unsignedIntTypes))
  1085  }
  1086  
  1087  func integerType(bitWidth int, types *[4]intType) *intType {
  1088  	switch bitWidth {
  1089  	case 8:
  1090  		return &types[0]
  1091  	case 16:
  1092  		return &types[1]
  1093  	case 32:
  1094  		return &types[2]
  1095  	case 64:
  1096  		return &types[3]
  1097  	default:
  1098  		panic(fmt.Sprintf("cannot create a %d bits parquet integer node", bitWidth))
  1099  	}
  1100  }
  1101  
// signedIntTypes holds the signed integer logical types, ordered by bit width
// (8, 16, 32, 64). integerType relies on this ordering when indexing.
var signedIntTypes = [...]intType{
	{BitWidth: 8, IsSigned: true},
	{BitWidth: 16, IsSigned: true},
	{BitWidth: 32, IsSigned: true},
	{BitWidth: 64, IsSigned: true},
}
  1108  
// unsignedIntTypes holds the unsigned integer logical types, ordered by bit
// width (8, 16, 32, 64). integerType relies on this ordering when indexing.
var unsignedIntTypes = [...]intType{
	{BitWidth: 8, IsSigned: false},
	{BitWidth: 16, IsSigned: false},
	{BitWidth: 32, IsSigned: false},
	{BitWidth: 64, IsSigned: false},
}
  1115  
// intType is the integer logical type; it shares its representation with
// format.IntType (bit width and signedness).
type intType format.IntType
  1117  
  1118  func (t *intType) baseType() Type {
  1119  	if t.IsSigned {
  1120  		if t.BitWidth == 64 {
  1121  			return int64Type{}
  1122  		} else {
  1123  			return int32Type{}
  1124  		}
  1125  	} else {
  1126  		if t.BitWidth == 64 {
  1127  			return uint64Type{}
  1128  		} else {
  1129  			return uint32Type{}
  1130  		}
  1131  	}
  1132  }
  1133  
// String returns the string representation of the underlying format.IntType.
func (t *intType) String() string { return (*format.IntType)(t).String() }

// Kind returns the kind of the base type selected by baseType.
func (t *intType) Kind() Kind { return t.baseType().Kind() }

// Length returns the bit width of the integer type.
func (t *intType) Length() int { return int(t.BitWidth) }

// EstimateSize returns the size estimate for n values, counting BitWidth/8
// bytes per value.
func (t *intType) EstimateSize(n int) int { return (int(t.BitWidth) / 8) * n }

// EstimateNumValues returns the estimated number of values held in n bytes,
// counting BitWidth/8 bytes per value.
func (t *intType) EstimateNumValues(n int) int { return n / (int(t.BitWidth) / 8) }
  1143  
  1144  func (t *intType) Compare(a, b Value) int {
  1145  	// This code is similar to t.baseType().Compare(a,b) but comparison methods
  1146  	// tend to be invoked a lot (e.g. when sorting) so avoiding the interface
  1147  	// indirection in this case yields much better throughput in some cases.
  1148  	if t.BitWidth == 64 {
  1149  		i1 := a.int64()
  1150  		i2 := b.int64()
  1151  		if t.IsSigned {
  1152  			return compareInt64(i1, i2)
  1153  		} else {
  1154  			return compareUint64(uint64(i1), uint64(i2))
  1155  		}
  1156  	} else {
  1157  		i1 := a.int32()
  1158  		i2 := b.int32()
  1159  		if t.IsSigned {
  1160  			return compareInt32(i1, i2)
  1161  		} else {
  1162  			return compareUint32(uint32(i1), uint32(i2))
  1163  		}
  1164  	}
  1165  }
  1166  
// ColumnOrder returns the column order of the base type.
func (t *intType) ColumnOrder() *format.ColumnOrder { return t.baseType().ColumnOrder() }

// PhysicalType returns the physical type of the base type.
func (t *intType) PhysicalType() *format.Type { return t.baseType().PhysicalType() }

// LogicalType returns a logical type descriptor whose Integer field points at
// t itself.
func (t *intType) LogicalType() *format.LogicalType {
	return &format.LogicalType{Integer: (*format.IntType)(t)}
}
  1174  
// ConvertedType returns the deprecated converted type matching the bit width
// and signedness of t.
//
// The result is found by adding an offset derived from the bit width to
// deprecated.Int8 (signed) or deprecated.Uint8 (unsigned), which assumes the
// 8, 16, 32, and 64 bit constants are declared consecutively.
func (t *intType) ConvertedType() *deprecated.ConvertedType {
	convertedType := bits.Len8(uint8(t.BitWidth)/8) - 1 // 8=>0, 16=>1, 32=>2, 64=>3
	if t.IsSigned {
		convertedType += int(deprecated.Int8)
	} else {
		convertedType += int(deprecated.Uint8)
	}
	return &convertedTypes[convertedType]
}
  1184  
// NewColumnIndexer delegates to the base type selected by baseType.
func (t *intType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return t.baseType().NewColumnIndexer(sizeLimit)
}

// NewColumnBuffer delegates to the base type selected by baseType.
func (t *intType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return t.baseType().NewColumnBuffer(columnIndex, numValues)
}

// NewDictionary delegates to the base type selected by baseType.
func (t *intType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return t.baseType().NewDictionary(columnIndex, numValues, data)
}

// NewPage delegates to the base type selected by baseType.
func (t *intType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return t.baseType().NewPage(columnIndex, numValues, data)
}

// NewValues delegates to the base type selected by baseType.
func (t *intType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return t.baseType().NewValues(values, offsets)
}

// Encode delegates to the base type selected by baseType.
func (t *intType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return t.baseType().Encode(dst, src, enc)
}

// Decode delegates to the base type selected by baseType.
func (t *intType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return t.baseType().Decode(dst, src, enc)
}

// EstimateDecodeSize delegates to the base type selected by baseType.
func (t *intType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.baseType().EstimateDecodeSize(numValues, src, enc)
}
  1216  
  1217  func (t *intType) AssignValue(dst reflect.Value, src Value) error {
  1218  	if t.BitWidth == 64 {
  1219  		return int64Type{}.AssignValue(dst, src)
  1220  	} else {
  1221  		return int32Type{}.AssignValue(dst, src)
  1222  	}
  1223  }
  1224  
  1225  func (t *intType) ConvertValue(val Value, typ Type) (Value, error) {
  1226  	if t.BitWidth == 64 {
  1227  		return int64Type{}.ConvertValue(val, typ)
  1228  	} else {
  1229  		return int32Type{}.ConvertValue(val, typ)
  1230  	}
  1231  }
  1232  
  1233  // Decimal constructs a leaf node of decimal logical type with the given
  1234  // scale, precision, and underlying type.
  1235  //
  1236  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
  1237  func Decimal(scale, precision int, typ Type) Node {
  1238  	switch typ.Kind() {
  1239  	case Int32, Int64, FixedLenByteArray:
  1240  	default:
  1241  		panic("DECIMAL node must annotate Int32, Int64 or FixedLenByteArray but got " + typ.String())
  1242  	}
  1243  	return Leaf(&decimalType{
  1244  		decimal: format.DecimalType{
  1245  			Scale:     int32(scale),
  1246  			Precision: int32(precision),
  1247  		},
  1248  		Type: typ,
  1249  	})
  1250  }
  1251  
// decimalType annotates an underlying type with the decimal logical type.
//
// The embedded Type supplies every method that is not overridden below.
type decimalType struct {
	decimal format.DecimalType
	Type
}

// String returns the string representation of the decimal logical type.
func (t *decimalType) String() string { return t.decimal.String() }

// LogicalType returns a logical type descriptor whose Decimal field points at
// the decimal definition held by t.
func (t *decimalType) LogicalType() *format.LogicalType {
	return &format.LogicalType{Decimal: &t.decimal}
}

// ConvertedType returns the deprecated Decimal converted type.
func (t *decimalType) ConvertedType() *deprecated.ConvertedType {
	return &convertedTypes[deprecated.Decimal]
}
  1266  
  1267  // String constructs a leaf node of UTF8 logical type.
  1268  //
  1269  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#string
  1270  func String() Node { return Leaf(&stringType{}) }
  1271  
// stringType is the UTF8 string logical type; it shares its representation
// with format.StringType.
type stringType format.StringType

// String returns the string representation of the underlying
// format.StringType.
func (t *stringType) String() string { return (*format.StringType)(t).String() }

// Kind returns ByteArray.
func (t *stringType) Kind() Kind { return ByteArray }

// Length returns 0.
func (t *stringType) Length() int { return 0 }

// EstimateSize returns the byteArrayType size estimate for n values.
func (t *stringType) EstimateSize(n int) int { return byteArrayType{}.EstimateSize(n) }

// EstimateNumValues returns the byteArrayType estimate of the number of values
// held in n bytes.
func (t *stringType) EstimateNumValues(n int) int { return byteArrayType{}.EstimateNumValues(n) }

// Compare orders a and b lexicographically by their bytes.
func (t *stringType) Compare(a, b Value) int {
	return bytes.Compare(a.byteArray(), b.byteArray())
}

// ColumnOrder returns the shared type-defined column order.
func (t *stringType) ColumnOrder() *format.ColumnOrder {
	return &typeDefinedColumnOrder
}

// PhysicalType returns the entry of physicalTypes for the ByteArray kind.
func (t *stringType) PhysicalType() *format.Type {
	return &physicalTypes[ByteArray]
}

// LogicalType returns a logical type descriptor whose UTF8 field points at t
// itself.
func (t *stringType) LogicalType() *format.LogicalType {
	return &format.LogicalType{UTF8: (*format.StringType)(t)}
}

// ConvertedType returns the deprecated UTF8 converted type.
func (t *stringType) ConvertedType() *deprecated.ConvertedType {
	return &convertedTypes[deprecated.UTF8]
}
  1303  
// NewColumnIndexer returns a new byte array column indexer configured with
// sizeLimit.
func (t *stringType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newByteArrayColumnIndexer(sizeLimit)
}

// NewDictionary returns a new byte array dictionary typed as t for the column
// at columnIndex, built from numValues and data.
func (t *stringType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewColumnBuffer returns a new byte array column buffer typed as t and bound
// to the column at columnIndex; numValues is forwarded through makeNumValues.
func (t *stringType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

// NewPage returns a new byte array page typed as t for the column at
// columnIndex, built from numValues and data.
func (t *stringType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// NewValues wraps values and offsets as byte array encoding.Values.
func (t *stringType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return encoding.ByteArrayValues(values, offsets)
}

// Encode delegates to encoding.EncodeByteArray to encode src with enc,
// passing dst along as the destination buffer.
func (t *stringType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeByteArray(dst, src, enc)
}

// Decode delegates to encoding.DecodeByteArray to decode src with enc,
// passing dst along as the destination values.
func (t *stringType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeByteArray(dst, src, enc)
}

// EstimateDecodeSize delegates to byteArrayType.
func (t *stringType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return byteArrayType{}.EstimateDecodeSize(numValues, src, enc)
}

// AssignValue delegates to byteArrayType.
func (t *stringType) AssignValue(dst reflect.Value, src Value) error {
	return byteArrayType{}.AssignValue(dst, src)
}
  1339  
  1340  func (t *stringType) ConvertValue(val Value, typ Type) (Value, error) {
  1341  	switch t2 := typ.(type) {
  1342  	case *dateType:
  1343  		return convertDateToString(val)
  1344  	case *timeType:
  1345  		tz := t2.tz()
  1346  		if t2.Unit.Micros != nil {
  1347  			return convertTimeMicrosToString(val, tz)
  1348  		} else {
  1349  			return convertTimeMillisToString(val, tz)
  1350  		}
  1351  	}
  1352  	switch typ.Kind() {
  1353  	case Boolean:
  1354  		return convertBooleanToString(val)
  1355  	case Int32:
  1356  		return convertInt32ToString(val)
  1357  	case Int64:
  1358  		return convertInt64ToString(val)
  1359  	case Int96:
  1360  		return convertInt96ToString(val)
  1361  	case Float:
  1362  		return convertFloatToString(val)
  1363  	case Double:
  1364  		return convertDoubleToString(val)
  1365  	case ByteArray:
  1366  		return val, nil
  1367  	case FixedLenByteArray:
  1368  		return convertFixedLenByteArrayToString(val)
  1369  	default:
  1370  		return makeValueKind(ByteArray), nil
  1371  	}
  1372  }
  1373  
  1374  // UUID constructs a leaf node of UUID logical type.
  1375  //
  1376  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#uuid
  1377  func UUID() Node { return Leaf(&uuidType{}) }
  1378  
// uuidType is the UUID logical type; it shares its representation with
// format.UUIDType and delegates most of its behavior to be128Type.
type uuidType format.UUIDType

// String returns the string representation of the underlying format.UUIDType.
func (t *uuidType) String() string { return (*format.UUIDType)(t).String() }

// Kind returns the kind of be128Type.
func (t *uuidType) Kind() Kind { return be128Type{}.Kind() }

// Length returns the length of be128Type.
func (t *uuidType) Length() int { return be128Type{}.Length() }

// EstimateSize returns the be128Type size estimate for n values.
func (t *uuidType) EstimateSize(n int) int { return be128Type{}.EstimateSize(n) }

// EstimateNumValues returns the be128Type estimate of the number of values
// held in n bytes.
func (t *uuidType) EstimateNumValues(n int) int { return be128Type{}.EstimateNumValues(n) }

// Compare orders a and b using the be128Type comparison.
func (t *uuidType) Compare(a, b Value) int { return be128Type{}.Compare(a, b) }

// ColumnOrder returns the shared type-defined column order.
func (t *uuidType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

// PhysicalType returns the entry of physicalTypes for the FixedLenByteArray
// kind.
func (t *uuidType) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }

// LogicalType returns a logical type descriptor whose UUID field points at t
// itself.
func (t *uuidType) LogicalType() *format.LogicalType {
	return &format.LogicalType{UUID: (*format.UUIDType)(t)}
}

// ConvertedType returns nil: no converted type is attached to uuidType.
func (t *uuidType) ConvertedType() *deprecated.ConvertedType { return nil }
  1402  
// NewColumnIndexer delegates to be128Type.
func (t *uuidType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return be128Type{}.NewColumnIndexer(sizeLimit)
}

// NewDictionary delegates to be128Type.
func (t *uuidType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return be128Type{}.NewDictionary(columnIndex, numValues, data)
}

// NewColumnBuffer delegates to be128Type.
func (t *uuidType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return be128Type{}.NewColumnBuffer(columnIndex, numValues)
}

// NewPage delegates to be128Type.
func (t *uuidType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return be128Type{}.NewPage(columnIndex, numValues, data)
}

// NewValues delegates to be128Type.
func (t *uuidType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return be128Type{}.NewValues(values, offsets)
}

// Encode delegates to be128Type.
func (t *uuidType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return be128Type{}.Encode(dst, src, enc)
}

// Decode delegates to be128Type.
func (t *uuidType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return be128Type{}.Decode(dst, src, enc)
}

// EstimateDecodeSize delegates to be128Type.
func (t *uuidType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return be128Type{}.EstimateDecodeSize(numValues, src, enc)
}

// AssignValue delegates to be128Type.
func (t *uuidType) AssignValue(dst reflect.Value, src Value) error {
	return be128Type{}.AssignValue(dst, src)
}

// ConvertValue delegates to be128Type.
func (t *uuidType) ConvertValue(val Value, typ Type) (Value, error) {
	return be128Type{}.ConvertValue(val, typ)
}
  1442  
  1443  // Enum constructs a leaf node with a logical type representing enumerations.
  1444  //
  1445  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#enum
  1446  func Enum() Node { return Leaf(&enumType{}) }
  1447  
// enumType is the ENUM logical type; it shares its representation with
// format.EnumType and delegates most of its behavior to stringType.
type enumType format.EnumType

// String returns the string representation of the underlying format.EnumType.
func (t *enumType) String() string { return (*format.EnumType)(t).String() }

// Kind returns the kind of stringType.
func (t *enumType) Kind() Kind { return new(stringType).Kind() }

// Length returns the length of stringType.
func (t *enumType) Length() int { return new(stringType).Length() }

// EstimateSize returns the stringType size estimate for n values.
func (t *enumType) EstimateSize(n int) int { return new(stringType).EstimateSize(n) }

// EstimateNumValues returns the stringType estimate of the number of values
// held in n bytes.
func (t *enumType) EstimateNumValues(n int) int { return new(stringType).EstimateNumValues(n) }

// Compare orders a and b using the stringType comparison.
func (t *enumType) Compare(a, b Value) int { return new(stringType).Compare(a, b) }

// ColumnOrder returns the column order of stringType.
func (t *enumType) ColumnOrder() *format.ColumnOrder { return new(stringType).ColumnOrder() }

// PhysicalType returns the physical type of stringType.
func (t *enumType) PhysicalType() *format.Type { return new(stringType).PhysicalType() }

// LogicalType returns a logical type descriptor whose Enum field points at t
// itself.
func (t *enumType) LogicalType() *format.LogicalType {
	return &format.LogicalType{Enum: (*format.EnumType)(t)}
}

// ConvertedType returns the deprecated Enum converted type.
func (t *enumType) ConvertedType() *deprecated.ConvertedType {
	return &convertedTypes[deprecated.Enum]
}
  1473  
// NewColumnIndexer delegates to stringType.
func (t *enumType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return new(stringType).NewColumnIndexer(sizeLimit)
}

// NewDictionary delegates to stringType.
func (t *enumType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return new(stringType).NewDictionary(columnIndex, numValues, data)
}

// NewColumnBuffer delegates to stringType.
func (t *enumType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return new(stringType).NewColumnBuffer(columnIndex, numValues)
}

// NewPage delegates to stringType.
func (t *enumType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return new(stringType).NewPage(columnIndex, numValues, data)
}

// NewValues delegates to stringType.
func (t *enumType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return new(stringType).NewValues(values, offsets)
}

// Encode delegates to stringType.
func (t *enumType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return new(stringType).Encode(dst, src, enc)
}

// Decode delegates to stringType.
func (t *enumType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return new(stringType).Decode(dst, src, enc)
}

// EstimateDecodeSize delegates to stringType.
func (t *enumType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return new(stringType).EstimateDecodeSize(numValues, src, enc)
}

// AssignValue delegates to stringType.
func (t *enumType) AssignValue(dst reflect.Value, src Value) error {
	return new(stringType).AssignValue(dst, src)
}
  1509  
  1510  func (t *enumType) ConvertValue(val Value, typ Type) (Value, error) {
  1511  	switch typ.(type) {
  1512  	case *byteArrayType, *stringType, *enumType:
  1513  		return val, nil
  1514  	default:
  1515  		return val, invalidConversion(val, "ENUM", typ.String())
  1516  	}
  1517  }
  1518  
  1519  // JSON constructs a leaf node of JSON logical type.
  1520  //
  1521  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#json
  1522  func JSON() Node { return Leaf(&jsonType{}) }
  1523  
  1524  type jsonType format.JsonType
  1525  
  1526  func (t *jsonType) String() string { return (*format.JsonType)(t).String() }
  1527  
  1528  func (t *jsonType) Kind() Kind { return byteArrayType{}.Kind() }
  1529  
  1530  func (t *jsonType) Length() int { return byteArrayType{}.Length() }
  1531  
  1532  func (t *jsonType) EstimateSize(n int) int { return byteArrayType{}.EstimateSize(n) }
  1533  
  1534  func (t *jsonType) EstimateNumValues(n int) int { return byteArrayType{}.EstimateNumValues(n) }
  1535  
  1536  func (t *jsonType) Compare(a, b Value) int { return byteArrayType{}.Compare(a, b) }
  1537  
  1538  func (t *jsonType) ColumnOrder() *format.ColumnOrder { return byteArrayType{}.ColumnOrder() }
  1539  
  1540  func (t *jsonType) PhysicalType() *format.Type { return byteArrayType{}.PhysicalType() }
  1541  
  1542  func (t *jsonType) LogicalType() *format.LogicalType {
  1543  	return &format.LogicalType{Json: (*format.JsonType)(t)}
  1544  }
  1545  
  1546  func (t *jsonType) ConvertedType() *deprecated.ConvertedType {
  1547  	return &convertedTypes[deprecated.Json]
  1548  }
  1549  
  1550  func (t *jsonType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1551  	return byteArrayType{}.NewColumnIndexer(sizeLimit)
  1552  }
  1553  
  1554  func (t *jsonType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
  1555  	return byteArrayType{}.NewDictionary(columnIndex, numValues, data)
  1556  }
  1557  
  1558  func (t *jsonType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1559  	return byteArrayType{}.NewColumnBuffer(columnIndex, numValues)
  1560  }
  1561  
  1562  func (t *jsonType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
  1563  	return byteArrayType{}.NewPage(columnIndex, numValues, data)
  1564  }
  1565  
  1566  func (t *jsonType) NewValues(values []byte, offsets []uint32) encoding.Values {
  1567  	return byteArrayType{}.NewValues(values, offsets)
  1568  }
  1569  
  1570  func (t *jsonType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
  1571  	return byteArrayType{}.Encode(dst, src, enc)
  1572  }
  1573  
  1574  func (t *jsonType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
  1575  	return byteArrayType{}.Decode(dst, src, enc)
  1576  }
  1577  
  1578  func (t *jsonType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
  1579  	return byteArrayType{}.EstimateDecodeSize(numValues, src, enc)
  1580  }
  1581  
  1582  func (t *jsonType) AssignValue(dst reflect.Value, src Value) error {
  1583  	// Assign value using ByteArrayType for BC...
  1584  	switch dst.Kind() {
  1585  	case reflect.String:
  1586  		return byteArrayType{}.AssignValue(dst, src)
  1587  	case reflect.Slice:
  1588  		if dst.Type().Elem().Kind() == reflect.Uint8 {
  1589  			return byteArrayType{}.AssignValue(dst, src)
  1590  		}
  1591  	}
  1592  
  1593  	// Otherwise handle with json.Unmarshal
  1594  	b := src.byteArray()
  1595  	val := reflect.New(dst.Type()).Elem()
  1596  	err := json.Unmarshal(b, val.Addr().Interface())
  1597  	if err != nil {
  1598  		return err
  1599  	}
  1600  	dst.Set(val)
  1601  	return nil
  1602  }
  1603  
  1604  func (t *jsonType) ConvertValue(val Value, typ Type) (Value, error) {
  1605  	switch typ.(type) {
  1606  	case *byteArrayType, *stringType, *jsonType:
  1607  		return val, nil
  1608  	default:
  1609  		return val, invalidConversion(val, "JSON", typ.String())
  1610  	}
  1611  }
  1612  
  1613  // BSON constructs a leaf node of BSON logical type.
  1614  //
  1615  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#bson
  1616  func BSON() Node { return Leaf(&bsonType{}) }
  1617  
  1618  type bsonType format.BsonType
  1619  
  1620  func (t *bsonType) String() string { return (*format.BsonType)(t).String() }
  1621  
  1622  func (t *bsonType) Kind() Kind { return byteArrayType{}.Kind() }
  1623  
  1624  func (t *bsonType) Length() int { return byteArrayType{}.Length() }
  1625  
  1626  func (t *bsonType) EstimateSize(n int) int { return byteArrayType{}.EstimateSize(n) }
  1627  
  1628  func (t *bsonType) EstimateNumValues(n int) int { return byteArrayType{}.EstimateNumValues(n) }
  1629  
  1630  func (t *bsonType) Compare(a, b Value) int { return byteArrayType{}.Compare(a, b) }
  1631  
  1632  func (t *bsonType) ColumnOrder() *format.ColumnOrder { return byteArrayType{}.ColumnOrder() }
  1633  
  1634  func (t *bsonType) PhysicalType() *format.Type { return byteArrayType{}.PhysicalType() }
  1635  
  1636  func (t *bsonType) LogicalType() *format.LogicalType {
  1637  	return &format.LogicalType{Bson: (*format.BsonType)(t)}
  1638  }
  1639  
  1640  func (t *bsonType) ConvertedType() *deprecated.ConvertedType {
  1641  	return &convertedTypes[deprecated.Bson]
  1642  }
  1643  
  1644  func (t *bsonType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1645  	return byteArrayType{}.NewColumnIndexer(sizeLimit)
  1646  }
  1647  
  1648  func (t *bsonType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
  1649  	return byteArrayType{}.NewDictionary(columnIndex, numValues, data)
  1650  }
  1651  
  1652  func (t *bsonType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1653  	return byteArrayType{}.NewColumnBuffer(columnIndex, numValues)
  1654  }
  1655  
  1656  func (t *bsonType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
  1657  	return byteArrayType{}.NewPage(columnIndex, numValues, data)
  1658  }
  1659  
  1660  func (t *bsonType) NewValues(values []byte, offsets []uint32) encoding.Values {
  1661  	return byteArrayType{}.NewValues(values, offsets)
  1662  }
  1663  
  1664  func (t *bsonType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
  1665  	return byteArrayType{}.Encode(dst, src, enc)
  1666  }
  1667  
  1668  func (t *bsonType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
  1669  	return byteArrayType{}.Decode(dst, src, enc)
  1670  }
  1671  
  1672  func (t *bsonType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
  1673  	return byteArrayType{}.EstimateDecodeSize(numValues, src, enc)
  1674  }
  1675  
  1676  func (t *bsonType) AssignValue(dst reflect.Value, src Value) error {
  1677  	return byteArrayType{}.AssignValue(dst, src)
  1678  }
  1679  
  1680  func (t *bsonType) ConvertValue(val Value, typ Type) (Value, error) {
  1681  	switch typ.(type) {
  1682  	case *byteArrayType, *bsonType:
  1683  		return val, nil
  1684  	default:
  1685  		return val, invalidConversion(val, "BSON", typ.String())
  1686  	}
  1687  }
  1688  
  1689  // Date constructs a leaf node of DATE logical type.
  1690  //
  1691  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date
  1692  func Date() Node { return Leaf(&dateType{}) }
  1693  
  1694  type dateType format.DateType
  1695  
  1696  func (t *dateType) String() string { return (*format.DateType)(t).String() }
  1697  
  1698  func (t *dateType) Kind() Kind { return int32Type{}.Kind() }
  1699  
  1700  func (t *dateType) Length() int { return int32Type{}.Length() }
  1701  
  1702  func (t *dateType) EstimateSize(n int) int { return int32Type{}.EstimateSize(n) }
  1703  
  1704  func (t *dateType) EstimateNumValues(n int) int { return int32Type{}.EstimateNumValues(n) }
  1705  
  1706  func (t *dateType) Compare(a, b Value) int { return int32Type{}.Compare(a, b) }
  1707  
  1708  func (t *dateType) ColumnOrder() *format.ColumnOrder { return int32Type{}.ColumnOrder() }
  1709  
  1710  func (t *dateType) PhysicalType() *format.Type { return int32Type{}.PhysicalType() }
  1711  
  1712  func (t *dateType) LogicalType() *format.LogicalType {
  1713  	return &format.LogicalType{Date: (*format.DateType)(t)}
  1714  }
  1715  
  1716  func (t *dateType) ConvertedType() *deprecated.ConvertedType {
  1717  	return &convertedTypes[deprecated.Date]
  1718  }
  1719  
  1720  func (t *dateType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1721  	return int32Type{}.NewColumnIndexer(sizeLimit)
  1722  }
  1723  
  1724  func (t *dateType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
  1725  	return int32Type{}.NewDictionary(columnIndex, numValues, data)
  1726  }
  1727  
  1728  func (t *dateType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1729  	return int32Type{}.NewColumnBuffer(columnIndex, numValues)
  1730  }
  1731  
  1732  func (t *dateType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
  1733  	return int32Type{}.NewPage(columnIndex, numValues, data)
  1734  }
  1735  
  1736  func (t *dateType) NewValues(values []byte, offsets []uint32) encoding.Values {
  1737  	return int32Type{}.NewValues(values, offsets)
  1738  }
  1739  
  1740  func (t *dateType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
  1741  	return int32Type{}.Encode(dst, src, enc)
  1742  }
  1743  
  1744  func (t *dateType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
  1745  	return int32Type{}.Decode(dst, src, enc)
  1746  }
  1747  
  1748  func (t *dateType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
  1749  	return int32Type{}.EstimateDecodeSize(numValues, src, enc)
  1750  }
  1751  
  1752  func (t *dateType) AssignValue(dst reflect.Value, src Value) error {
  1753  	return int32Type{}.AssignValue(dst, src)
  1754  }
  1755  
  1756  func (t *dateType) ConvertValue(val Value, typ Type) (Value, error) {
  1757  	switch src := typ.(type) {
  1758  	case *stringType:
  1759  		return convertStringToDate(val, time.UTC)
  1760  	case *timestampType:
  1761  		return convertTimestampToDate(val, src.Unit, src.tz())
  1762  	}
  1763  	return int32Type{}.ConvertValue(val, typ)
  1764  }
  1765  
  1766  // TimeUnit represents units of time in the parquet type system.
  1767  type TimeUnit interface {
  1768  	// Returns the precision of the time unit as a time.Duration value.
  1769  	Duration() time.Duration
  1770  	// Converts the TimeUnit value to its representation in the parquet thrift
  1771  	// format.
  1772  	TimeUnit() format.TimeUnit
  1773  }
  1774  
  1775  var (
  1776  	Millisecond TimeUnit = &millisecond{}
  1777  	Microsecond TimeUnit = &microsecond{}
  1778  	Nanosecond  TimeUnit = &nanosecond{}
  1779  )
  1780  
  1781  type millisecond format.MilliSeconds
  1782  
  1783  func (u *millisecond) Duration() time.Duration { return time.Millisecond }
  1784  func (u *millisecond) TimeUnit() format.TimeUnit {
  1785  	return format.TimeUnit{Millis: (*format.MilliSeconds)(u)}
  1786  }
  1787  
  1788  type microsecond format.MicroSeconds
  1789  
  1790  func (u *microsecond) Duration() time.Duration { return time.Microsecond }
  1791  func (u *microsecond) TimeUnit() format.TimeUnit {
  1792  	return format.TimeUnit{Micros: (*format.MicroSeconds)(u)}
  1793  }
  1794  
  1795  type nanosecond format.NanoSeconds
  1796  
  1797  func (u *nanosecond) Duration() time.Duration { return time.Nanosecond }
  1798  func (u *nanosecond) TimeUnit() format.TimeUnit {
  1799  	return format.TimeUnit{Nanos: (*format.NanoSeconds)(u)}
  1800  }
  1801  
  1802  // Time constructs a leaf node of TIME logical type.
  1803  //
  1804  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#time
  1805  func Time(unit TimeUnit) Node {
  1806  	return Leaf(&timeType{IsAdjustedToUTC: true, Unit: unit.TimeUnit()})
  1807  }
  1808  
  1809  type timeType format.TimeType
  1810  
  1811  func (t *timeType) tz() *time.Location {
  1812  	if t.IsAdjustedToUTC {
  1813  		return time.UTC
  1814  	} else {
  1815  		return time.Local
  1816  	}
  1817  }
  1818  
  1819  func (t *timeType) baseType() Type {
  1820  	if t.useInt32() {
  1821  		return int32Type{}
  1822  	} else {
  1823  		return int64Type{}
  1824  	}
  1825  }
  1826  
  1827  func (t *timeType) useInt32() bool { return t.Unit.Millis != nil }
  1828  
  1829  func (t *timeType) useInt64() bool { return t.Unit.Micros != nil }
  1830  
  1831  func (t *timeType) String() string { return (*format.TimeType)(t).String() }
  1832  
  1833  func (t *timeType) Kind() Kind { return t.baseType().Kind() }
  1834  
  1835  func (t *timeType) Length() int { return t.baseType().Length() }
  1836  
  1837  func (t *timeType) EstimateSize(n int) int { return t.baseType().EstimateSize(n) }
  1838  
  1839  func (t *timeType) EstimateNumValues(n int) int { return t.baseType().EstimateNumValues(n) }
  1840  
  1841  func (t *timeType) Compare(a, b Value) int { return t.baseType().Compare(a, b) }
  1842  
  1843  func (t *timeType) ColumnOrder() *format.ColumnOrder { return t.baseType().ColumnOrder() }
  1844  
  1845  func (t *timeType) PhysicalType() *format.Type { return t.baseType().PhysicalType() }
  1846  
  1847  func (t *timeType) LogicalType() *format.LogicalType {
  1848  	return &format.LogicalType{Time: (*format.TimeType)(t)}
  1849  }
  1850  
  1851  func (t *timeType) ConvertedType() *deprecated.ConvertedType {
  1852  	switch {
  1853  	case t.useInt32():
  1854  		return &convertedTypes[deprecated.TimeMillis]
  1855  	case t.useInt64():
  1856  		return &convertedTypes[deprecated.TimeMicros]
  1857  	default:
  1858  		return nil
  1859  	}
  1860  }
  1861  
  1862  func (t *timeType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1863  	return t.baseType().NewColumnIndexer(sizeLimit)
  1864  }
  1865  
  1866  func (t *timeType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1867  	return t.baseType().NewColumnBuffer(columnIndex, numValues)
  1868  }
  1869  
  1870  func (t *timeType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
  1871  	return t.baseType().NewDictionary(columnIndex, numValues, data)
  1872  }
  1873  
  1874  func (t *timeType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
  1875  	return t.baseType().NewPage(columnIndex, numValues, data)
  1876  }
  1877  
  1878  func (t *timeType) NewValues(values []byte, offset []uint32) encoding.Values {
  1879  	return t.baseType().NewValues(values, offset)
  1880  }
  1881  
  1882  func (t *timeType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
  1883  	return t.baseType().Encode(dst, src, enc)
  1884  }
  1885  
  1886  func (t *timeType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
  1887  	return t.baseType().Decode(dst, src, enc)
  1888  }
  1889  
  1890  func (t *timeType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
  1891  	return t.baseType().EstimateDecodeSize(numValues, src, enc)
  1892  }
  1893  
  1894  func (t *timeType) AssignValue(dst reflect.Value, src Value) error {
  1895  	return t.baseType().AssignValue(dst, src)
  1896  }
  1897  
  1898  func (t *timeType) ConvertValue(val Value, typ Type) (Value, error) {
  1899  	switch src := typ.(type) {
  1900  	case *stringType:
  1901  		tz := t.tz()
  1902  		if t.Unit.Micros != nil {
  1903  			return convertStringToTimeMicros(val, tz)
  1904  		} else {
  1905  			return convertStringToTimeMillis(val, tz)
  1906  		}
  1907  	case *timestampType:
  1908  		tz := t.tz()
  1909  		if t.Unit.Micros != nil {
  1910  			return convertTimestampToTimeMicros(val, src.Unit, src.tz(), tz)
  1911  		} else {
  1912  			return convertTimestampToTimeMillis(val, src.Unit, src.tz(), tz)
  1913  		}
  1914  	}
  1915  	return t.baseType().ConvertValue(val, typ)
  1916  }
  1917  
  1918  // Timestamp constructs of leaf node of TIMESTAMP logical type.
  1919  //
  1920  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp
  1921  func Timestamp(unit TimeUnit) Node {
  1922  	return Leaf(&timestampType{IsAdjustedToUTC: true, Unit: unit.TimeUnit()})
  1923  }
  1924  
  1925  type timestampType format.TimestampType
  1926  
  1927  func (t *timestampType) tz() *time.Location {
  1928  	if t.IsAdjustedToUTC {
  1929  		return time.UTC
  1930  	} else {
  1931  		return time.Local
  1932  	}
  1933  }
  1934  
  1935  func (t *timestampType) String() string { return (*format.TimestampType)(t).String() }
  1936  
  1937  func (t *timestampType) Kind() Kind { return int64Type{}.Kind() }
  1938  
  1939  func (t *timestampType) Length() int { return int64Type{}.Length() }
  1940  
  1941  func (t *timestampType) EstimateSize(n int) int { return int64Type{}.EstimateSize(n) }
  1942  
  1943  func (t *timestampType) EstimateNumValues(n int) int { return int64Type{}.EstimateNumValues(n) }
  1944  
  1945  func (t *timestampType) Compare(a, b Value) int { return int64Type{}.Compare(a, b) }
  1946  
  1947  func (t *timestampType) ColumnOrder() *format.ColumnOrder { return int64Type{}.ColumnOrder() }
  1948  
  1949  func (t *timestampType) PhysicalType() *format.Type { return int64Type{}.PhysicalType() }
  1950  
  1951  func (t *timestampType) LogicalType() *format.LogicalType {
  1952  	return &format.LogicalType{Timestamp: (*format.TimestampType)(t)}
  1953  }
  1954  
  1955  func (t *timestampType) ConvertedType() *deprecated.ConvertedType {
  1956  	switch {
  1957  	case t.Unit.Millis != nil:
  1958  		return &convertedTypes[deprecated.TimestampMillis]
  1959  	case t.Unit.Micros != nil:
  1960  		return &convertedTypes[deprecated.TimestampMicros]
  1961  	default:
  1962  		return nil
  1963  	}
  1964  }
  1965  
  1966  func (t *timestampType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1967  	return int64Type{}.NewColumnIndexer(sizeLimit)
  1968  }
  1969  
  1970  func (t *timestampType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
  1971  	return int64Type{}.NewDictionary(columnIndex, numValues, data)
  1972  }
  1973  
  1974  func (t *timestampType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1975  	return int64Type{}.NewColumnBuffer(columnIndex, numValues)
  1976  }
  1977  
  1978  func (t *timestampType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
  1979  	return int64Type{}.NewPage(columnIndex, numValues, data)
  1980  }
  1981  
  1982  func (t *timestampType) NewValues(values []byte, offsets []uint32) encoding.Values {
  1983  	return int64Type{}.NewValues(values, offsets)
  1984  }
  1985  
  1986  func (t *timestampType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
  1987  	return int64Type{}.Encode(dst, src, enc)
  1988  }
  1989  
  1990  func (t *timestampType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
  1991  	return int64Type{}.Decode(dst, src, enc)
  1992  }
  1993  
  1994  func (t *timestampType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
  1995  	return int64Type{}.EstimateDecodeSize(numValues, src, enc)
  1996  }
  1997  
  1998  func (t *timestampType) AssignValue(dst reflect.Value, src Value) error {
  1999  	switch dst.Type() {
  2000  	case reflect.TypeOf(time.Time{}):
  2001  		unit := Nanosecond.TimeUnit()
  2002  		lt := t.LogicalType()
  2003  		if lt != nil && lt.Timestamp != nil {
  2004  			unit = lt.Timestamp.Unit
  2005  		}
  2006  
  2007  		nanos := src.int64()
  2008  		switch {
  2009  		case unit.Millis != nil:
  2010  			nanos = nanos * 1e6
  2011  		case unit.Micros != nil:
  2012  			nanos = nanos * 1e3
  2013  		}
  2014  
  2015  		val := time.Unix(0, nanos).UTC()
  2016  		dst.Set(reflect.ValueOf(val))
  2017  		return nil
  2018  	default:
  2019  		return int64Type{}.AssignValue(dst, src)
  2020  	}
  2021  }
  2022  
  2023  func (t *timestampType) ConvertValue(val Value, typ Type) (Value, error) {
  2024  	switch src := typ.(type) {
  2025  	case *timestampType:
  2026  		return convertTimestampToTimestamp(val, src.Unit, t.Unit)
  2027  	case *dateType:
  2028  		return convertDateToTimestamp(val, t.Unit, t.tz())
  2029  	}
  2030  	return int64Type{}.ConvertValue(val, typ)
  2031  }
  2032  
  2033  // List constructs a node of LIST logical type.
  2034  //
  2035  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
  2036  func List(of Node) Node {
  2037  	return listNode{Group{"list": Repeated(Group{"element": of})}}
  2038  }
  2039  
  2040  type listNode struct{ Group }
  2041  
  2042  func (listNode) Type() Type { return &listType{} }
  2043  
  2044  type listType format.ListType
  2045  
  2046  func (t *listType) String() string { return (*format.ListType)(t).String() }
  2047  
  2048  func (t *listType) Kind() Kind { panic("cannot call Kind on parquet LIST type") }
  2049  
  2050  func (t *listType) Length() int { return 0 }
  2051  
  2052  func (t *listType) EstimateSize(int) int { return 0 }
  2053  
  2054  func (t *listType) EstimateNumValues(int) int { return 0 }
  2055  
  2056  func (t *listType) Compare(Value, Value) int { panic("cannot compare values on parquet LIST type") }
  2057  
  2058  func (t *listType) ColumnOrder() *format.ColumnOrder { return nil }
  2059  
  2060  func (t *listType) PhysicalType() *format.Type { return nil }
  2061  
  2062  func (t *listType) LogicalType() *format.LogicalType {
  2063  	return &format.LogicalType{List: (*format.ListType)(t)}
  2064  }
  2065  
  2066  func (t *listType) ConvertedType() *deprecated.ConvertedType {
  2067  	return &convertedTypes[deprecated.List]
  2068  }
  2069  
  2070  func (t *listType) NewColumnIndexer(int) ColumnIndexer {
  2071  	panic("create create column indexer from parquet LIST type")
  2072  }
  2073  
  2074  func (t *listType) NewDictionary(int, int, encoding.Values) Dictionary {
  2075  	panic("cannot create dictionary from parquet LIST type")
  2076  }
  2077  
  2078  func (t *listType) NewColumnBuffer(int, int) ColumnBuffer {
  2079  	panic("cannot create column buffer from parquet LIST type")
  2080  }
  2081  
  2082  func (t *listType) NewPage(int, int, encoding.Values) Page {
  2083  	panic("cannot create page from parquet LIST type")
  2084  }
  2085  
  2086  func (t *listType) NewValues(values []byte, _ []uint32) encoding.Values {
  2087  	panic("cannot create values from parquet LIST type")
  2088  }
  2089  
  2090  func (t *listType) Encode(_ []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) {
  2091  	panic("cannot encode parquet LIST type")
  2092  }
  2093  
  2094  func (t *listType) Decode(_ encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) {
  2095  	panic("cannot decode parquet LIST type")
  2096  }
  2097  
  2098  func (t *listType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int {
  2099  	panic("cannot estimate decode size of parquet LIST type")
  2100  }
  2101  
  2102  func (t *listType) AssignValue(reflect.Value, Value) error {
  2103  	panic("cannot assign value to a parquet LIST type")
  2104  }
  2105  
  2106  func (t *listType) ConvertValue(Value, Type) (Value, error) {
  2107  	panic("cannot convert value to a parquet LIST type")
  2108  }
  2109  
  2110  // Map constructs a node of MAP logical type.
  2111  //
  2112  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps
  2113  func Map(key, value Node) Node {
  2114  	return mapNode{Group{
  2115  		"key_value": Repeated(Group{
  2116  			"key":   Required(key),
  2117  			"value": value,
  2118  		}),
  2119  	}}
  2120  }
  2121  
  2122  type mapNode struct{ Group }
  2123  
  2124  func (mapNode) Type() Type { return &mapType{} }
  2125  
  2126  type mapType format.MapType
  2127  
  2128  func (t *mapType) String() string { return (*format.MapType)(t).String() }
  2129  
  2130  func (t *mapType) Kind() Kind { panic("cannot call Kind on parquet MAP type") }
  2131  
  2132  func (t *mapType) Length() int { return 0 }
  2133  
  2134  func (t *mapType) EstimateSize(int) int { return 0 }
  2135  
  2136  func (t *mapType) EstimateNumValues(int) int { return 0 }
  2137  
  2138  func (t *mapType) Compare(Value, Value) int { panic("cannot compare values on parquet MAP type") }
  2139  
  2140  func (t *mapType) ColumnOrder() *format.ColumnOrder { return nil }
  2141  
  2142  func (t *mapType) PhysicalType() *format.Type { return nil }
  2143  
  2144  func (t *mapType) LogicalType() *format.LogicalType {
  2145  	return &format.LogicalType{Map: (*format.MapType)(t)}
  2146  }
  2147  
  2148  func (t *mapType) ConvertedType() *deprecated.ConvertedType {
  2149  	return &convertedTypes[deprecated.Map]
  2150  }
  2151  
  2152  func (t *mapType) NewColumnIndexer(int) ColumnIndexer {
  2153  	panic("create create column indexer from parquet MAP type")
  2154  }
  2155  
  2156  func (t *mapType) NewDictionary(int, int, encoding.Values) Dictionary {
  2157  	panic("cannot create dictionary from parquet MAP type")
  2158  }
  2159  
  2160  func (t *mapType) NewColumnBuffer(int, int) ColumnBuffer {
  2161  	panic("cannot create column buffer from parquet MAP type")
  2162  }
  2163  
  2164  func (t *mapType) NewPage(int, int, encoding.Values) Page {
  2165  	panic("cannot create page from parquet MAP type")
  2166  }
  2167  
  2168  func (t *mapType) NewValues(values []byte, _ []uint32) encoding.Values {
  2169  	panic("cannot create values from parquet MAP type")
  2170  }
  2171  
  2172  func (t *mapType) Encode(_ []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) {
  2173  	panic("cannot encode parquet MAP type")
  2174  }
  2175  
  2176  func (t *mapType) Decode(_ encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) {
  2177  	panic("cannot decode parquet MAP type")
  2178  }
  2179  
  2180  func (t *mapType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int {
  2181  	panic("cannot estimate decode size of parquet MAP type")
  2182  }
  2183  
  2184  func (t *mapType) AssignValue(reflect.Value, Value) error {
  2185  	panic("cannot assign value to a parquet MAP type")
  2186  }
  2187  
  2188  func (t *mapType) ConvertValue(Value, Type) (Value, error) {
  2189  	panic("cannot convert value to a parquet MAP type")
  2190  }
  2191  
  2192  type nullType format.NullType
  2193  
  2194  func (t *nullType) String() string { return (*format.NullType)(t).String() }
  2195  
  2196  func (t *nullType) Kind() Kind { return -1 }
  2197  
  2198  func (t *nullType) Length() int { return 0 }
  2199  
  2200  func (t *nullType) EstimateSize(int) int { return 0 }
  2201  
  2202  func (t *nullType) EstimateNumValues(int) int { return 0 }
  2203  
  2204  func (t *nullType) Compare(Value, Value) int { panic("cannot compare values on parquet NULL type") }
  2205  
  2206  func (t *nullType) ColumnOrder() *format.ColumnOrder { return nil }
  2207  
  2208  func (t *nullType) PhysicalType() *format.Type { return nil }
  2209  
  2210  func (t *nullType) LogicalType() *format.LogicalType {
  2211  	return &format.LogicalType{Unknown: (*format.NullType)(t)}
  2212  }
  2213  
  2214  func (t *nullType) ConvertedType() *deprecated.ConvertedType { return nil }
  2215  
  2216  func (t *nullType) NewColumnIndexer(int) ColumnIndexer {
  2217  	panic("create create column indexer from parquet NULL type")
  2218  }
  2219  
  2220  func (t *nullType) NewDictionary(int, int, encoding.Values) Dictionary {
  2221  	panic("cannot create dictionary from parquet NULL type")
  2222  }
  2223  
  2224  func (t *nullType) NewColumnBuffer(int, int) ColumnBuffer {
  2225  	panic("cannot create column buffer from parquet NULL type")
  2226  }
  2227  
  2228  func (t *nullType) NewPage(columnIndex, numValues int, _ encoding.Values) Page {
  2229  	return newNullPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  2230  }
  2231  
  2232  func (t *nullType) NewValues(_ []byte, _ []uint32) encoding.Values {
  2233  	return encoding.Values{}
  2234  }
  2235  
  2236  func (t *nullType) Encode(dst []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) {
  2237  	return dst[:0], nil
  2238  }
  2239  
  2240  func (t *nullType) Decode(dst encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) {
  2241  	return dst, nil
  2242  }
  2243  
  2244  func (t *nullType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int {
  2245  	return 0
  2246  }
  2247  
  2248  func (t *nullType) AssignValue(reflect.Value, Value) error {
  2249  	return nil
  2250  }
  2251  
  2252  func (t *nullType) ConvertValue(val Value, _ Type) (Value, error) {
  2253  	return val, nil
  2254  }
  2255  
  2256  type groupType struct{}
  2257  
  2258  func (groupType) String() string { return "group" }
  2259  
  2260  func (groupType) Kind() Kind {
  2261  	panic("cannot call Kind on parquet group")
  2262  }
  2263  
  2264  func (groupType) Compare(Value, Value) int {
  2265  	panic("cannot compare values on parquet group")
  2266  }
  2267  
  2268  func (groupType) NewColumnIndexer(int) ColumnIndexer {
  2269  	panic("cannot create column indexer from parquet group")
  2270  }
  2271  
  2272  func (groupType) NewDictionary(int, int, encoding.Values) Dictionary {
  2273  	panic("cannot create dictionary from parquet group")
  2274  }
  2275  
  2276  func (t groupType) NewColumnBuffer(int, int) ColumnBuffer {
  2277  	panic("cannot create column buffer from parquet group")
  2278  }
  2279  
  2280  func (t groupType) NewPage(int, int, encoding.Values) Page {
  2281  	panic("cannot create page from parquet group")
  2282  }
  2283  
  2284  func (t groupType) NewValues(_ []byte, _ []uint32) encoding.Values {
  2285  	panic("cannot create values from parquet group")
  2286  }
  2287  
  2288  func (groupType) Encode(_ []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) {
  2289  	panic("cannot encode parquet group")
  2290  }
  2291  
  2292  func (groupType) Decode(_ encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) {
  2293  	panic("cannot decode parquet group")
  2294  }
  2295  
  2296  func (groupType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int {
  2297  	panic("cannot estimate decode size of parquet group")
  2298  }
  2299  
  2300  func (groupType) AssignValue(reflect.Value, Value) error {
  2301  	panic("cannot assign value to a parquet group")
  2302  }
  2303  
  2304  func (t groupType) ConvertValue(Value, Type) (Value, error) {
  2305  	panic("cannot convert value to a parquet group")
  2306  }
  2307  
  2308  func (groupType) Length() int { return 0 }
  2309  
  2310  func (groupType) EstimateSize(int) int { return 0 }
  2311  
  2312  func (groupType) EstimateNumValues(int) int { return 0 }
  2313  
  2314  func (groupType) ColumnOrder() *format.ColumnOrder { return nil }
  2315  
  2316  func (groupType) PhysicalType() *format.Type { return nil }
  2317  
  2318  func (groupType) LogicalType() *format.LogicalType { return nil }
  2319  
  2320  func (groupType) ConvertedType() *deprecated.ConvertedType { return nil }
  2321  
  2322  func checkTypeKindEqual(to, from Type) error {
  2323  	if to.Kind() != from.Kind() {
  2324  		return fmt.Errorf("cannot convert from parquet value of type %s to %s", from, to)
  2325  	}
  2326  	return nil
  2327  }