github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/type.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"math/bits"
     7  	"time"
     8  
     9  	"github.com/vc42/parquet-go/deprecated"
    10  	"github.com/vc42/parquet-go/encoding"
    11  	"github.com/vc42/parquet-go/format"
    12  )
    13  
    14  // Kind is an enumeration type representing the physical types supported by the
    15  // parquet type system.
    16  type Kind int8
    17  
    18  const (
    19  	Boolean           Kind = Kind(format.Boolean)
    20  	Int32             Kind = Kind(format.Int32)
    21  	Int64             Kind = Kind(format.Int64)
    22  	Int96             Kind = Kind(format.Int96)
    23  	Float             Kind = Kind(format.Float)
    24  	Double            Kind = Kind(format.Double)
    25  	ByteArray         Kind = Kind(format.ByteArray)
    26  	FixedLenByteArray Kind = Kind(format.FixedLenByteArray)
    27  )
    28  
    29  // String returns a human-readable representation of the physical type.
    30  func (k Kind) String() string { return format.Type(k).String() }
    31  
    32  // Value constructs a value from k and v.
    33  //
    34  // The method panics if the data is not a valid representation of the value
    35  // kind; for example, if the kind is Int32 but the data is not 4 bytes long.
    36  func (k Kind) Value(v []byte) Value {
    37  	x, err := parseValue(k, v)
    38  	if err != nil {
    39  		panic(err)
    40  	}
    41  	return x
    42  }
    43  
// The Type interface represents logical types of the parquet type system.
//
// Types are immutable and therefore safe to access from multiple goroutines.
type Type interface {
	// Returns a human-readable representation of the parquet type.
	String() string

	// Returns the Kind value representing the underlying physical type.
	//
	// The method panics if it is called on a group type.
	Kind() Kind

	// For integer and floating point physical types, the method returns the
	// size of values in bits.
	//
	// For fixed-length byte arrays, the method returns the size of elements
	// in bytes.
	//
	// For other types, the value is zero.
	Length() int

	// Returns an estimation of the number of bytes required to hold the given
	// number of values of this type in memory.
	//
	// The method returns zero for group types.
	EstimateSize(numValues int) int64

	// Compares two values and returns a negative integer if a < b, positive if
	// a > b, or zero if a == b.
	//
	// The values' Kind must match the type, otherwise the result is undefined.
	//
	// The method panics if it is called on a group type.
	Compare(a, b Value) int

	// ColumnOrder returns the type's column order. For group types, this method
	// returns nil.
	//
	// The order describes the comparison logic implemented by the Compare
	// method.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	ColumnOrder() *format.ColumnOrder

	// Returns the physical type as a *format.Type value. For group types, this
	// method returns nil.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	PhysicalType() *format.Type

	// Returns the logical type as a *format.LogicalType value. When the logical
	// type is unknown, the method returns nil.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	LogicalType() *format.LogicalType

	// Returns the logical type's equivalent converted type. When there is
	// no equivalent converted type, the method returns nil.
	//
	// As an optimization, the method may return the same pointer across
	// multiple calls. Applications must treat the returned value as immutable,
	// mutating the value will result in undefined behavior.
	ConvertedType() *deprecated.ConvertedType

	// Creates a column indexer for values of this type.
	//
	// The size limit is a hint to the column indexer that it is allowed to
	// truncate the page boundaries to the given size. Only BYTE_ARRAY and
	// FIXED_LEN_BYTE_ARRAY types currently take this value into account.
	//
	// A value of zero or less means no limits.
	//
	// The method panics if it is called on a group type.
	NewColumnIndexer(sizeLimit int) ColumnIndexer

	// Creates a row group buffer column for values of this type.
	//
	// Column buffers are created using the index of the column they are
	// accumulating values in memory for (relative to the parent schema),
	// and the size of their memory buffer.
	//
	// The application may give an estimate of the number of values it expects
	// to write to the buffer as second argument. This estimate helps set the
	// initial buffer capacity but is not a hard limit, the underlying memory
	// buffer will grow as needed to allow more values to be written. Programs
	// may use the Size method of the column buffer (or the parent row group,
	// when relevant) to determine how many bytes are being used, and perform a
	// flush of the buffers to a storage layer.
	//
	// The method panics if it is called on a group type.
	NewColumnBuffer(columnIndex, numValues int) ColumnBuffer

	// Creates a dictionary holding values of this type.
	//
	// If the length of data is not zero, it must contain PLAIN encoded values
	// of the dictionary.
	//
	// The dictionary retains the data buffer, it does not make a copy of it.
	// If the application needs to share ownership of the memory buffer, it must
	// ensure that it will not be modified while the dictionary is in use, or it
	// must make a copy of it prior to creating the dictionary.
	//
	// The method panics if it is called on a group type.
	NewDictionary(columnIndex, numValues int, data []byte) Dictionary

	// Creates a page belonging to a column at the given index, backed by the
	// data buffer.
	//
	// If the length of data is not zero, it must contain PLAIN encoded values
	// of the page.
	//
	// The page retains the data buffer, it does not make a copy of it. If the
	// application needs to share ownership of the memory buffer, it must ensure
	// that it will not be modified while the page is in use, or it must make a
	// copy of it prior to creating the page.
	//
	// The method panics if the data is not a valid PLAIN encoded representation
	// of the page values.
	NewPage(columnIndex, numValues int, data []byte) Page

	// Assuming the src buffer contains PLAIN encoded values of the type it is
	// called on, applies the given encoding and produces the output to the dst
	// buffer passed as first argument by dispatching the call to one of the
	// encoding methods.
	Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error)

	// Assuming the src buffer contains values encoded with the given encoding,
	// decodes the input and produces the PLAIN encoded values into the dst
	// output buffer passed as first argument by dispatching the call to one
	// of the encoding methods.
	Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error)
}
   181  
// Type values for the physical kinds that take no parameter. Each one is an
// instance of an empty struct type implementing the Type interface.
var (
	BooleanType   Type = booleanType{}
	Int32Type     Type = int32Type{}
	Int64Type     Type = int64Type{}
	Int96Type     Type = int96Type{}
	FloatType     Type = floatType{}
	DoubleType    Type = doubleType{}
	ByteArrayType Type = byteArrayType{}
)

// In the current parquet version supported by this library, only type-defined
// orders are supported.
//
// The ColumnOrder methods of the leaf types declared in this file return a
// pointer to this single shared value.
var typeDefinedColumnOrder = format.ColumnOrder{
	TypeOrder: new(format.TypeDefinedOrder),
}
   197  
// physicalTypes is a lookup table of format.Type values. The PhysicalType
// methods index it with a Kind constant (e.g. &physicalTypes[Boolean]) and
// return a pointer to the shared element.
//
// NOTE(review): the explicit indices presumably mirror the numeric values of
// the format.Type constants; confirm against the format package.
var physicalTypes = [...]format.Type{
	0: format.Boolean,
	1: format.Int32,
	2: format.Int64,
	3: format.Int96,
	4: format.Float,
	5: format.Double,
	6: format.ByteArray,
	7: format.FixedLenByteArray,
}

// convertedTypes is a lookup table of deprecated.ConvertedType values, used to
// return pointers to shared elements. intType.ConvertedType indexes it with
// int(deprecated.Int8) or int(deprecated.Uint8) plus an offset.
//
// NOTE(review): that lookup assumes each explicit index equals the numeric
// value of the constant stored at it; confirm against the deprecated package.
var convertedTypes = [...]deprecated.ConvertedType{
	0:  deprecated.UTF8,
	1:  deprecated.Map,
	2:  deprecated.MapKeyValue,
	3:  deprecated.List,
	4:  deprecated.Enum,
	5:  deprecated.Decimal,
	6:  deprecated.Date,
	7:  deprecated.TimeMillis,
	8:  deprecated.TimeMicros,
	9:  deprecated.TimestampMillis,
	10: deprecated.TimestampMicros,
	11: deprecated.Uint8,
	12: deprecated.Uint16,
	13: deprecated.Uint32,
	14: deprecated.Uint64,
	15: deprecated.Int8,
	16: deprecated.Int16,
	17: deprecated.Int32,
	18: deprecated.Int64,
	19: deprecated.Json,
	20: deprecated.Bson,
	21: deprecated.Interval,
}
   233  
   234  type booleanType struct{}
   235  
   236  func (t booleanType) String() string                           { return "BOOLEAN" }
   237  func (t booleanType) Kind() Kind                               { return Boolean }
   238  func (t booleanType) Length() int                              { return 1 }
   239  func (t booleanType) EstimateSize(n int) int64                 { return (int64(n) + 7) / 8 }
   240  func (t booleanType) Compare(a, b Value) int                   { return compareBool(a.Boolean(), b.Boolean()) }
   241  func (t booleanType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   242  func (t booleanType) LogicalType() *format.LogicalType         { return nil }
   243  func (t booleanType) ConvertedType() *deprecated.ConvertedType { return nil }
   244  func (t booleanType) PhysicalType() *format.Type               { return &physicalTypes[Boolean] }
   245  
   246  func (t booleanType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   247  	return newBooleanColumnIndexer()
   248  }
   249  
   250  func (t booleanType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   251  	return newBooleanColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   252  }
   253  
   254  func (t booleanType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   255  	return newBooleanDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   256  }
   257  
   258  func (t booleanType) NewPage(columnIndex, numValues int, data []byte) Page {
   259  	return newBooleanPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   260  }
   261  
   262  func (t booleanType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   263  	return enc.EncodeBoolean(dst, src)
   264  }
   265  
   266  func (t booleanType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   267  	return enc.DecodeBoolean(dst, src)
   268  }
   269  
   270  type int32Type struct{}
   271  
   272  func (t int32Type) String() string                           { return "INT32" }
   273  func (t int32Type) Kind() Kind                               { return Int32 }
   274  func (t int32Type) Length() int                              { return 32 }
   275  func (t int32Type) EstimateSize(n int) int64                 { return 4 * int64(n) }
   276  func (t int32Type) Compare(a, b Value) int                   { return compareInt32(a.Int32(), b.Int32()) }
   277  func (t int32Type) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   278  func (t int32Type) LogicalType() *format.LogicalType         { return nil }
   279  func (t int32Type) ConvertedType() *deprecated.ConvertedType { return nil }
   280  func (t int32Type) PhysicalType() *format.Type               { return &physicalTypes[Int32] }
   281  
   282  func (t int32Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   283  	return newInt32ColumnIndexer()
   284  }
   285  
   286  func (t int32Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   287  	return newInt32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   288  }
   289  
   290  func (t int32Type) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   291  	return newInt32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   292  }
   293  
   294  func (t int32Type) NewPage(columnIndex, numValues int, data []byte) Page {
   295  	return newInt32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   296  }
   297  
   298  func (t int32Type) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   299  	return enc.EncodeInt32(dst, src)
   300  }
   301  
   302  func (t int32Type) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   303  	return enc.DecodeInt32(dst, src)
   304  }
   305  
   306  type int64Type struct{}
   307  
   308  func (t int64Type) String() string                           { return "INT64" }
   309  func (t int64Type) Kind() Kind                               { return Int64 }
   310  func (t int64Type) Length() int                              { return 64 }
   311  func (t int64Type) EstimateSize(n int) int64                 { return 8 * int64(n) }
   312  func (t int64Type) Compare(a, b Value) int                   { return compareInt64(a.Int64(), b.Int64()) }
   313  func (t int64Type) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   314  func (t int64Type) LogicalType() *format.LogicalType         { return nil }
   315  func (t int64Type) ConvertedType() *deprecated.ConvertedType { return nil }
   316  func (t int64Type) PhysicalType() *format.Type               { return &physicalTypes[Int64] }
   317  
   318  func (t int64Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   319  	return newInt64ColumnIndexer()
   320  }
   321  
   322  func (t int64Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   323  	return newInt64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   324  }
   325  
   326  func (t int64Type) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   327  	return newInt64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   328  }
   329  
   330  func (t int64Type) NewPage(columnIndex, numValues int, data []byte) Page {
   331  	return newInt64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   332  }
   333  
   334  func (t int64Type) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   335  	return enc.EncodeInt64(dst, src)
   336  }
   337  
   338  func (t int64Type) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   339  	return enc.DecodeInt64(dst, src)
   340  }
   341  
   342  type int96Type struct{}
   343  
   344  func (t int96Type) String() string { return "INT96" }
   345  
   346  func (t int96Type) Kind() Kind                               { return Int96 }
   347  func (t int96Type) Length() int                              { return 96 }
   348  func (t int96Type) EstimateSize(n int) int64                 { return 12 * int64(n) }
   349  func (t int96Type) Compare(a, b Value) int                   { return compareInt96(a.Int96(), b.Int96()) }
   350  func (t int96Type) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   351  func (t int96Type) LogicalType() *format.LogicalType         { return nil }
   352  func (t int96Type) ConvertedType() *deprecated.ConvertedType { return nil }
   353  func (t int96Type) PhysicalType() *format.Type               { return &physicalTypes[Int96] }
   354  
   355  func (t int96Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   356  	return newInt96ColumnIndexer()
   357  }
   358  
   359  func (t int96Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   360  	return newInt96ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   361  }
   362  
   363  func (t int96Type) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   364  	return newInt96Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   365  }
   366  
   367  func (t int96Type) NewPage(columnIndex, numValues int, data []byte) Page {
   368  	return newInt96Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   369  }
   370  
   371  func (t int96Type) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   372  	return enc.EncodeInt96(dst, src)
   373  }
   374  
   375  func (t int96Type) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   376  	return enc.DecodeInt96(dst, src)
   377  }
   378  
   379  type floatType struct{}
   380  
   381  func (t floatType) String() string                           { return "FLOAT" }
   382  func (t floatType) Kind() Kind                               { return Float }
   383  func (t floatType) Length() int                              { return 32 }
   384  func (t floatType) EstimateSize(n int) int64                 { return 4 * int64(n) }
   385  func (t floatType) Compare(a, b Value) int                   { return compareFloat32(a.Float(), b.Float()) }
   386  func (t floatType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   387  func (t floatType) LogicalType() *format.LogicalType         { return nil }
   388  func (t floatType) ConvertedType() *deprecated.ConvertedType { return nil }
   389  func (t floatType) PhysicalType() *format.Type               { return &physicalTypes[Float] }
   390  
   391  func (t floatType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   392  	return newFloatColumnIndexer()
   393  }
   394  
   395  func (t floatType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   396  	return newFloatColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   397  }
   398  
   399  func (t floatType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   400  	return newFloatDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   401  }
   402  
   403  func (t floatType) NewPage(columnIndex, numValues int, data []byte) Page {
   404  	return newFloatPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   405  }
   406  
   407  func (t floatType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   408  	return enc.EncodeFloat(dst, src)
   409  }
   410  
   411  func (t floatType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   412  	return enc.DecodeFloat(dst, src)
   413  }
   414  
   415  type doubleType struct{}
   416  
   417  func (t doubleType) String() string                           { return "DOUBLE" }
   418  func (t doubleType) Kind() Kind                               { return Double }
   419  func (t doubleType) Length() int                              { return 64 }
   420  func (t doubleType) EstimateSize(n int) int64                 { return 8 * int64(n) }
   421  func (t doubleType) Compare(a, b Value) int                   { return compareFloat64(a.Double(), b.Double()) }
   422  func (t doubleType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   423  func (t doubleType) LogicalType() *format.LogicalType         { return nil }
   424  func (t doubleType) ConvertedType() *deprecated.ConvertedType { return nil }
   425  func (t doubleType) PhysicalType() *format.Type               { return &physicalTypes[Double] }
   426  
   427  func (t doubleType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   428  	return newDoubleColumnIndexer()
   429  }
   430  
   431  func (t doubleType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   432  	return newDoubleColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   433  }
   434  
   435  func (t doubleType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   436  	return newDoubleDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   437  }
   438  
   439  func (t doubleType) NewPage(columnIndex, numValues int, data []byte) Page {
   440  	return newDoublePage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   441  }
   442  
   443  func (t doubleType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   444  	return enc.EncodeDouble(dst, src)
   445  }
   446  
   447  func (t doubleType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   448  	return enc.DecodeDouble(dst, src)
   449  }
   450  
   451  type byteArrayType struct{}
   452  
   453  func (t byteArrayType) String() string                           { return "BYTE_ARRAY" }
   454  func (t byteArrayType) Kind() Kind                               { return ByteArray }
   455  func (t byteArrayType) Length() int                              { return 0 }
   456  func (t byteArrayType) EstimateSize(n int) int64                 { return 10 * int64(n) }
   457  func (t byteArrayType) Compare(a, b Value) int                   { return bytes.Compare(a.ByteArray(), b.ByteArray()) }
   458  func (t byteArrayType) ColumnOrder() *format.ColumnOrder         { return &typeDefinedColumnOrder }
   459  func (t byteArrayType) LogicalType() *format.LogicalType         { return nil }
   460  func (t byteArrayType) ConvertedType() *deprecated.ConvertedType { return nil }
   461  func (t byteArrayType) PhysicalType() *format.Type               { return &physicalTypes[ByteArray] }
   462  
   463  func (t byteArrayType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   464  	return newByteArrayColumnIndexer(sizeLimit)
   465  }
   466  
   467  func (t byteArrayType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   468  	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   469  }
   470  
   471  func (t byteArrayType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   472  	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   473  }
   474  
   475  func (t byteArrayType) NewPage(columnIndex, numValues int, data []byte) Page {
   476  	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   477  }
   478  
   479  func (t byteArrayType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   480  	return enc.EncodeByteArray(dst, src)
   481  }
   482  
   483  func (t byteArrayType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   484  	return enc.DecodeByteArray(dst, src)
   485  }
   486  
   487  type fixedLenByteArrayType struct{ length int }
   488  
   489  func (t fixedLenByteArrayType) String() string {
   490  	return fmt.Sprintf("FIXED_LEN_BYTE_ARRAY(%d)", t.length)
   491  }
   492  
   493  func (t fixedLenByteArrayType) Kind() Kind { return FixedLenByteArray }
   494  
   495  func (t fixedLenByteArrayType) Length() int { return t.length }
   496  
   497  func (t fixedLenByteArrayType) EstimateSize(n int) int64 { return int64(t.length) * int64(n) }
   498  
   499  func (t fixedLenByteArrayType) Compare(a, b Value) int {
   500  	return bytes.Compare(a.ByteArray(), b.ByteArray())
   501  }
   502  
   503  func (t fixedLenByteArrayType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
   504  
   505  func (t fixedLenByteArrayType) LogicalType() *format.LogicalType { return nil }
   506  
   507  func (t fixedLenByteArrayType) ConvertedType() *deprecated.ConvertedType { return nil }
   508  
   509  func (t fixedLenByteArrayType) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }
   510  
   511  func (t fixedLenByteArrayType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   512  	return newFixedLenByteArrayColumnIndexer(t.length, sizeLimit)
   513  }
   514  
   515  func (t fixedLenByteArrayType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   516  	return newFixedLenByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   517  }
   518  
   519  func (t fixedLenByteArrayType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   520  	return newFixedLenByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   521  }
   522  
   523  func (t fixedLenByteArrayType) NewPage(columnIndex, numValues int, data []byte) Page {
   524  	return newFixedLenByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   525  }
   526  
   527  func (t fixedLenByteArrayType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   528  	return enc.EncodeFixedLenByteArray(dst, src, t.length)
   529  }
   530  
   531  func (t fixedLenByteArrayType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   532  	return enc.DecodeFixedLenByteArray(dst, src, t.length)
   533  }
   534  
   535  // BE128 stands for "big-endian 128 bits". This type is used as a special case
   536  // for fixed-length byte arrays of 16 bytes, which are commonly used to
   537  // represent columns of random unique identifiers such as UUIDs.
   538  //
   539  // Comparisons of BE128 values use the natural byte order, the zeroth byte is
   540  // the most significant byte.
   541  //
   542  // The special case is intended to provide optimizations based on the knowledge
   543  // that the values are 16 bytes long. Stronger type checking can also be applied
   544  // by the compiler when using [16]byte values rather than []byte, reducing the
   545  // risk of errors on these common code paths.
   546  type be128Type struct{}
   547  
   548  func (t be128Type) String() string { return "FIXED_LEN_BYTE_ARRAY(16)" }
   549  
   550  func (t be128Type) Kind() Kind { return FixedLenByteArray }
   551  
   552  func (t be128Type) Length() int { return 16 }
   553  
   554  func (t be128Type) EstimateSize(n int) int64 { return 16 * int64(n) }
   555  
   556  func (t be128Type) Compare(a, b Value) int {
   557  	return compareBE128((*[16]byte)(a.ByteArray()), (*[16]byte)(b.ByteArray()))
   558  }
   559  
   560  func (t be128Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
   561  
   562  func (t be128Type) LogicalType() *format.LogicalType { return nil }
   563  
   564  func (t be128Type) ConvertedType() *deprecated.ConvertedType { return nil }
   565  
   566  func (t be128Type) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }
   567  
   568  func (t be128Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   569  	return newBE128ColumnIndexer()
   570  }
   571  
   572  func (t be128Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   573  	return newBE128ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   574  }
   575  
   576  func (t be128Type) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   577  	return newBE128Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   578  }
   579  
   580  func (t be128Type) NewPage(columnIndex, numValues int, data []byte) Page {
   581  	return newBE128Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   582  }
   583  
   584  func (t be128Type) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   585  	return enc.EncodeFixedLenByteArray(dst, src, 16)
   586  }
   587  
   588  func (t be128Type) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   589  	return enc.DecodeFixedLenByteArray(dst, src, 16)
   590  }
   591  
   592  // FixedLenByteArrayType constructs a type for fixed-length values of the given
   593  // size (in bytes).
   594  func FixedLenByteArrayType(length int) Type {
   595  	switch length {
   596  	case 16:
   597  		return be128Type{}
   598  	default:
   599  		return fixedLenByteArrayType{length: length}
   600  	}
   601  }
   602  
   603  // Int constructs a leaf node of signed integer logical type of the given bit
   604  // width.
   605  //
   606  // The bit width must be one of 8, 16, 32, 64, or the function will panic.
   607  func Int(bitWidth int) Node {
   608  	return Leaf(integerType(bitWidth, &signedIntTypes))
   609  }
   610  
   611  // Uint constructs a leaf node of unsigned integer logical type of the given
   612  // bit width.
   613  //
   614  // The bit width must be one of 8, 16, 32, 64, or the function will panic.
   615  func Uint(bitWidth int) Node {
   616  	return Leaf(integerType(bitWidth, &unsignedIntTypes))
   617  }
   618  
   619  func integerType(bitWidth int, types *[4]intType) *intType {
   620  	switch bitWidth {
   621  	case 8:
   622  		return &types[0]
   623  	case 16:
   624  		return &types[1]
   625  	case 32:
   626  		return &types[2]
   627  	case 64:
   628  		return &types[3]
   629  	default:
   630  		panic(fmt.Sprintf("cannot create a %d bits parquet integer node", bitWidth))
   631  	}
   632  }
   633  
   634  var signedIntTypes = [...]intType{
   635  	{BitWidth: 8, IsSigned: true},
   636  	{BitWidth: 16, IsSigned: true},
   637  	{BitWidth: 32, IsSigned: true},
   638  	{BitWidth: 64, IsSigned: true},
   639  }
   640  
   641  var unsignedIntTypes = [...]intType{
   642  	{BitWidth: 8, IsSigned: false},
   643  	{BitWidth: 16, IsSigned: false},
   644  	{BitWidth: 32, IsSigned: false},
   645  	{BitWidth: 64, IsSigned: false},
   646  }
   647  
   648  type intType format.IntType
   649  
   650  func (t *intType) String() string { return (*format.IntType)(t).String() }
   651  
   652  func (t *intType) Kind() Kind {
   653  	if t.BitWidth == 64 {
   654  		return Int64
   655  	} else {
   656  		return Int32
   657  	}
   658  }
   659  
   660  func (t *intType) Length() int { return int(t.BitWidth) }
   661  
   662  func (t *intType) EstimateSize(n int) int64 { return int64(t.BitWidth/8) * int64(n) }
   663  
   664  func (t *intType) Compare(a, b Value) int {
   665  	if t.BitWidth == 64 {
   666  		i1 := a.Int64()
   667  		i2 := b.Int64()
   668  		if t.IsSigned {
   669  			return compareInt64(i1, i2)
   670  		} else {
   671  			return compareUint64(uint64(i1), uint64(i2))
   672  		}
   673  	} else {
   674  		i1 := a.Int32()
   675  		i2 := b.Int32()
   676  		if t.IsSigned {
   677  			return compareInt32(i1, i2)
   678  		} else {
   679  			return compareUint32(uint32(i1), uint32(i2))
   680  		}
   681  	}
   682  }
   683  
   684  func (t *intType) ColumnOrder() *format.ColumnOrder {
   685  	return &typeDefinedColumnOrder
   686  }
   687  
   688  func (t *intType) PhysicalType() *format.Type {
   689  	if t.BitWidth == 64 {
   690  		return &physicalTypes[Int64]
   691  	} else {
   692  		return &physicalTypes[Int32]
   693  	}
   694  }
   695  
   696  func (t *intType) LogicalType() *format.LogicalType {
   697  	return &format.LogicalType{Integer: (*format.IntType)(t)}
   698  }
   699  
   700  func (t *intType) ConvertedType() *deprecated.ConvertedType {
   701  	convertedType := bits.Len8(uint8(t.BitWidth)/8) - 1 // 8=>0, 16=>1, 32=>2, 64=>4
   702  	if t.IsSigned {
   703  		convertedType += int(deprecated.Int8)
   704  	} else {
   705  		convertedType += int(deprecated.Uint8)
   706  	}
   707  	return &convertedTypes[convertedType]
   708  }
   709  
   710  func (t *intType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   711  	if t.IsSigned {
   712  		if t.BitWidth == 64 {
   713  			return newInt64ColumnIndexer()
   714  		} else {
   715  			return newInt32ColumnIndexer()
   716  		}
   717  	} else {
   718  		if t.BitWidth == 64 {
   719  			return newUint64ColumnIndexer()
   720  		} else {
   721  			return newUint32ColumnIndexer()
   722  		}
   723  	}
   724  }
   725  
   726  func (t *intType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   727  	if t.IsSigned {
   728  		if t.BitWidth == 64 {
   729  			return newInt64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   730  		} else {
   731  			return newInt32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   732  		}
   733  	} else {
   734  		if t.BitWidth == 64 {
   735  			return newUint64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   736  		} else {
   737  			return newUint32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   738  		}
   739  	}
   740  }
   741  
   742  func (t *intType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   743  	if t.IsSigned {
   744  		if t.BitWidth == 64 {
   745  			return newInt64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   746  		} else {
   747  			return newInt32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   748  		}
   749  	} else {
   750  		if t.BitWidth == 64 {
   751  			return newUint64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   752  		} else {
   753  			return newUint32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   754  		}
   755  	}
   756  }
   757  
   758  func (t *intType) NewPage(columnIndex, numValues int, data []byte) Page {
   759  	if t.IsSigned {
   760  		if t.BitWidth == 64 {
   761  			return newInt64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   762  		} else {
   763  			return newInt32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   764  		}
   765  	} else {
   766  		if t.BitWidth == 64 {
   767  			return newUint64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   768  		} else {
   769  			return newUint32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   770  		}
   771  	}
   772  }
   773  
   774  func (t *intType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   775  	if t.BitWidth == 64 {
   776  		return enc.EncodeInt64(dst, src)
   777  	} else {
   778  		return enc.EncodeInt32(dst, src)
   779  	}
   780  }
   781  
   782  func (t *intType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   783  	if t.BitWidth == 64 {
   784  		return enc.DecodeInt64(dst, src)
   785  	} else {
   786  		return enc.DecodeInt32(dst, src)
   787  	}
   788  }
   789  
   790  // Decimal constructs a leaf node of decimal logical type with the given
   791  // scale, precision, and underlying type.
   792  //
   793  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
   794  func Decimal(scale, precision int, typ Type) Node {
   795  	switch typ.Kind() {
   796  	case Int32, Int64, FixedLenByteArray:
   797  	default:
   798  		panic("DECIMAL node must annotate Int32, Int64 or FixedLenByteArray but got " + typ.String())
   799  	}
   800  	return Leaf(&decimalType{
   801  		decimal: format.DecimalType{
   802  			Scale:     int32(scale),
   803  			Precision: int32(precision),
   804  		},
   805  		Type: typ,
   806  	})
   807  }
   808  
   809  type decimalType struct {
   810  	decimal format.DecimalType
   811  	Type
   812  }
   813  
   814  func (t *decimalType) String() string { return t.decimal.String() }
   815  
   816  func (t *decimalType) LogicalType() *format.LogicalType {
   817  	return &format.LogicalType{Decimal: &t.decimal}
   818  }
   819  
   820  func (t *decimalType) ConvertedType() *deprecated.ConvertedType {
   821  	return &convertedTypes[deprecated.Decimal]
   822  }
   823  
   824  // String constructs a leaf node of UTF8 logical type.
   825  //
   826  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#string
   827  func String() Node { return Leaf(&stringType{}) }
   828  
   829  type stringType format.StringType
   830  
   831  func (t *stringType) String() string { return (*format.StringType)(t).String() }
   832  
   833  func (t *stringType) Kind() Kind { return ByteArray }
   834  
   835  func (t *stringType) Length() int { return 0 }
   836  
   837  func (t *stringType) EstimateSize(n int) int64 { return 10 * int64(n) }
   838  
   839  func (t *stringType) Compare(a, b Value) int {
   840  	return bytes.Compare(a.ByteArray(), b.ByteArray())
   841  }
   842  
   843  func (t *stringType) ColumnOrder() *format.ColumnOrder {
   844  	return &typeDefinedColumnOrder
   845  }
   846  
   847  func (t *stringType) PhysicalType() *format.Type {
   848  	return &physicalTypes[ByteArray]
   849  }
   850  
   851  func (t *stringType) LogicalType() *format.LogicalType {
   852  	return &format.LogicalType{UTF8: (*format.StringType)(t)}
   853  }
   854  
   855  func (t *stringType) ConvertedType() *deprecated.ConvertedType {
   856  	return &convertedTypes[deprecated.UTF8]
   857  }
   858  
   859  func (t *stringType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   860  	return newByteArrayColumnIndexer(sizeLimit)
   861  }
   862  
   863  func (t *stringType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   864  	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   865  }
   866  
   867  func (t *stringType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   868  	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   869  }
   870  
   871  func (t *stringType) NewPage(columnIndex, numValues int, data []byte) Page {
   872  	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   873  }
   874  
   875  func (t *stringType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   876  	return enc.EncodeByteArray(dst, src)
   877  }
   878  
   879  func (t *stringType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   880  	return enc.DecodeByteArray(dst, src)
   881  }
   882  
   883  // UUID constructs a leaf node of UUID logical type.
   884  //
   885  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#uuid
   886  func UUID() Node { return Leaf(&uuidType{}) }
   887  
   888  type uuidType format.UUIDType
   889  
   890  func (t *uuidType) String() string { return (*format.UUIDType)(t).String() }
   891  
   892  func (t *uuidType) Kind() Kind { return FixedLenByteArray }
   893  
   894  func (t *uuidType) Length() int { return 16 }
   895  
   896  func (t *uuidType) EstimateSize(n int) int64 { return 16 * int64(n) }
   897  
   898  func (t *uuidType) Compare(a, b Value) int {
   899  	return compareBE128((*[16]byte)(a.ByteArray()), (*[16]byte)(b.ByteArray()))
   900  }
   901  
   902  func (t *uuidType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
   903  
   904  func (t *uuidType) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }
   905  
   906  func (t *uuidType) LogicalType() *format.LogicalType {
   907  	return &format.LogicalType{UUID: (*format.UUIDType)(t)}
   908  }
   909  
   910  func (t *uuidType) ConvertedType() *deprecated.ConvertedType { return nil }
   911  
   912  func (t *uuidType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   913  	return newBE128ColumnIndexer()
   914  }
   915  
   916  func (t *uuidType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   917  	return newBE128Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   918  }
   919  
   920  func (t *uuidType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   921  	return newBE128ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   922  }
   923  
   924  func (t *uuidType) NewPage(columnIndex, numValues int, data []byte) Page {
   925  	return newBE128Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   926  }
   927  
   928  func (t *uuidType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   929  	return enc.EncodeFixedLenByteArray(dst, src, 16)
   930  }
   931  
   932  func (t *uuidType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   933  	return enc.DecodeFixedLenByteArray(dst, src, 16)
   934  }
   935  
   936  // Enum constructs a leaf node with a logical type representing enumerations.
   937  //
   938  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#enum
   939  func Enum() Node { return Leaf(&enumType{}) }
   940  
   941  type enumType format.EnumType
   942  
   943  func (t *enumType) String() string { return (*format.EnumType)(t).String() }
   944  
   945  func (t *enumType) Kind() Kind { return ByteArray }
   946  
   947  func (t *enumType) Length() int { return 0 }
   948  
   949  func (t *enumType) EstimateSize(n int) int64 { return 10 * int64(n) }
   950  
   951  func (t *enumType) Compare(a, b Value) int {
   952  	return bytes.Compare(a.ByteArray(), b.ByteArray())
   953  }
   954  
   955  func (t *enumType) ColumnOrder() *format.ColumnOrder {
   956  	return &typeDefinedColumnOrder
   957  }
   958  
   959  func (t *enumType) PhysicalType() *format.Type {
   960  	return &physicalTypes[ByteArray]
   961  }
   962  
   963  func (t *enumType) LogicalType() *format.LogicalType {
   964  	return &format.LogicalType{Enum: (*format.EnumType)(t)}
   965  }
   966  
   967  func (t *enumType) ConvertedType() *deprecated.ConvertedType {
   968  	return &convertedTypes[deprecated.Enum]
   969  }
   970  
   971  func (t *enumType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
   972  	return newByteArrayColumnIndexer(sizeLimit)
   973  }
   974  
   975  func (t *enumType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
   976  	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   977  }
   978  
   979  func (t *enumType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
   980  	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
   981  }
   982  
   983  func (t *enumType) NewPage(columnIndex, numValues int, data []byte) Page {
   984  	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
   985  }
   986  
   987  func (t *enumType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   988  	return enc.EncodeByteArray(dst, src)
   989  }
   990  
   991  func (t *enumType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
   992  	return enc.DecodeByteArray(dst, src)
   993  }
   994  
   995  // JSON constructs a leaf node of JSON logical type.
   996  //
   997  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#json
   998  func JSON() Node { return Leaf(&jsonType{}) }
   999  
  1000  type jsonType format.JsonType
  1001  
  1002  func (t *jsonType) String() string { return (*format.JsonType)(t).String() }
  1003  
  1004  func (t *jsonType) Kind() Kind { return ByteArray }
  1005  
  1006  func (t *jsonType) Length() int { return 0 }
  1007  
  1008  func (t *jsonType) EstimateSize(n int) int64 { return 10 * int64(n) }
  1009  
  1010  func (t *jsonType) Compare(a, b Value) int {
  1011  	return bytes.Compare(a.ByteArray(), b.ByteArray())
  1012  }
  1013  
  1014  func (t *jsonType) ColumnOrder() *format.ColumnOrder {
  1015  	return &typeDefinedColumnOrder
  1016  }
  1017  
  1018  func (t *jsonType) PhysicalType() *format.Type {
  1019  	return &physicalTypes[ByteArray]
  1020  }
  1021  
  1022  func (t *jsonType) LogicalType() *format.LogicalType {
  1023  	return &format.LogicalType{Json: (*format.JsonType)(t)}
  1024  }
  1025  
  1026  func (t *jsonType) ConvertedType() *deprecated.ConvertedType {
  1027  	return &convertedTypes[deprecated.Json]
  1028  }
  1029  
  1030  func (t *jsonType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1031  	return newByteArrayColumnIndexer(sizeLimit)
  1032  }
  1033  
  1034  func (t *jsonType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
  1035  	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1036  }
  1037  
  1038  func (t *jsonType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1039  	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1040  }
  1041  
  1042  func (t *jsonType) NewPage(columnIndex, numValues int, data []byte) Page {
  1043  	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1044  }
  1045  
  1046  func (t *jsonType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1047  	return enc.EncodeByteArray(dst, src)
  1048  }
  1049  
  1050  func (t *jsonType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1051  	return enc.DecodeByteArray(dst, src)
  1052  }
  1053  
  1054  // BSON constructs a leaf node of BSON logical type.
  1055  //
  1056  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#bson
  1057  func BSON() Node { return Leaf(&bsonType{}) }
  1058  
  1059  type bsonType format.BsonType
  1060  
  1061  func (t *bsonType) String() string { return (*format.BsonType)(t).String() }
  1062  
  1063  func (t *bsonType) Kind() Kind { return ByteArray }
  1064  
  1065  func (t *bsonType) Length() int { return 0 }
  1066  
  1067  func (t *bsonType) EstimateSize(n int) int64 { return 10 * int64(n) }
  1068  
  1069  func (t *bsonType) Compare(a, b Value) int {
  1070  	return bytes.Compare(a.ByteArray(), b.ByteArray())
  1071  }
  1072  
  1073  func (t *bsonType) ColumnOrder() *format.ColumnOrder {
  1074  	return &typeDefinedColumnOrder
  1075  }
  1076  
  1077  func (t *bsonType) PhysicalType() *format.Type {
  1078  	return &physicalTypes[ByteArray]
  1079  }
  1080  
  1081  func (t *bsonType) LogicalType() *format.LogicalType {
  1082  	return &format.LogicalType{Bson: (*format.BsonType)(t)}
  1083  }
  1084  
  1085  func (t *bsonType) ConvertedType() *deprecated.ConvertedType {
  1086  	return &convertedTypes[deprecated.Bson]
  1087  }
  1088  
  1089  func (t *bsonType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1090  	return newByteArrayColumnIndexer(sizeLimit)
  1091  }
  1092  
  1093  func (t *bsonType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
  1094  	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1095  }
  1096  
  1097  func (t *bsonType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1098  	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1099  }
  1100  
  1101  func (t *bsonType) NewPage(columnIndex, numValues int, data []byte) Page {
  1102  	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1103  }
  1104  
  1105  func (t *bsonType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1106  	return enc.EncodeByteArray(dst, src)
  1107  }
  1108  
  1109  func (t *bsonType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1110  	return enc.DecodeByteArray(dst, src)
  1111  }
  1112  
  1113  // Date constructs a leaf node of DATE logical type.
  1114  //
  1115  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date
  1116  func Date() Node { return Leaf(&dateType{}) }
  1117  
  1118  type dateType format.DateType
  1119  
  1120  func (t *dateType) String() string { return (*format.DateType)(t).String() }
  1121  
  1122  func (t *dateType) Kind() Kind { return Int32 }
  1123  
  1124  func (t *dateType) Length() int { return 32 }
  1125  
  1126  func (t *dateType) EstimateSize(n int) int64 { return 4 * int64(n) }
  1127  
  1128  func (t *dateType) Compare(a, b Value) int { return compareInt32(a.Int32(), b.Int32()) }
  1129  
  1130  func (t *dateType) ColumnOrder() *format.ColumnOrder {
  1131  	return &typeDefinedColumnOrder
  1132  }
  1133  
  1134  func (t *dateType) PhysicalType() *format.Type { return &physicalTypes[Int32] }
  1135  
  1136  func (t *dateType) LogicalType() *format.LogicalType {
  1137  	return &format.LogicalType{Date: (*format.DateType)(t)}
  1138  }
  1139  
  1140  func (t *dateType) ConvertedType() *deprecated.ConvertedType {
  1141  	return &convertedTypes[deprecated.Date]
  1142  }
  1143  
  1144  func (t *dateType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1145  	return newInt32ColumnIndexer()
  1146  }
  1147  
  1148  func (t *dateType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1149  	return newInt32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1150  }
  1151  
  1152  func (t *dateType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
  1153  	return newInt32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1154  }
  1155  
  1156  func (t *dateType) NewPage(columnIndex, numValues int, data []byte) Page {
  1157  	return newInt32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1158  }
  1159  
  1160  func (t *dateType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1161  	return enc.EncodeInt32(dst, src)
  1162  }
  1163  
  1164  func (t *dateType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1165  	return enc.DecodeInt32(dst, src)
  1166  }
  1167  
// TimeUnit represents units of time in the parquet type system.
//
// The package-level Millisecond, Microsecond, and Nanosecond values are the
// implementations declared in this file.
type TimeUnit interface {
	// Duration returns the precision of the time unit as a time.Duration
	// value (for example time.Millisecond for the Millisecond unit).
	Duration() time.Duration
	// TimeUnit converts the value to its representation in the parquet
	// thrift format.
	TimeUnit() format.TimeUnit
}
  1176  
  1177  var (
  1178  	Millisecond TimeUnit = &millisecond{}
  1179  	Microsecond TimeUnit = &microsecond{}
  1180  	Nanosecond  TimeUnit = &nanosecond{}
  1181  )
  1182  
  1183  type millisecond format.MilliSeconds
  1184  
  1185  func (u *millisecond) Duration() time.Duration { return time.Millisecond }
  1186  func (u *millisecond) TimeUnit() format.TimeUnit {
  1187  	return format.TimeUnit{Millis: (*format.MilliSeconds)(u)}
  1188  }
  1189  
  1190  type microsecond format.MicroSeconds
  1191  
  1192  func (u *microsecond) Duration() time.Duration { return time.Microsecond }
  1193  func (u *microsecond) TimeUnit() format.TimeUnit {
  1194  	return format.TimeUnit{Micros: (*format.MicroSeconds)(u)}
  1195  }
  1196  
  1197  type nanosecond format.NanoSeconds
  1198  
  1199  func (u *nanosecond) Duration() time.Duration { return time.Nanosecond }
  1200  func (u *nanosecond) TimeUnit() format.TimeUnit {
  1201  	return format.TimeUnit{Nanos: (*format.NanoSeconds)(u)}
  1202  }
  1203  
  1204  // Time constructs a leaf node of TIME logical type.
  1205  //
  1206  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#time
  1207  func Time(unit TimeUnit) Node {
  1208  	return Leaf(&timeType{IsAdjustedToUTC: true, Unit: unit.TimeUnit()})
  1209  }
  1210  
  1211  type timeType format.TimeType
  1212  
  1213  func (t *timeType) useInt32() bool {
  1214  	return t.Unit.Millis != nil
  1215  }
  1216  
  1217  func (t *timeType) useInt64() bool {
  1218  	return t.Unit.Micros != nil
  1219  }
  1220  
  1221  func (t *timeType) String() string {
  1222  	return (*format.TimeType)(t).String()
  1223  }
  1224  
  1225  func (t *timeType) Kind() Kind {
  1226  	if t.useInt32() {
  1227  		return Int32
  1228  	} else {
  1229  		return Int64
  1230  	}
  1231  }
  1232  
  1233  func (t *timeType) Length() int {
  1234  	if t.useInt32() {
  1235  		return 32
  1236  	} else {
  1237  		return 64
  1238  	}
  1239  }
  1240  
  1241  func (t *timeType) EstimateSize(n int) int64 {
  1242  	if t.useInt32() {
  1243  		return 4 * int64(n)
  1244  	} else {
  1245  		return 8 * int64(n)
  1246  	}
  1247  }
  1248  
  1249  func (t *timeType) Compare(a, b Value) int {
  1250  	if t.useInt32() {
  1251  		return compareInt32(a.Int32(), b.Int32())
  1252  	} else {
  1253  		return compareInt64(a.Int64(), b.Int64())
  1254  	}
  1255  }
  1256  
  1257  func (t *timeType) ColumnOrder() *format.ColumnOrder {
  1258  	return &typeDefinedColumnOrder
  1259  }
  1260  
  1261  func (t *timeType) PhysicalType() *format.Type {
  1262  	if t.useInt32() {
  1263  		return &physicalTypes[Int32]
  1264  	} else {
  1265  		return &physicalTypes[Int64]
  1266  	}
  1267  }
  1268  
  1269  func (t *timeType) LogicalType() *format.LogicalType {
  1270  	return &format.LogicalType{Time: (*format.TimeType)(t)}
  1271  }
  1272  
  1273  func (t *timeType) ConvertedType() *deprecated.ConvertedType {
  1274  	switch {
  1275  	case t.useInt32():
  1276  		return &convertedTypes[deprecated.TimeMillis]
  1277  	case t.useInt64():
  1278  		return &convertedTypes[deprecated.TimeMicros]
  1279  	default:
  1280  		return nil
  1281  	}
  1282  }
  1283  
  1284  func (t *timeType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1285  	if t.useInt32() {
  1286  		return newInt32ColumnIndexer()
  1287  	} else {
  1288  		return newInt64ColumnIndexer()
  1289  	}
  1290  }
  1291  
  1292  func (t *timeType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1293  	if t.useInt32() {
  1294  		return newInt32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1295  	} else {
  1296  		return newInt64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1297  	}
  1298  }
  1299  
  1300  func (t *timeType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
  1301  	if t.useInt32() {
  1302  		return newInt32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1303  	} else {
  1304  		return newInt64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1305  	}
  1306  }
  1307  
  1308  func (t *timeType) NewPage(columnIndex, numValues int, data []byte) Page {
  1309  	if t.useInt32() {
  1310  		return newInt32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1311  	} else {
  1312  		return newInt64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1313  	}
  1314  }
  1315  
  1316  func (t *timeType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1317  	if t.useInt32() {
  1318  		return enc.EncodeInt32(dst, src)
  1319  	} else {
  1320  		return enc.EncodeInt64(dst, src)
  1321  	}
  1322  }
  1323  
  1324  func (t *timeType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1325  	if t.useInt32() {
  1326  		return enc.DecodeInt32(dst, src)
  1327  	} else {
  1328  		return enc.DecodeInt64(dst, src)
  1329  	}
  1330  }
  1331  
  1332  // Timestamp constructs of leaf node of TIMESTAMP logical type.
  1333  //
  1334  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp
  1335  func Timestamp(unit TimeUnit) Node {
  1336  	return Leaf(&timestampType{IsAdjustedToUTC: true, Unit: unit.TimeUnit()})
  1337  }
  1338  
  1339  type timestampType format.TimestampType
  1340  
  1341  func (t *timestampType) String() string { return (*format.TimestampType)(t).String() }
  1342  
  1343  func (t *timestampType) Kind() Kind { return Int64 }
  1344  
  1345  func (t *timestampType) Length() int { return 64 }
  1346  
  1347  func (t *timestampType) EstimateSize(n int) int64 { return 8 * int64(n) }
  1348  
  1349  func (t *timestampType) Compare(a, b Value) int { return compareInt64(a.Int64(), b.Int64()) }
  1350  
  1351  func (t *timestampType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }
  1352  
  1353  func (t *timestampType) PhysicalType() *format.Type { return &physicalTypes[Int64] }
  1354  
  1355  func (t *timestampType) LogicalType() *format.LogicalType {
  1356  	return &format.LogicalType{Timestamp: (*format.TimestampType)(t)}
  1357  }
  1358  
  1359  func (t *timestampType) ConvertedType() *deprecated.ConvertedType {
  1360  	switch {
  1361  	case t.Unit.Millis != nil:
  1362  		return &convertedTypes[deprecated.TimestampMillis]
  1363  	case t.Unit.Micros != nil:
  1364  		return &convertedTypes[deprecated.TimestampMicros]
  1365  	default:
  1366  		return nil
  1367  	}
  1368  }
  1369  
  1370  func (t *timestampType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
  1371  	return newInt64ColumnIndexer()
  1372  }
  1373  
  1374  func (t *timestampType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
  1375  	return newInt64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1376  }
  1377  
  1378  func (t *timestampType) NewDictionary(columnIndex, numValues int, data []byte) Dictionary {
  1379  	return newInt64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1380  }
  1381  
  1382  func (t *timestampType) NewPage(columnIndex, numValues int, data []byte) Page {
  1383  	return newInt64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
  1384  }
  1385  
  1386  func (t *timestampType) Encode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1387  	return enc.EncodeInt64(dst, src)
  1388  }
  1389  
  1390  func (t *timestampType) Decode(dst, src []byte, enc encoding.Encoding) ([]byte, error) {
  1391  	return enc.DecodeInt64(dst, src)
  1392  }
  1393  
  1394  // List constructs a node of LIST logical type.
  1395  //
  1396  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
  1397  func List(of Node) Node {
  1398  	return listNode{Group{"list": Repeated(Group{"element": of})}}
  1399  }
  1400  
  1401  type listNode struct{ Group }
  1402  
  1403  func (listNode) Type() Type { return &listType{} }
  1404  
  1405  type listType format.ListType
  1406  
  1407  func (t *listType) String() string { return (*format.ListType)(t).String() }
  1408  
  1409  func (t *listType) Kind() Kind { panic("cannot call Kind on parquet LIST type") }
  1410  
  1411  func (t *listType) Length() int { return 0 }
  1412  
  1413  func (t *listType) EstimateSize(int) int64 { return 0 }
  1414  
  1415  func (t *listType) Compare(Value, Value) int { panic("cannot compare values on parquet LIST type") }
  1416  
  1417  func (t *listType) ColumnOrder() *format.ColumnOrder { return nil }
  1418  
  1419  func (t *listType) PhysicalType() *format.Type { return nil }
  1420  
  1421  func (t *listType) LogicalType() *format.LogicalType {
  1422  	return &format.LogicalType{List: (*format.ListType)(t)}
  1423  }
  1424  
  1425  func (t *listType) ConvertedType() *deprecated.ConvertedType {
  1426  	return &convertedTypes[deprecated.List]
  1427  }
  1428  
  1429  func (t *listType) NewColumnIndexer(int) ColumnIndexer {
  1430  	panic("create create column indexer from parquet LIST type")
  1431  }
  1432  
  1433  func (t *listType) NewDictionary(int, int, []byte) Dictionary {
  1434  	panic("cannot create dictionary from parquet LIST type")
  1435  }
  1436  
  1437  func (t *listType) NewColumnBuffer(int, int) ColumnBuffer {
  1438  	panic("cannot create column buffer from parquet LIST type")
  1439  }
  1440  
  1441  func (t *listType) NewPage(int, int, []byte) Page {
  1442  	panic("cannot create page from parquet LIST type")
  1443  }
  1444  
  1445  func (t *listType) Encode(dst, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1446  	panic("cannot encode parquet LIST type")
  1447  }
  1448  
  1449  func (t *listType) Decode(dst, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1450  	panic("cannot decode parquet LIST type")
  1451  }
  1452  
  1453  // Map constructs a node of MAP logical type.
  1454  //
  1455  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps
  1456  func Map(key, value Node) Node {
  1457  	return mapNode{Group{
  1458  		"key_value": Repeated(Group{
  1459  			"key":   Required(key),
  1460  			"value": value,
  1461  		}),
  1462  	}}
  1463  }
  1464  
  1465  type mapNode struct{ Group }
  1466  
  1467  func (mapNode) Type() Type { return &mapType{} }
  1468  
  1469  type mapType format.MapType
  1470  
  1471  func (t *mapType) String() string { return (*format.MapType)(t).String() }
  1472  
  1473  func (t *mapType) Kind() Kind { panic("cannot call Kind on parquet MAP type") }
  1474  
  1475  func (t *mapType) Length() int { return 0 }
  1476  
  1477  func (t *mapType) EstimateSize(int) int64 { return 0 }
  1478  
  1479  func (t *mapType) Compare(Value, Value) int { panic("cannot compare values on parquet MAP type") }
  1480  
  1481  func (t *mapType) ColumnOrder() *format.ColumnOrder { return nil }
  1482  
  1483  func (t *mapType) PhysicalType() *format.Type { return nil }
  1484  
  1485  func (t *mapType) LogicalType() *format.LogicalType {
  1486  	return &format.LogicalType{Map: (*format.MapType)(t)}
  1487  }
  1488  
  1489  func (t *mapType) ConvertedType() *deprecated.ConvertedType {
  1490  	return &convertedTypes[deprecated.Map]
  1491  }
  1492  
  1493  func (t *mapType) NewColumnIndexer(int) ColumnIndexer {
  1494  	panic("create create column indexer from parquet MAP type")
  1495  }
  1496  
  1497  func (t *mapType) NewDictionary(int, int, []byte) Dictionary {
  1498  	panic("cannot create dictionary from parquet MAP type")
  1499  }
  1500  
  1501  func (t *mapType) NewColumnBuffer(int, int) ColumnBuffer {
  1502  	panic("cannot create column buffer from parquet MAP type")
  1503  }
  1504  
  1505  func (t *mapType) NewPage(int, int, []byte) Page {
  1506  	panic("cannot create page from parquet MAP type")
  1507  }
  1508  
  1509  func (t *mapType) Encode(dst, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1510  	panic("cannot encode parquet MAP type")
  1511  }
  1512  
  1513  func (t *mapType) Decode(dst, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1514  	panic("cannot decode parquet MAP type")
  1515  }
  1516  
  1517  type nullType format.NullType
  1518  
  1519  func (t *nullType) String() string { return (*format.NullType)(t).String() }
  1520  
  1521  func (t *nullType) Kind() Kind { return -1 }
  1522  
  1523  func (t *nullType) Length() int { return 0 }
  1524  
  1525  func (t *nullType) EstimateSize(int) int64 { return 0 }
  1526  
  1527  func (t *nullType) Compare(Value, Value) int { panic("cannot compare values on parquet NULL type") }
  1528  
  1529  func (t *nullType) ColumnOrder() *format.ColumnOrder { return nil }
  1530  
  1531  func (t *nullType) PhysicalType() *format.Type { return nil }
  1532  
  1533  func (t *nullType) LogicalType() *format.LogicalType {
  1534  	return &format.LogicalType{Unknown: (*format.NullType)(t)}
  1535  }
  1536  
  1537  func (t *nullType) ConvertedType() *deprecated.ConvertedType { return nil }
  1538  
  1539  func (t *nullType) NewColumnIndexer(int) ColumnIndexer {
  1540  	panic("create create column indexer from parquet NULL type")
  1541  }
  1542  
  1543  func (t *nullType) NewDictionary(int, int, []byte) Dictionary {
  1544  	panic("cannot create dictionary from parquet NULL type")
  1545  }
  1546  
  1547  func (t *nullType) NewColumnBuffer(int, int) ColumnBuffer {
  1548  	panic("cannot create column buffer from parquet NULL type")
  1549  }
  1550  
  1551  func (t *nullType) NewPage(columnIndex, numValues int, _ []byte) Page {
  1552  	return newNullPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
  1553  }
  1554  
  1555  func (t *nullType) Encode(dst, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1556  	return dst[:0], nil
  1557  }
  1558  
  1559  func (t *nullType) Decode(dst, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1560  	return dst[:0], nil
  1561  }
  1562  
  1563  type groupType struct{}
  1564  
  1565  func (groupType) String() string { return "group" }
  1566  
  1567  func (groupType) Kind() Kind {
  1568  	panic("cannot call Kind on parquet group")
  1569  }
  1570  
  1571  func (groupType) Compare(Value, Value) int {
  1572  	panic("cannot compare values on parquet group")
  1573  }
  1574  
  1575  func (groupType) NewColumnIndexer(int) ColumnIndexer {
  1576  	panic("cannot create column indexer from parquet group")
  1577  }
  1578  
  1579  func (groupType) NewDictionary(int, int, []byte) Dictionary {
  1580  	panic("cannot create dictionary from parquet group")
  1581  }
  1582  
  1583  func (t groupType) NewColumnBuffer(int, int) ColumnBuffer {
  1584  	panic("cannot create column buffer from parquet group")
  1585  }
  1586  
  1587  func (t groupType) NewPage(int, int, []byte) Page {
  1588  	panic("cannot create page from parquet group")
  1589  }
  1590  
  1591  func (groupType) Encode(_, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1592  	panic("cannot encode parquet group")
  1593  }
  1594  
  1595  func (groupType) Decode(_, _ []byte, _ encoding.Encoding) ([]byte, error) {
  1596  	panic("cannot decode parquet group")
  1597  }
  1598  
  1599  func (groupType) Length() int { return 0 }
  1600  
  1601  func (groupType) EstimateSize(int) int64 { return 0 }
  1602  
  1603  func (groupType) ColumnOrder() *format.ColumnOrder { return nil }
  1604  
  1605  func (groupType) PhysicalType() *format.Type { return nil }
  1606  
  1607  func (groupType) LogicalType() *format.LogicalType { return nil }
  1608  
  1609  func (groupType) ConvertedType() *deprecated.ConvertedType { return nil }