github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/value.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"reflect"
    10  	"strconv"
    11  	"time"
    12  	"unsafe"
    13  
    14  	"github.com/google/uuid"
    15  	"github.com/segmentio/parquet-go/deprecated"
    16  	"github.com/segmentio/parquet-go/format"
    17  	"github.com/segmentio/parquet-go/internal/unsafecast"
    18  )
    19  
const (
	// defaultValueBufferSize is the length of the scratch buffer allocated
	// by copyValues when the caller does not provide one.
	//
	// 170 x sizeof(Value) = 4KB
	defaultValueBufferSize = 170
)
    24  
// The Value type is similar to the reflect.Value abstraction of Go values, but
// for parquet values. Value instances wrap underlying Go values mapped to one
// of the parquet physical types.
//
// Value instances are small, immutable objects, and usually passed by value
// between function calls.
//
// The zero-value of Value represents the null parquet value.
type Value struct {
	// data
	//
	// ptr is set by the byte array and INT96 constructors to the address of
	// the first byte of the payload; the scalar constructors leave it nil.
	ptr *byte
	// u64 holds the bits of scalar values, or the payload length in bytes
	// when ptr is in use.
	u64 uint64
	// type
	kind int8 // XOR(Kind) so the zero-value is <null>
	// levels
	definitionLevel byte
	repetitionLevel byte
	columnIndex     int16 // XOR so the zero-value is -1
}
    44  
// ValueReader is an interface implemented by types that support reading
// batches of values.
type ValueReader interface {
	// ReadValues reads values into the buffer passed as argument and returns
	// the number of values read. When all values have been read, the error
	// will be io.EOF.
	ReadValues([]Value) (int, error)
}
    52  
// ValueReaderAt is an interface implemented by types that support reading
// values at offsets specified by the application.
type ValueReaderAt interface {
	// ReadValuesAt reads values into the buffer passed as first argument.
	//
	// NOTE(review): the int64 argument is presumably the offset at which
	// reading starts and the returned int the number of values read; confirm
	// against the implementations.
	ReadValuesAt([]Value, int64) (int, error)
}
    58  
// ValueReaderFrom is an interface implemented by value writers to read values
// from a reader.
//
// copyValues delegates to this interface when the destination implements it.
type ValueReaderFrom interface {
	// ReadValuesFrom consumes values from the reader passed as argument.
	//
	// copyValues returns the (int64, error) pair unchanged as its own
	// (written, err) results.
	ReadValuesFrom(ValueReader) (int64, error)
}
    64  
// ValueWriter is an interface implemented by types that support writing
// batches of values.
type ValueWriter interface {
	// WriteValues writes values from the buffer passed as argument and
	// returns the number of values written.
	WriteValues([]Value) (int, error)
}
    72  
// ValueWriterTo is an interface implemented by value readers to write values to
// a writer.
//
// copyValues delegates to this interface when the source implements it.
type ValueWriterTo interface {
	// WriteValuesTo sends the values held by the receiver to the writer
	// passed as argument.
	//
	// copyValues returns the (int64, error) pair unchanged as its own
	// (written, err) results.
	WriteValuesTo(ValueWriter) (int64, error)
}
    78  
// ValueReaderFunc is a function type implementing the ValueReader interface.
type ValueReaderFunc func([]Value) (int, error)

// ReadValues calls f with values and returns its results unchanged.
func (f ValueReaderFunc) ReadValues(values []Value) (int, error) { return f(values) }
    83  
// ValueWriterFunc is a function type implementing the ValueWriter interface.
type ValueWriterFunc func([]Value) (int, error)

// WriteValues calls f with values and returns its results unchanged.
func (f ValueWriterFunc) WriteValues(values []Value) (int, error) { return f(values) }
    88  
// CopyValues copies values from src to dst, returning the number of values
// that were written.
//
// As an optimization, the reader and writer may choose to implement
// ValueWriterTo and ValueReaderFrom respectively to provide their own copy
// logic.
//
// The function returns any error it encounters reading or writing values,
// except for io.EOF from the reader which indicates that there were no more
// values to read.
func CopyValues(dst ValueWriter, src ValueReader) (int64, error) {
	// A nil buffer makes copyValues allocate its default scratch buffer.
	return copyValues(dst, src, nil)
}
   101  
   102  func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, err error) {
   103  	if wt, ok := src.(ValueWriterTo); ok {
   104  		return wt.WriteValuesTo(dst)
   105  	}
   106  
   107  	if rf, ok := dst.(ValueReaderFrom); ok {
   108  		return rf.ReadValuesFrom(src)
   109  	}
   110  
   111  	if len(buf) == 0 {
   112  		buf = make([]Value, defaultValueBufferSize)
   113  	}
   114  
   115  	defer clearValues(buf)
   116  
   117  	for {
   118  		n, err := src.ReadValues(buf)
   119  
   120  		if n > 0 {
   121  			wn, werr := dst.WriteValues(buf[:n])
   122  			written += int64(wn)
   123  			if werr != nil {
   124  				return written, werr
   125  			}
   126  		}
   127  
   128  		if err != nil {
   129  			if err == io.EOF {
   130  				err = nil
   131  			}
   132  			return written, err
   133  		}
   134  
   135  		if n == 0 {
   136  			return written, io.ErrNoProgress
   137  		}
   138  	}
   139  }
   140  
   141  // ValueOf constructs a parquet value from a Go value v.
   142  //
   143  // The physical type of the value is assumed from the Go type of v using the
   144  // following conversion table:
   145  //
   146  //	Go type | Parquet physical type
   147  //	------- | ---------------------
   148  //	nil     | NULL
   149  //	bool    | BOOLEAN
   150  //	int8    | INT32
   151  //	int16   | INT32
   152  //	int32   | INT32
   153  //	int64   | INT64
   154  //	int     | INT64
   155  //	uint8   | INT32
   156  //	uint16  | INT32
   157  //	uint32  | INT32
   158  //	uint64  | INT64
   159  //	uintptr | INT64
   160  //	float32 | FLOAT
   161  //	float64 | DOUBLE
   162  //	string  | BYTE_ARRAY
   163  //	[]byte  | BYTE_ARRAY
   164  //	[*]byte | FIXED_LEN_BYTE_ARRAY
   165  //
   166  // When converting a []byte or [*]byte value, the underlying byte array is not
   167  // copied; instead, the returned parquet value holds a reference to it.
   168  //
   169  // The repetition and definition levels of the returned value are both zero.
   170  //
   171  // The function panics if the Go value cannot be represented in parquet.
   172  func ValueOf(v interface{}) Value {
   173  	k := Kind(-1)
   174  	t := reflect.TypeOf(v)
   175  
   176  	switch value := v.(type) {
   177  	case nil:
   178  		return Value{}
   179  	case uuid.UUID:
   180  		return makeValueBytes(FixedLenByteArray, value[:])
   181  	case deprecated.Int96:
   182  		return makeValueInt96(value)
   183  	case time.Time:
   184  		k = Int64
   185  	}
   186  
   187  	switch t.Kind() {
   188  	case reflect.Bool:
   189  		k = Boolean
   190  	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Uint8, reflect.Uint16, reflect.Uint32:
   191  		k = Int32
   192  	case reflect.Int64, reflect.Int, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   193  		k = Int64
   194  	case reflect.Float32:
   195  		k = Float
   196  	case reflect.Float64:
   197  		k = Double
   198  	case reflect.String:
   199  		k = ByteArray
   200  	case reflect.Slice:
   201  		if t.Elem().Kind() == reflect.Uint8 {
   202  			k = ByteArray
   203  		}
   204  	case reflect.Array:
   205  		if t.Elem().Kind() == reflect.Uint8 {
   206  			k = FixedLenByteArray
   207  		}
   208  	}
   209  
   210  	if k < 0 {
   211  		panic("cannot create parquet value from go value of type " + t.String())
   212  	}
   213  
   214  	return makeValue(k, nil, reflect.ValueOf(v))
   215  }
   216  
// NullValue constructs a null value, which is the zero-value of the Value type.
func NullValue() Value { return Value{} }

// ZeroValue constructs a zero value of the given kind.
func ZeroValue(kind Kind) Value { return makeValueKind(kind) }

// BooleanValue constructs a BOOLEAN parquet value from the bool passed as
// argument.
func BooleanValue(value bool) Value { return makeValueBoolean(value) }

// Int32Value constructs an INT32 parquet value from the int32 passed as
// argument.
func Int32Value(value int32) Value { return makeValueInt32(value) }

// Int64Value constructs an INT64 parquet value from the int64 passed as
// argument.
func Int64Value(value int64) Value { return makeValueInt64(value) }

// Int96Value constructs an INT96 parquet value from the deprecated.Int96 passed
// as argument.
func Int96Value(value deprecated.Int96) Value { return makeValueInt96(value) }

// FloatValue constructs a FLOAT parquet value from the float32 passed as
// argument.
func FloatValue(value float32) Value { return makeValueFloat(value) }

// DoubleValue constructs a DOUBLE parquet value from the float64 passed as
// argument.
func DoubleValue(value float64) Value { return makeValueDouble(value) }

// ByteArrayValue constructs a BYTE_ARRAY parquet value from the byte slice
// passed as argument.
//
// The slice is not copied; the returned value references its memory.
func ByteArrayValue(value []byte) Value { return makeValueBytes(ByteArray, value) }

// FixedLenByteArrayValue constructs a FIXED_LEN_BYTE_ARRAY parquet value from
// the byte slice passed as argument.
//
// The slice is not copied; the returned value references its memory.
func FixedLenByteArrayValue(value []byte) Value { return makeValueBytes(FixedLenByteArray, value) }
   254  
   255  func makeValue(k Kind, lt *format.LogicalType, v reflect.Value) Value {
   256  	switch v.Type() {
   257  	case reflect.TypeOf(time.Time{}):
   258  		unit := Nanosecond.TimeUnit()
   259  		if lt != nil && lt.Timestamp != nil {
   260  			unit = lt.Timestamp.Unit
   261  		}
   262  
   263  		t := v.Interface().(time.Time)
   264  		var val int64
   265  		switch {
   266  		case unit.Millis != nil:
   267  			val = t.UnixMilli()
   268  		case unit.Micros != nil:
   269  			val = t.UnixMicro()
   270  		default:
   271  			val = t.UnixNano()
   272  		}
   273  		return makeValueInt64(val)
   274  	}
   275  
   276  	switch k {
   277  	case Boolean:
   278  		return makeValueBoolean(v.Bool())
   279  
   280  	case Int32:
   281  		switch v.Kind() {
   282  		case reflect.Int8, reflect.Int16, reflect.Int32:
   283  			return makeValueInt32(int32(v.Int()))
   284  		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
   285  			return makeValueInt32(int32(v.Uint()))
   286  		}
   287  
   288  	case Int64:
   289  		switch v.Kind() {
   290  		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
   291  			return makeValueInt64(v.Int())
   292  		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   293  			return makeValueUint64(v.Uint())
   294  		}
   295  
   296  	case Int96:
   297  		switch v.Type() {
   298  		case reflect.TypeOf(deprecated.Int96{}):
   299  			return makeValueInt96(v.Interface().(deprecated.Int96))
   300  		}
   301  
   302  	case Float:
   303  		switch v.Kind() {
   304  		case reflect.Float32:
   305  			return makeValueFloat(float32(v.Float()))
   306  		}
   307  
   308  	case Double:
   309  		switch v.Kind() {
   310  		case reflect.Float32, reflect.Float64:
   311  			return makeValueDouble(v.Float())
   312  		}
   313  
   314  	case ByteArray:
   315  		switch v.Kind() {
   316  		case reflect.String:
   317  			return makeValueString(k, v.String())
   318  		case reflect.Slice:
   319  			if v.Type().Elem().Kind() == reflect.Uint8 {
   320  				return makeValueBytes(k, v.Bytes())
   321  			}
   322  		}
   323  
   324  	case FixedLenByteArray:
   325  		switch v.Kind() {
   326  		case reflect.String: // uuid
   327  			return makeValueString(k, v.String())
   328  		case reflect.Array:
   329  			if v.Type().Elem().Kind() == reflect.Uint8 {
   330  				return makeValueFixedLenByteArray(v)
   331  			}
   332  		case reflect.Slice:
   333  			if v.Type().Elem().Kind() == reflect.Uint8 {
   334  				return makeValueBytes(k, v.Bytes())
   335  			}
   336  		}
   337  	}
   338  
   339  	panic("cannot create parquet value of type " + k.String() + " from go value of type " + v.Type().String())
   340  }
   341  
   342  func makeValueKind(kind Kind) Value {
   343  	return Value{kind: ^int8(kind)}
   344  }
   345  
   346  func makeValueBoolean(value bool) Value {
   347  	v := Value{kind: ^int8(Boolean)}
   348  	if value {
   349  		v.u64 = 1
   350  	}
   351  	return v
   352  }
   353  
   354  func makeValueInt32(value int32) Value {
   355  	return Value{
   356  		kind: ^int8(Int32),
   357  		u64:  uint64(value),
   358  	}
   359  }
   360  
   361  func makeValueInt64(value int64) Value {
   362  	return Value{
   363  		kind: ^int8(Int64),
   364  		u64:  uint64(value),
   365  	}
   366  }
   367  
   368  func makeValueInt96(value deprecated.Int96) Value {
   369  	// TODO: this is highly inefficient because we need a heap allocation to
   370  	// store the value; we don't expect INT96 to be used frequently since it
   371  	// is a deprecated feature of parquet, and it helps keep the Value type
   372  	// compact for all the other more common cases.
   373  	bits := [12]byte{}
   374  	binary.LittleEndian.PutUint32(bits[0:4], value[0])
   375  	binary.LittleEndian.PutUint32(bits[4:8], value[1])
   376  	binary.LittleEndian.PutUint32(bits[8:12], value[2])
   377  	return Value{
   378  		kind: ^int8(Int96),
   379  		ptr:  &bits[0],
   380  		u64:  12, // set the length so we can use the ByteArray method
   381  	}
   382  }
   383  
   384  func makeValueUint32(value uint32) Value {
   385  	return Value{
   386  		kind: ^int8(Int32),
   387  		u64:  uint64(value),
   388  	}
   389  }
   390  
   391  func makeValueUint64(value uint64) Value {
   392  	return Value{
   393  		kind: ^int8(Int64),
   394  		u64:  value,
   395  	}
   396  }
   397  
   398  func makeValueFloat(value float32) Value {
   399  	return Value{
   400  		kind: ^int8(Float),
   401  		u64:  uint64(math.Float32bits(value)),
   402  	}
   403  }
   404  
   405  func makeValueDouble(value float64) Value {
   406  	return Value{
   407  		kind: ^int8(Double),
   408  		u64:  math.Float64bits(value),
   409  	}
   410  }
   411  
   412  func makeValueBytes(kind Kind, value []byte) Value {
   413  	return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
   414  }
   415  
   416  func makeValueString(kind Kind, value string) Value {
   417  	return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
   418  }
   419  
   420  func makeValueFixedLenByteArray(v reflect.Value) Value {
   421  	t := v.Type()
   422  	// When the array is addressable, we take advantage of this
   423  	// condition to avoid the heap allocation otherwise needed
   424  	// to pack the reference into an interface{} value.
   425  	if v.CanAddr() {
   426  		v = v.Addr()
   427  	} else {
   428  		u := reflect.New(t)
   429  		u.Elem().Set(v)
   430  		v = u
   431  	}
   432  	return makeValueByteArray(FixedLenByteArray, (*byte)(unsafePointer(v)), t.Len())
   433  }
   434  
   435  func makeValueByteArray(kind Kind, data *byte, size int) Value {
   436  	return Value{
   437  		kind: ^int8(kind),
   438  		ptr:  data,
   439  		u64:  uint64(size),
   440  	}
   441  }
   442  
// These methods are internal versions of methods exported by the Value type,
// they are usually inlined by the compiler and intended to be used inside the
// parquet-go package because they tend to generate better code than their
// exported counterparts, which require making a copy of the receiver.
//
// None of the accessors check the kind of the value: the scalar ones
// reinterpret the 64-bit payload, the byte array ones interpret ptr and the
// payload as an address and a length. column undoes the complement applied
// to columnIndex, so the zero Value reports -1.
func (v *Value) isNull() bool            { return v.kind == 0 }
func (v *Value) byte() byte              { return byte(v.u64) }
func (v *Value) boolean() bool           { return v.u64 != 0 }
func (v *Value) int32() int32            { return int32(v.u64) }
func (v *Value) int64() int64            { return int64(v.u64) }
func (v *Value) int96() deprecated.Int96 { return makeInt96(v.byteArray()) }
func (v *Value) float() float32          { return math.Float32frombits(uint32(v.u64)) }
func (v *Value) double() float64         { return math.Float64frombits(uint64(v.u64)) }
func (v *Value) uint32() uint32          { return uint32(v.u64) }
func (v *Value) uint64() uint64          { return v.u64 }
func (v *Value) byteArray() []byte       { return unsafecast.Bytes(v.ptr, int(v.u64)) }
func (v *Value) string() string          { return unsafecast.BytesToString(v.byteArray()) }
func (v *Value) be128() *[16]byte        { return (*[16]byte)(unsafe.Pointer(v.ptr)) }
func (v *Value) column() int             { return int(^v.columnIndex) }
   461  
   462  func (v Value) convertToBoolean(x bool) Value {
   463  	v.kind = ^int8(Boolean)
   464  	v.ptr = nil
   465  	v.u64 = 0
   466  	if x {
   467  		v.u64 = 1
   468  	}
   469  	return v
   470  }
   471  
   472  func (v Value) convertToInt32(x int32) Value {
   473  	v.kind = ^int8(Int32)
   474  	v.ptr = nil
   475  	v.u64 = uint64(x)
   476  	return v
   477  }
   478  
   479  func (v Value) convertToInt64(x int64) Value {
   480  	v.kind = ^int8(Int64)
   481  	v.ptr = nil
   482  	v.u64 = uint64(x)
   483  	return v
   484  }
   485  
   486  func (v Value) convertToInt96(x deprecated.Int96) Value {
   487  	i96 := makeValueInt96(x)
   488  	v.kind = i96.kind
   489  	v.ptr = i96.ptr
   490  	v.u64 = i96.u64
   491  	return v
   492  }
   493  
   494  func (v Value) convertToFloat(x float32) Value {
   495  	v.kind = ^int8(Float)
   496  	v.ptr = nil
   497  	v.u64 = uint64(math.Float32bits(x))
   498  	return v
   499  }
   500  
   501  func (v Value) convertToDouble(x float64) Value {
   502  	v.kind = ^int8(Double)
   503  	v.ptr = nil
   504  	v.u64 = math.Float64bits(x)
   505  	return v
   506  }
   507  
   508  func (v Value) convertToByteArray(x []byte) Value {
   509  	v.kind = ^int8(ByteArray)
   510  	v.ptr = unsafecast.AddressOfBytes(x)
   511  	v.u64 = uint64(len(x))
   512  	return v
   513  }
   514  
   515  func (v Value) convertToFixedLenByteArray(x []byte) Value {
   516  	v.kind = ^int8(FixedLenByteArray)
   517  	v.ptr = unsafecast.AddressOfBytes(x)
   518  	v.u64 = uint64(len(x))
   519  	return v
   520  }
   521  
// Kind returns the kind of v, which represents its parquet physical type.
//
// The kind is stored complemented, so the null value reports a kind of -1.
func (v Value) Kind() Kind { return ^Kind(v.kind) }

// IsNull returns true if v is the null value.
func (v Value) IsNull() bool { return v.isNull() }

// Byte returns v as a byte, keeping only the low 8 bits of the underlying
// value.
func (v Value) Byte() byte { return v.byte() }

// Boolean returns v as a bool, assuming the underlying type is BOOLEAN.
func (v Value) Boolean() bool { return v.boolean() }

// Int32 returns v as an int32, assuming the underlying type is INT32.
func (v Value) Int32() int32 { return v.int32() }

// Int64 returns v as an int64, assuming the underlying type is INT64.
func (v Value) Int64() int64 { return v.int64() }
   539  
   540  // Int96 returns v as a int96, assuming the underlying type is INT96.
   541  func (v Value) Int96() deprecated.Int96 {
   542  	var val deprecated.Int96
   543  	if !v.isNull() {
   544  		val = v.int96()
   545  	}
   546  	return val
   547  }
   548  
// Float returns v as a float32, assuming the underlying type is FLOAT.
func (v Value) Float() float32 { return v.float() }

// Double returns v as a float64, assuming the underlying type is DOUBLE.
func (v Value) Double() float64 { return v.double() }

// Uint32 returns v as a uint32, assuming the underlying type is INT32.
func (v Value) Uint32() uint32 { return v.uint32() }

// Uint64 returns v as a uint64, assuming the underlying type is INT64.
func (v Value) Uint64() uint64 { return v.uint64() }

// ByteArray returns v as a []byte, assuming the underlying type is either
// BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY.
//
// The application must treat the returned byte slice as a read-only value,
// mutating the content will result in undefined behaviors.
func (v Value) ByteArray() []byte { return v.byteArray() }

// RepetitionLevel returns the repetition level of v.
func (v Value) RepetitionLevel() int { return int(v.repetitionLevel) }

// DefinitionLevel returns the definition level of v.
func (v Value) DefinitionLevel() int { return int(v.definitionLevel) }

// Column returns the column index within the row that v was created from.
//
// Returns -1 if the value does not carry a column index.
func (v Value) Column() int { return v.column() }
   578  
   579  // Bytes returns the binary representation of v.
   580  //
   581  // If v is the null value, an nil byte slice is returned.
   582  func (v Value) Bytes() []byte {
   583  	switch v.Kind() {
   584  	case Boolean:
   585  		buf := [8]byte{}
   586  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   587  		return buf[0:1]
   588  	case Int32, Float:
   589  		buf := [8]byte{}
   590  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   591  		return buf[:4]
   592  	case Int64, Double:
   593  		buf := [8]byte{}
   594  		binary.LittleEndian.PutUint64(buf[:8], v.uint64())
   595  		return buf[:8]
   596  	case ByteArray, FixedLenByteArray, Int96:
   597  		return v.byteArray()
   598  	default:
   599  		return nil
   600  	}
   601  }
   602  
   603  // AppendBytes appends the binary representation of v to b.
   604  //
   605  // If v is the null value, b is returned unchanged.
   606  func (v Value) AppendBytes(b []byte) []byte {
   607  	buf := [8]byte{}
   608  	switch v.Kind() {
   609  	case Boolean:
   610  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   611  		return append(b, buf[0])
   612  	case Int32, Float:
   613  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   614  		return append(b, buf[:4]...)
   615  	case Int64, Double:
   616  		binary.LittleEndian.PutUint64(buf[:8], v.uint64())
   617  		return append(b, buf[:8]...)
   618  	case ByteArray, FixedLenByteArray, Int96:
   619  		return append(b, v.byteArray()...)
   620  	default:
   621  		return b
   622  	}
   623  }
   624  
   625  // Format outputs a human-readable representation of v to w, using r as the
   626  // formatting verb to describe how the value should be printed.
   627  //
   628  // The following formatting options are supported:
   629  //
   630  //	%c	prints the column index
   631  //	%+c	prints the column index, prefixed with "C:"
   632  //	%d	prints the definition level
   633  //	%+d	prints the definition level, prefixed with "D:"
   634  //	%r	prints the repetition level
   635  //	%+r	prints the repetition level, prefixed with "R:"
   636  //	%q	prints the quoted representation of v
   637  //	%+q	prints the quoted representation of v, prefixed with "V:"
   638  //	%s	prints the string representation of v
   639  //	%+s	prints the string representation of v, prefixed with "V:"
   640  //	%v	same as %s
   641  //	%+v	prints a verbose representation of v
   642  //	%#v	prints a Go value representation of v
   643  //
   644  // Format satisfies the fmt.Formatter interface.
   645  func (v Value) Format(w fmt.State, r rune) {
   646  	switch r {
   647  	case 'c':
   648  		if w.Flag('+') {
   649  			io.WriteString(w, "C:")
   650  		}
   651  		fmt.Fprint(w, v.column())
   652  
   653  	case 'd':
   654  		if w.Flag('+') {
   655  			io.WriteString(w, "D:")
   656  		}
   657  		fmt.Fprint(w, v.definitionLevel)
   658  
   659  	case 'r':
   660  		if w.Flag('+') {
   661  			io.WriteString(w, "R:")
   662  		}
   663  		fmt.Fprint(w, v.repetitionLevel)
   664  
   665  	case 'q':
   666  		if w.Flag('+') {
   667  			io.WriteString(w, "V:")
   668  		}
   669  		switch v.Kind() {
   670  		case ByteArray, FixedLenByteArray:
   671  			fmt.Fprintf(w, "%q", v.byteArray())
   672  		default:
   673  			fmt.Fprintf(w, `"%s"`, v)
   674  		}
   675  
   676  	case 's':
   677  		if w.Flag('+') {
   678  			io.WriteString(w, "V:")
   679  		}
   680  		switch v.Kind() {
   681  		case Boolean:
   682  			fmt.Fprint(w, v.boolean())
   683  		case Int32:
   684  			fmt.Fprint(w, v.int32())
   685  		case Int64:
   686  			fmt.Fprint(w, v.int64())
   687  		case Int96:
   688  			fmt.Fprint(w, v.int96())
   689  		case Float:
   690  			fmt.Fprint(w, v.float())
   691  		case Double:
   692  			fmt.Fprint(w, v.double())
   693  		case ByteArray, FixedLenByteArray:
   694  			w.Write(v.byteArray())
   695  		default:
   696  			io.WriteString(w, "<null>")
   697  		}
   698  
   699  	case 'v':
   700  		switch {
   701  		case w.Flag('+'):
   702  			fmt.Fprintf(w, "%+[1]c %+[1]d %+[1]r %+[1]s", v)
   703  		case w.Flag('#'):
   704  			v.formatGoString(w)
   705  		default:
   706  			v.Format(w, 's')
   707  		}
   708  	}
   709  }
   710  
   711  func (v Value) formatGoString(w fmt.State) {
   712  	io.WriteString(w, "parquet.")
   713  	switch v.Kind() {
   714  	case Boolean:
   715  		fmt.Fprintf(w, "BooleanValue(%t)", v.boolean())
   716  	case Int32:
   717  		fmt.Fprintf(w, "Int32Value(%d)", v.int32())
   718  	case Int64:
   719  		fmt.Fprintf(w, "Int64Value(%d)", v.int64())
   720  	case Int96:
   721  		fmt.Fprintf(w, "Int96Value(%#v)", v.int96())
   722  	case Float:
   723  		fmt.Fprintf(w, "FloatValue(%g)", v.float())
   724  	case Double:
   725  		fmt.Fprintf(w, "DoubleValue(%g)", v.double())
   726  	case ByteArray:
   727  		fmt.Fprintf(w, "ByteArrayValue(%q)", v.byteArray())
   728  	case FixedLenByteArray:
   729  		fmt.Fprintf(w, "FixedLenByteArrayValue(%#v)", v.byteArray())
   730  	default:
   731  		io.WriteString(w, "Value{}")
   732  		return
   733  	}
   734  	fmt.Fprintf(w, ".Level(%d,%d,%d)",
   735  		v.RepetitionLevel(),
   736  		v.DefinitionLevel(),
   737  		v.Column(),
   738  	)
   739  }
   740  
   741  // String returns a string representation of v.
   742  func (v Value) String() string {
   743  	switch v.Kind() {
   744  	case Boolean:
   745  		return strconv.FormatBool(v.boolean())
   746  	case Int32:
   747  		return strconv.FormatInt(int64(v.int32()), 10)
   748  	case Int64:
   749  		return strconv.FormatInt(v.int64(), 10)
   750  	case Int96:
   751  		return v.Int96().String()
   752  	case Float:
   753  		return strconv.FormatFloat(float64(v.float()), 'g', -1, 32)
   754  	case Double:
   755  		return strconv.FormatFloat(v.double(), 'g', -1, 32)
   756  	case ByteArray, FixedLenByteArray:
   757  		return string(v.byteArray())
   758  	default:
   759  		return "<null>"
   760  	}
   761  }
   762  
// GoString returns a Go value string representation of v.
//
// It is the output of formatting v with the %#v verb, which Format
// implements.
func (v Value) GoString() string { return fmt.Sprintf("%#v", v) }
   765  
   766  // Level returns v with the repetition level, definition level, and column index
   767  // set to the values passed as arguments.
   768  //
   769  // The method panics if either argument is negative.
   770  func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value {
   771  	v.repetitionLevel = makeRepetitionLevel(repetitionLevel)
   772  	v.definitionLevel = makeDefinitionLevel(definitionLevel)
   773  	v.columnIndex = ^makeColumnIndex(columnIndex)
   774  	return v
   775  }
   776  
   777  // Clone returns a copy of v which does not share any pointers with it.
   778  func (v Value) Clone() Value {
   779  	switch k := v.Kind(); k {
   780  	case ByteArray, FixedLenByteArray:
   781  		v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray()))
   782  	}
   783  	return v
   784  }
   785  
   786  func makeInt96(bits []byte) (i96 deprecated.Int96) {
   787  	return deprecated.Int96{
   788  		2: binary.LittleEndian.Uint32(bits[8:12]),
   789  		1: binary.LittleEndian.Uint32(bits[4:8]),
   790  		0: binary.LittleEndian.Uint32(bits[0:4]),
   791  	}
   792  }
   793  
   794  func parseValue(kind Kind, data []byte) (val Value, err error) {
   795  	switch kind {
   796  	case Boolean:
   797  		if len(data) == 1 {
   798  			val = makeValueBoolean(data[0] != 0)
   799  		}
   800  	case Int32:
   801  		if len(data) == 4 {
   802  			val = makeValueInt32(int32(binary.LittleEndian.Uint32(data)))
   803  		}
   804  	case Int64:
   805  		if len(data) == 8 {
   806  			val = makeValueInt64(int64(binary.LittleEndian.Uint64(data)))
   807  		}
   808  	case Int96:
   809  		if len(data) == 12 {
   810  			val = makeValueInt96(makeInt96(data))
   811  		}
   812  	case Float:
   813  		if len(data) == 4 {
   814  			val = makeValueFloat(float32(math.Float32frombits(binary.LittleEndian.Uint32(data))))
   815  		}
   816  	case Double:
   817  		if len(data) == 8 {
   818  			val = makeValueDouble(float64(math.Float64frombits(binary.LittleEndian.Uint64(data))))
   819  		}
   820  	case ByteArray, FixedLenByteArray:
   821  		val = makeValueBytes(kind, data)
   822  	}
   823  	if val.isNull() {
   824  		err = fmt.Errorf("cannot decode %s value from input of length %d", kind, len(data))
   825  	}
   826  	return val, err
   827  }
   828  
   829  func copyBytes(b []byte) []byte {
   830  	c := make([]byte, len(b))
   831  	copy(c, b)
   832  	return c
   833  }
   834  
   835  // Equal returns true if v1 and v2 are equal.
   836  //
   837  // Values are considered equal if they are of the same physical type and hold
   838  // the same Go values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY, the content of
   839  // the underlying byte arrays are tested for equality.
   840  //
   841  // Note that the repetition levels, definition levels, and column indexes are
   842  // not compared by this function, use DeepEqual instead.
   843  func Equal(v1, v2 Value) bool {
   844  	if v1.kind != v2.kind {
   845  		return false
   846  	}
   847  	switch ^Kind(v1.kind) {
   848  	case Boolean:
   849  		return v1.boolean() == v2.boolean()
   850  	case Int32:
   851  		return v1.int32() == v2.int32()
   852  	case Int64:
   853  		return v1.int64() == v2.int64()
   854  	case Int96:
   855  		return v1.int96() == v2.int96()
   856  	case Float:
   857  		return v1.float() == v2.float()
   858  	case Double:
   859  		return v1.double() == v2.double()
   860  	case ByteArray, FixedLenByteArray:
   861  		return bytes.Equal(v1.byteArray(), v2.byteArray())
   862  	case -1: // null
   863  		return true
   864  	default:
   865  		return false
   866  	}
   867  }
   868  
   869  // DeepEqual returns true if v1 and v2 are equal, including their repetition
   870  // levels, definition levels, and column indexes.
   871  //
   872  // See Equal for details about how value equality is determined.
   873  func DeepEqual(v1, v2 Value) bool {
   874  	return Equal(v1, v2) &&
   875  		v1.repetitionLevel == v2.repetitionLevel &&
   876  		v1.definitionLevel == v2.definitionLevel &&
   877  		v1.columnIndex == v2.columnIndex
   878  }
   879  
// Compile-time checks that Value implements the fmt interfaces it is expected
// to satisfy.
var (
	_ fmt.Formatter = Value{}
	_ fmt.Stringer  = Value{}
)
   884  
// clearValues resets every element of values to the null value, dropping the
// pointers the values were holding.
//
// NOTE(review): the loop has the shape the Go compiler presumably recognizes
// as a memory clear; keep it in this form unless that is confirmed not to
// matter.
func clearValues(values []Value) {
	for i := range values {
		values[i] = Value{}
	}
}
   890  
// BooleanReader is an interface implemented by ValueReader instances which
// expose the content of a column of boolean values.
type BooleanReader interface {
	// ReadBooleans reads boolean values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadBooleans(values []bool) (int, error)
}
   899  
// BooleanWriter is an interface implemented by ValueWriter instances which
// support writing columns of boolean values.
type BooleanWriter interface {
	// WriteBooleans writes boolean values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteBooleans(values []bool) (int, error)
}
   909  
   910  // Int32Reader is an interface implemented by ValueReader instances which expose
   911  // the content of a column of int32 values.
   912  type Int32Reader interface {
   913  	// ReadInt32s reads 32 bits signed integer values into the buffer passed as
   914  	// argument. The returned count is expected to mirror ValueReader.ReadValues
   915  	// (number of values read); the error is io.EOF once all values are read.
   916  	ReadInt32s(values []int32) (int, error)
   917  }
   918  
   919  // Int32Writer is an interface implemented by ValueWriter instances which
   920  // support writing columns of 32 bits signed integer values.
   921  type Int32Writer interface {
   922  	// WriteInt32s writes the 32 bits signed integer values held in values.
   923  	//
   924  	// The method returns the number of values written, and any error that
   925  	// occurred while writing the values.
   926  	WriteInt32s(values []int32) (int, error)
   927  }
   928  
   929  // Int64Reader is an interface implemented by ValueReader instances which expose
   930  // the content of a column of int64 values.
   931  type Int64Reader interface {
   932  	// ReadInt64s reads 64 bits signed integer values into the buffer passed as
   933  	// argument. The returned count is expected to mirror ValueReader.ReadValues
   934  	// (number of values read); the error is io.EOF once all values are read.
   935  	ReadInt64s(values []int64) (int, error)
   936  }
   937  
   938  // Int64Writer is an interface implemented by ValueWriter instances which
   939  // support writing columns of 64 bits signed integer values.
   940  type Int64Writer interface {
   941  	// WriteInt64s writes the 64 bits signed integer values held in values.
   942  	//
   943  	// The method returns the number of values written, and any error that
   944  	// occurred while writing the values.
   945  	WriteInt64s(values []int64) (int, error)
   946  }
   947  
   948  // Int96Reader is an interface implemented by ValueReader instances which expose
   949  // the content of a column of int96 values.
   950  type Int96Reader interface {
   951  	// ReadInt96s reads 96 bits integer values into the buffer passed as
   952  	// argument. The returned count is expected to mirror ValueReader.ReadValues
   953  	// (number of values read); the error is io.EOF once all values are read.
   954  	ReadInt96s(values []deprecated.Int96) (int, error)
   955  }
   956  
   957  // Int96Writer is an interface implemented by ValueWriter instances which
   958  // support writing columns of 96 bits signed integer values.
   959  type Int96Writer interface {
   960  	// WriteInt96s writes the 96 bits signed integer values held in values.
   961  	//
   962  	// The method returns the number of values written, and any error that
   963  	// occurred while writing the values.
   964  	WriteInt96s(values []deprecated.Int96) (int, error)
   965  }
   966  
   967  // FloatReader is an interface implemented by ValueReader instances which expose
   968  // the content of a column of single-precision floating point values.
   969  type FloatReader interface {
   970  	// ReadFloats reads single-precision floating point values into the buffer
   971  	// passed as argument. The returned count is expected to mirror
   972  	// ValueReader.ReadValues (number of values read); the error is io.EOF when
   973  	// all values have been read.
   974  	ReadFloats(values []float32) (int, error)
   975  }
   976  
   977  // FloatWriter is an interface implemented by ValueWriter instances which
   978  // support writing columns of single-precision floating point values.
   979  type FloatWriter interface {
   980  	// WriteFloats writes the single-precision floating point values in values.
   981  	//
   982  	// The method returns the number of values written, and any error that
   983  	// occurred while writing the values.
   984  	WriteFloats(values []float32) (int, error)
   985  }
   986  
   987  // DoubleReader is an interface implemented by ValueReader instances which
   988  // expose the content of a column of double-precision floating point values.
   989  type DoubleReader interface {
   990  	// ReadDoubles reads double-precision floating point values into the buffer
   991  	// passed as argument. The returned count is expected to mirror
   992  	// ValueReader.ReadValues (number of values read); the error is io.EOF when
   993  	// all values have been read.
   994  	ReadDoubles(values []float64) (int, error)
   995  }
   996  
   997  // DoubleWriter is an interface implemented by ValueWriter instances which
   998  // support writing columns of double-precision floating point values.
   999  type DoubleWriter interface {
  1000  	// WriteDoubles writes the double-precision floating point values in values.
  1001  	//
  1002  	// The method returns the number of values written, and any error that
  1003  	// occurred while writing the values.
  1004  	WriteDoubles(values []float64) (int, error)
  1005  }
  1006  
  1007  // ByteArrayReader is an interface implemented by ValueReader instances which
  1008  // expose the content of a column of variable length byte array values.
  1009  type ByteArrayReader interface {
  1010  	// ReadByteArrays reads values into the byte buffer passed as argument and
  1011  	// returns how many values (not bytes) were written to it. Values use the
  1012  	// PLAIN encoding: each byte array is prefixed with its length, encoded as a
  1013  	// 4 bytes little endian unsigned integer.
  1014  	//
  1015  	// The method returns io.EOF when all values have been read.
  1016  	//
  1017  	// If the buffer was not empty, but too small to hold at least one value,
  1018  	// io.ErrShortBuffer is returned.
  1019  	ReadByteArrays(values []byte) (int, error)
  1020  }
  1021  
  1022  // ByteArrayWriter is an interface implemented by ValueWriter instances which
  1023  // support writing columns of variable length byte array values.
  1024  type ByteArrayWriter interface {
  1025  	// WriteByteArrays writes variable length byte array values.
  1026  	//
  1027  	// The values passed as input must be laid out using the PLAIN encoding,
  1028  	// with each byte array prefixed with its length encoded as a four bytes
  1029  	// little endian unsigned integer.
  1030  	//
  1031  	// The method returns the number of values written to the underlying column
  1032  	// (not the number of bytes), and any error that occurred while attempting
  1033  	// to write the values.
  1034  	WriteByteArrays(values []byte) (int, error)
  1035  }
  1036  
  1037  // FixedLenByteArrayReader is an interface implemented by ValueReader instances
  1038  // which expose the content of a column of fixed length byte array values.
  1039  type FixedLenByteArrayReader interface {
  1040  	// ReadFixedLenByteArrays reads values into the byte buffer passed as argument
  1041  	// and returns how many values (not bytes) were written to the buffer.
  1042  	//
  1043  	// The method returns io.EOF when all values have been read.
  1044  	//
  1045  	// If the buffer was not empty, but too small to hold at least one value,
  1046  	// io.ErrShortBuffer is returned.
  1047  	ReadFixedLenByteArrays(values []byte) (int, error)
  1048  }
  1049  
  1050  // FixedLenByteArrayWriter is an interface implemented by ValueWriter instances
  1051  // which support writing columns of fixed length byte array values.
  1052  type FixedLenByteArrayWriter interface {
  1053  	// WriteFixedLenByteArrays writes the fixed length byte array values; the int
  1054  	// result presumably counts values written, like ByteArrayWriter (confirm).
  1055  	// The size of the values is assumed to be the same as the expected size of
  1056  	// items in the column. The method errors if the length of the input values
  1057  	// is not a multiple of the expected item size.
  1058  	WriteFixedLenByteArrays(values []byte) (int, error)
  1059  }