github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/value.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"reflect"
    10  	"strconv"
    11  	"time"
    12  	"unsafe"
    13  
    14  	"github.com/google/uuid"
    15  	"github.com/parquet-go/parquet-go/deprecated"
    16  	"github.com/parquet-go/parquet-go/format"
    17  	"github.com/parquet-go/parquet-go/internal/unsafecast"
    18  )
    19  
const (
	// defaultValueBufferSize is the number of values in the scratch buffer
	// that copyValues allocates when the caller does not provide one.
	//
	// 170 x sizeof(Value) = 4KB
	defaultValueBufferSize = 170
)
    24  
// The Value type is similar to the reflect.Value abstraction of Go values, but
// for parquet values. Value instances wrap underlying Go values mapped to one
// of the parquet physical types.
//
// Value instances are small, immutable objects, and usually passed by value
// between function calls.
//
// The zero-value of Value represents the null parquet value.
type Value struct {
	// data
	//
	// ptr points at the first byte of the payload for values that carry
	// bytes (BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY, INT96); the constructors of
	// the other kinds leave it nil.
	ptr *byte
	// u64 holds the bits of scalar values (boolean, integers, floats), or
	// the payload length in bytes when ptr is used.
	u64 uint64
	// type
	//
	// The kind is stored as its bitwise complement (^Kind), which is why a
	// zero kind field means null.
	kind int8 // XOR(Kind) so the zero-value is <null>
	// levels
	definitionLevel byte
	repetitionLevel byte
	// The column index is stored as its bitwise complement as well.
	columnIndex     int16 // XOR so the zero-value is -1
}
    44  
// ValueReader is an interface implemented by types that support reading
// batches of values.
type ValueReader interface {
	// Read values into the buffer passed as argument and return the number of
	// values read. When all values have been read, the error will be io.EOF.
	//
	// A call may return both a non-zero count and an error; copyValues
	// consumes the values before it looks at the error.
	ReadValues([]Value) (int, error)
}
    52  
// ValueReaderAt is an interface implemented by types that support reading
// values at offsets specified by the application.
type ValueReaderAt interface {
	// ReadValuesAt reads values into the buffer, starting at the offset
	// passed as second argument.
	// NOTE(review): the unit of the offset (presumably a value index) is not
	// visible in this file; confirm against implementations.
	ReadValuesAt([]Value, int64) (int, error)
}
    58  
// ValueReaderFrom is an interface implemented by value writers to read values
// from a reader.
//
// CopyValues uses this method, when the destination implements it, instead of
// its own copy loop and returns its results as-is.
type ValueReaderFrom interface {
	ReadValuesFrom(ValueReader) (int64, error)
}
    64  
// ValueWriter is an interface implemented by types that support writing
// batches of values.
type ValueWriter interface {
	// Write values from the buffer passed as argument and returns the number
	// of values written.
	WriteValues([]Value) (int, error)
}
    72  
// ValueWriterTo is an interface implemented by value readers to write values to
// a writer.
//
// CopyValues uses this method, when the source implements it, in preference
// to any other copy strategy and returns its results as-is.
type ValueWriterTo interface {
	WriteValuesTo(ValueWriter) (int64, error)
}
    78  
    79  // ValueReaderFunc is a function type implementing the ValueReader interface.
    80  type ValueReaderFunc func([]Value) (int, error)
    81  
    82  func (f ValueReaderFunc) ReadValues(values []Value) (int, error) { return f(values) }
    83  
    84  // ValueWriterFunc is a function type implementing the ValueWriter interface.
    85  type ValueWriterFunc func([]Value) (int, error)
    86  
    87  func (f ValueWriterFunc) WriteValues(values []Value) (int, error) { return f(values) }
    88  
    89  // CopyValues copies values from src to dst, returning the number of values
    90  // that were written.
    91  //
    92  // As an optimization, the reader and writer may choose to implement
    93  // ValueReaderFrom and ValueWriterTo to provide their own copy logic.
    94  //
    95  // The function returns any error it encounters reading or writing pages, except
    96  // for io.EOF from the reader which indicates that there were no more values to
    97  // read.
    98  func CopyValues(dst ValueWriter, src ValueReader) (int64, error) {
    99  	return copyValues(dst, src, nil)
   100  }
   101  
   102  func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, err error) {
   103  	if wt, ok := src.(ValueWriterTo); ok {
   104  		return wt.WriteValuesTo(dst)
   105  	}
   106  
   107  	if rf, ok := dst.(ValueReaderFrom); ok {
   108  		return rf.ReadValuesFrom(src)
   109  	}
   110  
   111  	if len(buf) == 0 {
   112  		buf = make([]Value, defaultValueBufferSize)
   113  	}
   114  
   115  	defer clearValues(buf)
   116  
   117  	for {
   118  		n, err := src.ReadValues(buf)
   119  
   120  		if n > 0 {
   121  			wn, werr := dst.WriteValues(buf[:n])
   122  			written += int64(wn)
   123  			if werr != nil {
   124  				return written, werr
   125  			}
   126  		}
   127  
   128  		if err != nil {
   129  			if err == io.EOF {
   130  				err = nil
   131  			}
   132  			return written, err
   133  		}
   134  
   135  		if n == 0 {
   136  			return written, io.ErrNoProgress
   137  		}
   138  	}
   139  }
   140  
   141  // ValueOf constructs a parquet value from a Go value v.
   142  //
   143  // The physical type of the value is assumed from the Go type of v using the
   144  // following conversion table:
   145  //
   146  //	Go type | Parquet physical type
   147  //	------- | ---------------------
   148  //	nil     | NULL
   149  //	bool    | BOOLEAN
   150  //	int8    | INT32
   151  //	int16   | INT32
   152  //	int32   | INT32
   153  //	int64   | INT64
   154  //	int     | INT64
   155  //	uint8   | INT32
   156  //	uint16  | INT32
   157  //	uint32  | INT32
   158  //	uint64  | INT64
   159  //	uintptr | INT64
   160  //	float32 | FLOAT
   161  //	float64 | DOUBLE
   162  //	string  | BYTE_ARRAY
   163  //	[]byte  | BYTE_ARRAY
   164  //	[*]byte | FIXED_LEN_BYTE_ARRAY
   165  //
   166  // When converting a []byte or [*]byte value, the underlying byte array is not
   167  // copied; instead, the returned parquet value holds a reference to it.
   168  //
   169  // The repetition and definition levels of the returned value are both zero.
   170  //
   171  // The function panics if the Go value cannot be represented in parquet.
   172  func ValueOf(v interface{}) Value {
   173  	k := Kind(-1)
   174  	t := reflect.TypeOf(v)
   175  
   176  	switch value := v.(type) {
   177  	case nil:
   178  		return Value{}
   179  	case uuid.UUID:
   180  		return makeValueBytes(FixedLenByteArray, value[:])
   181  	case deprecated.Int96:
   182  		return makeValueInt96(value)
   183  	case time.Time:
   184  		k = Int64
   185  	}
   186  
   187  	switch t.Kind() {
   188  	case reflect.Bool:
   189  		k = Boolean
   190  	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Uint8, reflect.Uint16, reflect.Uint32:
   191  		k = Int32
   192  	case reflect.Int64, reflect.Int, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   193  		k = Int64
   194  	case reflect.Float32:
   195  		k = Float
   196  	case reflect.Float64:
   197  		k = Double
   198  	case reflect.String:
   199  		k = ByteArray
   200  	case reflect.Slice:
   201  		if t.Elem().Kind() == reflect.Uint8 {
   202  			k = ByteArray
   203  		}
   204  	case reflect.Array:
   205  		if t.Elem().Kind() == reflect.Uint8 {
   206  			k = FixedLenByteArray
   207  		}
   208  	}
   209  
   210  	if k < 0 {
   211  		panic("cannot create parquet value from go value of type " + t.String())
   212  	}
   213  
   214  	return makeValue(k, nil, reflect.ValueOf(v))
   215  }
   216  
   217  // NulLValue constructs a null value, which is the zero-value of the Value type.
   218  func NullValue() Value { return Value{} }
   219  
   220  // ZeroValue constructs a zero value of the given kind.
   221  func ZeroValue(kind Kind) Value { return makeValueKind(kind) }
   222  
   223  // BooleanValue constructs a BOOLEAN parquet value from the bool passed as
   224  // argument.
   225  func BooleanValue(value bool) Value { return makeValueBoolean(value) }
   226  
   227  // Int32Value constructs a INT32 parquet value from the int32 passed as
   228  // argument.
   229  func Int32Value(value int32) Value { return makeValueInt32(value) }
   230  
   231  // Int64Value constructs a INT64 parquet value from the int64 passed as
   232  // argument.
   233  func Int64Value(value int64) Value { return makeValueInt64(value) }
   234  
   235  // Int96Value constructs a INT96 parquet value from the deprecated.Int96 passed
   236  // as argument.
   237  func Int96Value(value deprecated.Int96) Value { return makeValueInt96(value) }
   238  
   239  // FloatValue constructs a FLOAT parquet value from the float32 passed as
   240  // argument.
   241  func FloatValue(value float32) Value { return makeValueFloat(value) }
   242  
   243  // DoubleValue constructs a DOUBLE parquet value from the float64 passed as
   244  // argument.
   245  func DoubleValue(value float64) Value { return makeValueDouble(value) }
   246  
   247  // ByteArrayValue constructs a BYTE_ARRAY parquet value from the byte slice
   248  // passed as argument.
   249  func ByteArrayValue(value []byte) Value { return makeValueBytes(ByteArray, value) }
   250  
   251  // FixedLenByteArrayValue constructs a BYTE_ARRAY parquet value from the byte
   252  // slice passed as argument.
   253  func FixedLenByteArrayValue(value []byte) Value { return makeValueBytes(FixedLenByteArray, value) }
   254  
   255  func makeValue(k Kind, lt *format.LogicalType, v reflect.Value) Value {
   256  	if v.Kind() == reflect.Interface {
   257  		if v.IsNil() {
   258  			return Value{}
   259  		}
   260  		if v = v.Elem(); v.Kind() == reflect.Pointer && v.IsNil() {
   261  			return Value{}
   262  		}
   263  	}
   264  
   265  	switch v.Type() {
   266  	case reflect.TypeOf(time.Time{}):
   267  		unit := Nanosecond.TimeUnit()
   268  		if lt != nil && lt.Timestamp != nil {
   269  			unit = lt.Timestamp.Unit
   270  		}
   271  
   272  		t := v.Interface().(time.Time)
   273  		var val int64
   274  		switch {
   275  		case unit.Millis != nil:
   276  			val = t.UnixMilli()
   277  		case unit.Micros != nil:
   278  			val = t.UnixMicro()
   279  		default:
   280  			val = t.UnixNano()
   281  		}
   282  		return makeValueInt64(val)
   283  	}
   284  
   285  	switch k {
   286  	case Boolean:
   287  		return makeValueBoolean(v.Bool())
   288  
   289  	case Int32:
   290  		switch v.Kind() {
   291  		case reflect.Int8, reflect.Int16, reflect.Int32:
   292  			return makeValueInt32(int32(v.Int()))
   293  		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
   294  			return makeValueInt32(int32(v.Uint()))
   295  		}
   296  
   297  	case Int64:
   298  		switch v.Kind() {
   299  		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
   300  			return makeValueInt64(v.Int())
   301  		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   302  			return makeValueUint64(v.Uint())
   303  		}
   304  
   305  	case Int96:
   306  		switch v.Type() {
   307  		case reflect.TypeOf(deprecated.Int96{}):
   308  			return makeValueInt96(v.Interface().(deprecated.Int96))
   309  		}
   310  
   311  	case Float:
   312  		switch v.Kind() {
   313  		case reflect.Float32:
   314  			return makeValueFloat(float32(v.Float()))
   315  		}
   316  
   317  	case Double:
   318  		switch v.Kind() {
   319  		case reflect.Float32, reflect.Float64:
   320  			return makeValueDouble(v.Float())
   321  		}
   322  
   323  	case ByteArray:
   324  		switch v.Kind() {
   325  		case reflect.String:
   326  			return makeValueString(k, v.String())
   327  		case reflect.Slice:
   328  			if v.Type().Elem().Kind() == reflect.Uint8 {
   329  				return makeValueBytes(k, v.Bytes())
   330  			}
   331  		}
   332  
   333  	case FixedLenByteArray:
   334  		switch v.Kind() {
   335  		case reflect.String: // uuid
   336  			return makeValueString(k, v.String())
   337  		case reflect.Array:
   338  			if v.Type().Elem().Kind() == reflect.Uint8 {
   339  				return makeValueFixedLenByteArray(v)
   340  			}
   341  		case reflect.Slice:
   342  			if v.Type().Elem().Kind() == reflect.Uint8 {
   343  				return makeValueBytes(k, v.Bytes())
   344  			}
   345  		}
   346  	}
   347  
   348  	panic("cannot create parquet value of type " + k.String() + " from go value of type " + v.Type().String())
   349  }
   350  
   351  func makeValueKind(kind Kind) Value {
   352  	return Value{kind: ^int8(kind)}
   353  }
   354  
   355  func makeValueBoolean(value bool) Value {
   356  	v := Value{kind: ^int8(Boolean)}
   357  	if value {
   358  		v.u64 = 1
   359  	}
   360  	return v
   361  }
   362  
   363  func makeValueInt32(value int32) Value {
   364  	return Value{
   365  		kind: ^int8(Int32),
   366  		u64:  uint64(value),
   367  	}
   368  }
   369  
   370  func makeValueInt64(value int64) Value {
   371  	return Value{
   372  		kind: ^int8(Int64),
   373  		u64:  uint64(value),
   374  	}
   375  }
   376  
   377  func makeValueInt96(value deprecated.Int96) Value {
   378  	// TODO: this is highly inefficient because we need a heap allocation to
   379  	// store the value; we don't expect INT96 to be used frequently since it
   380  	// is a deprecated feature of parquet, and it helps keep the Value type
   381  	// compact for all the other more common cases.
   382  	bits := [12]byte{}
   383  	binary.LittleEndian.PutUint32(bits[0:4], value[0])
   384  	binary.LittleEndian.PutUint32(bits[4:8], value[1])
   385  	binary.LittleEndian.PutUint32(bits[8:12], value[2])
   386  	return Value{
   387  		kind: ^int8(Int96),
   388  		ptr:  &bits[0],
   389  		u64:  12, // set the length so we can use the ByteArray method
   390  	}
   391  }
   392  
   393  func makeValueUint32(value uint32) Value {
   394  	return Value{
   395  		kind: ^int8(Int32),
   396  		u64:  uint64(value),
   397  	}
   398  }
   399  
   400  func makeValueUint64(value uint64) Value {
   401  	return Value{
   402  		kind: ^int8(Int64),
   403  		u64:  value,
   404  	}
   405  }
   406  
   407  func makeValueFloat(value float32) Value {
   408  	return Value{
   409  		kind: ^int8(Float),
   410  		u64:  uint64(math.Float32bits(value)),
   411  	}
   412  }
   413  
   414  func makeValueDouble(value float64) Value {
   415  	return Value{
   416  		kind: ^int8(Double),
   417  		u64:  math.Float64bits(value),
   418  	}
   419  }
   420  
   421  func makeValueBytes(kind Kind, value []byte) Value {
   422  	return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
   423  }
   424  
   425  func makeValueString(kind Kind, value string) Value {
   426  	return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
   427  }
   428  
   429  func makeValueFixedLenByteArray(v reflect.Value) Value {
   430  	t := v.Type()
   431  	// When the array is addressable, we take advantage of this
   432  	// condition to avoid the heap allocation otherwise needed
   433  	// to pack the reference into an interface{} value.
   434  	if v.CanAddr() {
   435  		v = v.Addr()
   436  	} else {
   437  		u := reflect.New(t)
   438  		u.Elem().Set(v)
   439  		v = u
   440  	}
   441  	return makeValueByteArray(FixedLenByteArray, (*byte)(v.UnsafePointer()), t.Len())
   442  }
   443  
   444  func makeValueByteArray(kind Kind, data *byte, size int) Value {
   445  	return Value{
   446  		kind: ^int8(kind),
   447  		ptr:  data,
   448  		u64:  uint64(size),
   449  	}
   450  }
   451  
// These methods are internal versions of methods exported by the Value type,
// they are usually inlined by the compiler and intended to be used inside the
// parquet-go package because they tend to generate better code than their
// exported counterparts, which require making a copy of the receiver.
//
// None of them checks the kind of the value: each one reinterprets the u64
// and ptr fields as if the value had the matching physical type.
func (v *Value) isNull() bool            { return v.kind == 0 }
func (v *Value) byte() byte              { return byte(v.u64) }
func (v *Value) boolean() bool           { return v.u64 != 0 }
func (v *Value) int32() int32            { return int32(v.u64) }
func (v *Value) int64() int64            { return int64(v.u64) }
func (v *Value) int96() deprecated.Int96 { return makeInt96(v.byteArray()) }
func (v *Value) float() float32          { return math.Float32frombits(uint32(v.u64)) }
func (v *Value) double() float64         { return math.Float64frombits(uint64(v.u64)) }
func (v *Value) uint32() uint32          { return uint32(v.u64) }
func (v *Value) uint64() uint64          { return v.u64 }
func (v *Value) byteArray() []byte       { return unsafecast.Bytes(v.ptr, int(v.u64)) }
func (v *Value) string() string          { return unsafecast.BytesToString(v.byteArray()) }
func (v *Value) be128() *[16]byte        { return (*[16]byte)(unsafe.Pointer(v.ptr)) }
func (v *Value) column() int             { return int(^v.columnIndex) }
   470  
   471  func (v Value) convertToBoolean(x bool) Value {
   472  	v.kind = ^int8(Boolean)
   473  	v.ptr = nil
   474  	v.u64 = 0
   475  	if x {
   476  		v.u64 = 1
   477  	}
   478  	return v
   479  }
   480  
   481  func (v Value) convertToInt32(x int32) Value {
   482  	v.kind = ^int8(Int32)
   483  	v.ptr = nil
   484  	v.u64 = uint64(x)
   485  	return v
   486  }
   487  
   488  func (v Value) convertToInt64(x int64) Value {
   489  	v.kind = ^int8(Int64)
   490  	v.ptr = nil
   491  	v.u64 = uint64(x)
   492  	return v
   493  }
   494  
   495  func (v Value) convertToInt96(x deprecated.Int96) Value {
   496  	i96 := makeValueInt96(x)
   497  	v.kind = i96.kind
   498  	v.ptr = i96.ptr
   499  	v.u64 = i96.u64
   500  	return v
   501  }
   502  
   503  func (v Value) convertToFloat(x float32) Value {
   504  	v.kind = ^int8(Float)
   505  	v.ptr = nil
   506  	v.u64 = uint64(math.Float32bits(x))
   507  	return v
   508  }
   509  
   510  func (v Value) convertToDouble(x float64) Value {
   511  	v.kind = ^int8(Double)
   512  	v.ptr = nil
   513  	v.u64 = math.Float64bits(x)
   514  	return v
   515  }
   516  
   517  func (v Value) convertToByteArray(x []byte) Value {
   518  	v.kind = ^int8(ByteArray)
   519  	v.ptr = unsafecast.AddressOfBytes(x)
   520  	v.u64 = uint64(len(x))
   521  	return v
   522  }
   523  
   524  func (v Value) convertToFixedLenByteArray(x []byte) Value {
   525  	v.kind = ^int8(FixedLenByteArray)
   526  	v.ptr = unsafecast.AddressOfBytes(x)
   527  	v.u64 = uint64(len(x))
   528  	return v
   529  }
   530  
// Kind returns the kind of v, which represents its parquet physical type.
//
// The kind of the null value is -1.
func (v Value) Kind() Kind { return ^Kind(v.kind) }

// IsNull returns true if v is the null value.
func (v Value) IsNull() bool { return v.isNull() }

// Byte returns v as a byte, which may truncate the underlying value.
func (v Value) Byte() byte { return v.byte() }

// Boolean returns v as a bool, assuming the underlying type is BOOLEAN.
func (v Value) Boolean() bool { return v.boolean() }

// Int32 returns v as an int32, assuming the underlying type is INT32.
func (v Value) Int32() int32 { return v.int32() }

// Int64 returns v as an int64, assuming the underlying type is INT64.
func (v Value) Int64() int64 { return v.int64() }
   548  
   549  // Int96 returns v as a int96, assuming the underlying type is INT96.
   550  func (v Value) Int96() deprecated.Int96 {
   551  	var val deprecated.Int96
   552  	if !v.isNull() {
   553  		val = v.int96()
   554  	}
   555  	return val
   556  }
   557  
// Float returns v as a float32, assuming the underlying type is FLOAT.
func (v Value) Float() float32 { return v.float() }

// Double returns v as a float64, assuming the underlying type is DOUBLE.
func (v Value) Double() float64 { return v.double() }

// Uint32 returns v as a uint32, assuming the underlying type is INT32.
func (v Value) Uint32() uint32 { return v.uint32() }

// Uint64 returns v as a uint64, assuming the underlying type is INT64.
func (v Value) Uint64() uint64 { return v.uint64() }

// ByteArray returns v as a []byte, assuming the underlying type is either
// BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY.
//
// INT96 values record a length of 12 bytes, so their little-endian
// representation can also be read through this method.
//
// The application must treat the returned byte slice as a read-only value,
// mutating the content will result in undefined behaviors.
func (v Value) ByteArray() []byte { return v.byteArray() }

// RepetitionLevel returns the repetition level of v.
func (v Value) RepetitionLevel() int { return int(v.repetitionLevel) }

// DefinitionLevel returns the definition level of v.
func (v Value) DefinitionLevel() int { return int(v.definitionLevel) }

// Column returns the column index within the row that v was created from.
//
// Returns -1 if the value does not carry a column index.
func (v Value) Column() int { return v.column() }
   587  
   588  // Bytes returns the binary representation of v.
   589  //
   590  // If v is the null value, an nil byte slice is returned.
   591  func (v Value) Bytes() []byte {
   592  	switch v.Kind() {
   593  	case Boolean:
   594  		buf := [8]byte{}
   595  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   596  		return buf[0:1]
   597  	case Int32, Float:
   598  		buf := [8]byte{}
   599  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   600  		return buf[:4]
   601  	case Int64, Double:
   602  		buf := [8]byte{}
   603  		binary.LittleEndian.PutUint64(buf[:8], v.uint64())
   604  		return buf[:8]
   605  	case ByteArray, FixedLenByteArray, Int96:
   606  		return v.byteArray()
   607  	default:
   608  		return nil
   609  	}
   610  }
   611  
   612  // AppendBytes appends the binary representation of v to b.
   613  //
   614  // If v is the null value, b is returned unchanged.
   615  func (v Value) AppendBytes(b []byte) []byte {
   616  	buf := [8]byte{}
   617  	switch v.Kind() {
   618  	case Boolean:
   619  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   620  		return append(b, buf[0])
   621  	case Int32, Float:
   622  		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
   623  		return append(b, buf[:4]...)
   624  	case Int64, Double:
   625  		binary.LittleEndian.PutUint64(buf[:8], v.uint64())
   626  		return append(b, buf[:8]...)
   627  	case ByteArray, FixedLenByteArray, Int96:
   628  		return append(b, v.byteArray()...)
   629  	default:
   630  		return b
   631  	}
   632  }
   633  
   634  // Format outputs a human-readable representation of v to w, using r as the
   635  // formatting verb to describe how the value should be printed.
   636  //
   637  // The following formatting options are supported:
   638  //
   639  //	%c	prints the column index
   640  //	%+c	prints the column index, prefixed with "C:"
   641  //	%d	prints the definition level
   642  //	%+d	prints the definition level, prefixed with "D:"
   643  //	%r	prints the repetition level
   644  //	%+r	prints the repetition level, prefixed with "R:"
   645  //	%q	prints the quoted representation of v
   646  //	%+q	prints the quoted representation of v, prefixed with "V:"
   647  //	%s	prints the string representation of v
   648  //	%+s	prints the string representation of v, prefixed with "V:"
   649  //	%v	same as %s
   650  //	%+v	prints a verbose representation of v
   651  //	%#v	prints a Go value representation of v
   652  //
   653  // Format satisfies the fmt.Formatter interface.
   654  func (v Value) Format(w fmt.State, r rune) {
   655  	switch r {
   656  	case 'c':
   657  		if w.Flag('+') {
   658  			io.WriteString(w, "C:")
   659  		}
   660  		fmt.Fprint(w, v.column())
   661  
   662  	case 'd':
   663  		if w.Flag('+') {
   664  			io.WriteString(w, "D:")
   665  		}
   666  		fmt.Fprint(w, v.definitionLevel)
   667  
   668  	case 'r':
   669  		if w.Flag('+') {
   670  			io.WriteString(w, "R:")
   671  		}
   672  		fmt.Fprint(w, v.repetitionLevel)
   673  
   674  	case 'q':
   675  		if w.Flag('+') {
   676  			io.WriteString(w, "V:")
   677  		}
   678  		switch v.Kind() {
   679  		case ByteArray, FixedLenByteArray:
   680  			fmt.Fprintf(w, "%q", v.byteArray())
   681  		default:
   682  			fmt.Fprintf(w, `"%s"`, v)
   683  		}
   684  
   685  	case 's':
   686  		if w.Flag('+') {
   687  			io.WriteString(w, "V:")
   688  		}
   689  		switch v.Kind() {
   690  		case Boolean:
   691  			fmt.Fprint(w, v.boolean())
   692  		case Int32:
   693  			fmt.Fprint(w, v.int32())
   694  		case Int64:
   695  			fmt.Fprint(w, v.int64())
   696  		case Int96:
   697  			fmt.Fprint(w, v.int96())
   698  		case Float:
   699  			fmt.Fprint(w, v.float())
   700  		case Double:
   701  			fmt.Fprint(w, v.double())
   702  		case ByteArray, FixedLenByteArray:
   703  			w.Write(v.byteArray())
   704  		default:
   705  			io.WriteString(w, "<null>")
   706  		}
   707  
   708  	case 'v':
   709  		switch {
   710  		case w.Flag('+'):
   711  			fmt.Fprintf(w, "%+[1]c %+[1]d %+[1]r %+[1]s", v)
   712  		case w.Flag('#'):
   713  			v.formatGoString(w)
   714  		default:
   715  			v.Format(w, 's')
   716  		}
   717  	}
   718  }
   719  
   720  func (v Value) formatGoString(w fmt.State) {
   721  	io.WriteString(w, "parquet.")
   722  	switch v.Kind() {
   723  	case Boolean:
   724  		fmt.Fprintf(w, "BooleanValue(%t)", v.boolean())
   725  	case Int32:
   726  		fmt.Fprintf(w, "Int32Value(%d)", v.int32())
   727  	case Int64:
   728  		fmt.Fprintf(w, "Int64Value(%d)", v.int64())
   729  	case Int96:
   730  		fmt.Fprintf(w, "Int96Value(%#v)", v.int96())
   731  	case Float:
   732  		fmt.Fprintf(w, "FloatValue(%g)", v.float())
   733  	case Double:
   734  		fmt.Fprintf(w, "DoubleValue(%g)", v.double())
   735  	case ByteArray:
   736  		fmt.Fprintf(w, "ByteArrayValue(%q)", v.byteArray())
   737  	case FixedLenByteArray:
   738  		fmt.Fprintf(w, "FixedLenByteArrayValue(%#v)", v.byteArray())
   739  	default:
   740  		io.WriteString(w, "Value{}")
   741  		return
   742  	}
   743  	fmt.Fprintf(w, ".Level(%d,%d,%d)",
   744  		v.RepetitionLevel(),
   745  		v.DefinitionLevel(),
   746  		v.Column(),
   747  	)
   748  }
   749  
   750  // String returns a string representation of v.
   751  func (v Value) String() string {
   752  	switch v.Kind() {
   753  	case Boolean:
   754  		return strconv.FormatBool(v.boolean())
   755  	case Int32:
   756  		return strconv.FormatInt(int64(v.int32()), 10)
   757  	case Int64:
   758  		return strconv.FormatInt(v.int64(), 10)
   759  	case Int96:
   760  		return v.Int96().String()
   761  	case Float:
   762  		return strconv.FormatFloat(float64(v.float()), 'g', -1, 32)
   763  	case Double:
   764  		return strconv.FormatFloat(v.double(), 'g', -1, 32)
   765  	case ByteArray, FixedLenByteArray:
   766  		return string(v.byteArray())
   767  	default:
   768  		return "<null>"
   769  	}
   770  }
   771  
   772  // GoString returns a Go value string representation of v.
   773  func (v Value) GoString() string { return fmt.Sprintf("%#v", v) }
   774  
   775  // Level returns v with the repetition level, definition level, and column index
   776  // set to the values passed as arguments.
   777  //
   778  // The method panics if either argument is negative.
   779  func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value {
   780  	v.repetitionLevel = makeRepetitionLevel(repetitionLevel)
   781  	v.definitionLevel = makeDefinitionLevel(definitionLevel)
   782  	v.columnIndex = ^makeColumnIndex(columnIndex)
   783  	return v
   784  }
   785  
   786  // Clone returns a copy of v which does not share any pointers with it.
   787  func (v Value) Clone() Value {
   788  	switch k := v.Kind(); k {
   789  	case ByteArray, FixedLenByteArray:
   790  		v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray()))
   791  	}
   792  	return v
   793  }
   794  
   795  func makeInt96(bits []byte) (i96 deprecated.Int96) {
   796  	return deprecated.Int96{
   797  		2: binary.LittleEndian.Uint32(bits[8:12]),
   798  		1: binary.LittleEndian.Uint32(bits[4:8]),
   799  		0: binary.LittleEndian.Uint32(bits[0:4]),
   800  	}
   801  }
   802  
   803  func parseValue(kind Kind, data []byte) (val Value, err error) {
   804  	switch kind {
   805  	case Boolean:
   806  		if len(data) == 1 {
   807  			val = makeValueBoolean(data[0] != 0)
   808  		}
   809  	case Int32:
   810  		if len(data) == 4 {
   811  			val = makeValueInt32(int32(binary.LittleEndian.Uint32(data)))
   812  		}
   813  	case Int64:
   814  		if len(data) == 8 {
   815  			val = makeValueInt64(int64(binary.LittleEndian.Uint64(data)))
   816  		}
   817  	case Int96:
   818  		if len(data) == 12 {
   819  			val = makeValueInt96(makeInt96(data))
   820  		}
   821  	case Float:
   822  		if len(data) == 4 {
   823  			val = makeValueFloat(float32(math.Float32frombits(binary.LittleEndian.Uint32(data))))
   824  		}
   825  	case Double:
   826  		if len(data) == 8 {
   827  			val = makeValueDouble(float64(math.Float64frombits(binary.LittleEndian.Uint64(data))))
   828  		}
   829  	case ByteArray, FixedLenByteArray:
   830  		val = makeValueBytes(kind, data)
   831  	}
   832  	if val.isNull() {
   833  		err = fmt.Errorf("cannot decode %s value from input of length %d", kind, len(data))
   834  	}
   835  	return val, err
   836  }
   837  
   838  func copyBytes(b []byte) []byte {
   839  	c := make([]byte, len(b))
   840  	copy(c, b)
   841  	return c
   842  }
   843  
   844  // Equal returns true if v1 and v2 are equal.
   845  //
   846  // Values are considered equal if they are of the same physical type and hold
   847  // the same Go values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY, the content of
   848  // the underlying byte arrays are tested for equality.
   849  //
   850  // Note that the repetition levels, definition levels, and column indexes are
   851  // not compared by this function, use DeepEqual instead.
   852  func Equal(v1, v2 Value) bool {
   853  	if v1.kind != v2.kind {
   854  		return false
   855  	}
   856  	switch ^Kind(v1.kind) {
   857  	case Boolean:
   858  		return v1.boolean() == v2.boolean()
   859  	case Int32:
   860  		return v1.int32() == v2.int32()
   861  	case Int64:
   862  		return v1.int64() == v2.int64()
   863  	case Int96:
   864  		return v1.int96() == v2.int96()
   865  	case Float:
   866  		return v1.float() == v2.float()
   867  	case Double:
   868  		return v1.double() == v2.double()
   869  	case ByteArray, FixedLenByteArray:
   870  		return bytes.Equal(v1.byteArray(), v2.byteArray())
   871  	case -1: // null
   872  		return true
   873  	default:
   874  		return false
   875  	}
   876  }
   877  
   878  // DeepEqual returns true if v1 and v2 are equal, including their repetition
   879  // levels, definition levels, and column indexes.
   880  //
   881  // See Equal for details about how value equality is determined.
   882  func DeepEqual(v1, v2 Value) bool {
   883  	return Equal(v1, v2) &&
   884  		v1.repetitionLevel == v2.repetitionLevel &&
   885  		v1.definitionLevel == v2.definitionLevel &&
   886  		v1.columnIndex == v2.columnIndex
   887  }
   888  
// Compile-time checks that Value implements the fmt.Formatter and
// fmt.Stringer interfaces.
var (
	_ fmt.Formatter = Value{}
	_ fmt.Stringer  = Value{}
)
   893  
// clearValues resets every element of values to the null value, which also
// drops the references that the values held to byte arrays.
func clearValues(values []Value) {
	for i := range values {
		values[i] = Value{}
	}
}
   899  
// BooleanReader is an interface implemented by ValueReader instances which
// expose the content of a column of boolean values.
type BooleanReader interface {
	// Read boolean values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadBooleans(values []bool) (int, error)
}

// BooleanWriter is an interface implemented by ValueWriter instances which
// support writing columns of boolean values.
type BooleanWriter interface {
	// Write boolean values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteBooleans(values []bool) (int, error)
}

// Int32Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int32 values.
type Int32Reader interface {
	// Read 32-bit integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt32s(values []int32) (int, error)
}

// Int32Writer is an interface implemented by ValueWriter instances which
// support writing columns of 32-bit signed integer values.
type Int32Writer interface {
	// Write 32-bit signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt32s(values []int32) (int, error)
}

// Int64Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int64 values.
type Int64Reader interface {
	// Read 64-bit integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt64s(values []int64) (int, error)
}
   946  
   947  // Int64Writer is an interface implemented by ValueWriter instances which
   948  // support writing columns of 64 bits signed integer values.
   949  type Int64Writer interface {
   950  	// Write 64 bits signed integer values.
   951  	//
   952  	// The method returns the number of values written, and any error that
   953  	// occurred while writing the values.
   954  	WriteInt64s(values []int64) (int, error)
   955  }
   956  
   957  // Int96Reader is an interface implemented by ValueReader instances which expose
   958  // the content of a column of int96 values.
   959  type Int96Reader interface {
   960  	// Read 96 bits integer values into the buffer passed as argument.
   961  	//
   962  	// The method returns io.EOF when all values have been read.
   963  	ReadInt96s(values []deprecated.Int96) (int, error)
   964  }
   965  
   966  // Int96Writer is an interface implemented by ValueWriter instances which
   967  // support writing columns of 96 bits signed integer values.
   968  type Int96Writer interface {
   969  	// Write 96 bits signed integer values.
   970  	//
   971  	// The method returns the number of values written, and any error that
   972  	// occurred while writing the values.
   973  	WriteInt96s(values []deprecated.Int96) (int, error)
   974  }
   975  
   976  // FloatReader is an interface implemented by ValueReader instances which expose
   977  // the content of a column of single-precision floating point values.
   978  type FloatReader interface {
   979  	// Read single-precision floating point values into the buffer passed as
   980  	// argument.
   981  	//
   982  	// The method returns io.EOF when all values have been read.
   983  	ReadFloats(values []float32) (int, error)
   984  }
   985  
   986  // FloatWriter is an interface implemented by ValueWriter instances which
   987  // support writing columns of single-precision floating point values.
   988  type FloatWriter interface {
   989  	// Write single-precision floating point values.
   990  	//
   991  	// The method returns the number of values written, and any error that
   992  	// occurred while writing the values.
   993  	WriteFloats(values []float32) (int, error)
   994  }
   995  
   996  // DoubleReader is an interface implemented by ValueReader instances which
   997  // expose the content of a column of double-precision float point values.
   998  type DoubleReader interface {
   999  	// Read double-precision floating point values into the buffer passed as
  1000  	// argument.
  1001  	//
  1002  	// The method returns io.EOF when all values have been read.
  1003  	ReadDoubles(values []float64) (int, error)
  1004  }
  1005  
  1006  // DoubleWriter is an interface implemented by ValueWriter instances which
  1007  // support writing columns of double-precision floating point values.
  1008  type DoubleWriter interface {
  1009  	// Write double-precision floating point values.
  1010  	//
  1011  	// The method returns the number of values written, and any error that
  1012  	// occurred while writing the values.
  1013  	WriteDoubles(values []float64) (int, error)
  1014  }
  1015  
  1016  // ByteArrayReader is an interface implemented by ValueReader instances which
  1017  // expose the content of a column of variable length byte array values.
  1018  type ByteArrayReader interface {
  1019  	// Read values into the byte buffer passed as argument, returning the number
  1020  	// of values written to the buffer (not the number of bytes). Values are
  1021  	// written using the PLAIN encoding, each byte array prefixed with its
  1022  	// length encoded as a 4 bytes little endian unsigned integer.
  1023  	//
  1024  	// The method returns io.EOF when all values have been read.
  1025  	//
  1026  	// If the buffer was not empty, but too small to hold at least one value,
  1027  	// io.ErrShortBuffer is returned.
  1028  	ReadByteArrays(values []byte) (int, error)
  1029  }
  1030  
  1031  // ByteArrayWriter is an interface implemented by ValueWriter instances which
  1032  // support writing columns of variable length byte array values.
  1033  type ByteArrayWriter interface {
  1034  	// Write variable length byte array values.
  1035  	//
  1036  	// The values passed as input must be laid out using the PLAIN encoding,
  1037  	// with each byte array prefixed with the four bytes little endian unsigned
  1038  	// integer length.
  1039  	//
  1040  	// The method returns the number of values written to the underlying column
  1041  	// (not the number of bytes), or any error that occurred while attempting to
  1042  	// write the values.
  1043  	WriteByteArrays(values []byte) (int, error)
  1044  }
  1045  
  1046  // FixedLenByteArrayReader is an interface implemented by ValueReader instances
  1047  // which expose the content of a column of fixed length byte array values.
  1048  type FixedLenByteArrayReader interface {
  1049  	// Read values into the byte buffer passed as argument, returning the number
  1050  	// of values written to the buffer (not the number of bytes).
  1051  	//
  1052  	// The method returns io.EOF when all values have been read.
  1053  	//
  1054  	// If the buffer was not empty, but too small to hold at least one value,
  1055  	// io.ErrShortBuffer is returned.
  1056  	ReadFixedLenByteArrays(values []byte) (int, error)
  1057  }
  1058  
  1059  // FixedLenByteArrayWriter is an interface implemented by ValueWriter instances
  1060  // which support writing columns of fixed length byte array values.
  1061  type FixedLenByteArrayWriter interface {
  1062  	// Writes the fixed length byte array values.
  1063  	//
  1064  	// The size of the values is assumed to be the same as the expected size of
  1065  	// items in the column. The method errors if the length of the input values
  1066  	// is not a multiple of the expected item size.
  1067  	WriteFixedLenByteArrays(values []byte) (int, error)
  1068  }