github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/value.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"reflect"
    10  	"strconv"
    11  	"unsafe"
    12  
    13  	"github.com/google/uuid"
    14  	"github.com/vc42/parquet-go/deprecated"
    15  	"github.com/vc42/parquet-go/internal/unsafecast"
    16  )
    17  
const (
	// defaultValueBufferSize is the number of values in the scratch buffer
	// that copyValues allocates when the caller does not supply one.
	//
	// 170 x sizeof(Value) = 4KB (assuming a 24-byte Value, as laid out on
	// 64-bit platforms).
	defaultValueBufferSize = 170
)
    22  
// The Value type is similar to the reflect.Value abstraction of Go values, but
// for parquet values. Value instances wrap underlying Go values mapped to one
// of the parquet physical types.
//
// Value instances are small, immutable objects, and usually passed by value
// between function calls.
//
// The zero-value of Value represents the null parquet value.
type Value struct {
	// data
	//
	// ptr references the first byte of the payload for the kinds that carry
	// bytes (BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY, and INT96). The constructors
	// in this file leave it nil for all other kinds.
	ptr *byte
	// u64 holds the bits of scalar values (booleans, integers, floats), or the
	// length of the payload in bytes when ptr is set.
	u64 uint64
	// type
	kind int8 // bitwise complement of the Kind (^Kind) so the zero-value is <null>
	// levels
	definitionLevel byte
	repetitionLevel byte
	columnIndex     int16 // stored as its bitwise complement so the zero-value reads back as -1
}
    42  
// ValueReader is an interface implemented by types that support reading
// batches of values.
type ValueReader interface {
	// ReadValues reads values into the buffer passed as argument and returns
	// the number of values read. When all values have been read, the error
	// will be io.EOF.
	//
	// Note that copyValues in this file still writes the values that were
	// returned alongside a non-nil error, so implementations may return
	// n > 0 together with io.EOF.
	ReadValues([]Value) (int, error)
}
    50  
// ValueReaderAt is an interface implemented by types that support reading
// values at offsets specified by the application.
type ValueReaderAt interface {
	// ReadValuesAt reads values into the buffer passed as first argument and
	// returns the number of values read.
	//
	// NOTE(review): the int64 argument is presumably the offset at which to
	// start reading, expressed as a value index; confirm against
	// implementations.
	ReadValuesAt([]Value, int64) (int, error)
}
    56  
// ValueReaderFrom is an interface implemented by value writers to read values
// from a reader.
//
// When the destination passed to CopyValues implements this interface (and the
// source does not implement ValueWriterTo), the copy is delegated to it.
type ValueReaderFrom interface {
	// ReadValuesFrom consumes values from the reader passed as argument and
	// returns the number of values that were transferred.
	ReadValuesFrom(ValueReader) (int64, error)
}
    62  
// ValueWriter is an interface implemented by types that support writing
// batches of values.
type ValueWriter interface {
	// WriteValues writes values from the buffer passed as argument and returns
	// the number of values written.
	WriteValues([]Value) (int, error)
}
    70  
// ValueWriterTo is an interface implemented by value readers to write values to
// a writer.
//
// When the source passed to CopyValues implements this interface, the copy is
// delegated to it.
type ValueWriterTo interface {
	// WriteValuesTo sends values to the writer passed as argument and returns
	// the number of values that were transferred.
	WriteValuesTo(ValueWriter) (int64, error)
}
    76  
    77  // CopyValues copies values from src to dst, returning the number of values
    78  // that were written.
    79  //
    80  // As an optimization, the reader and writer may choose to implement
    81  // ValueReaderFrom and ValueWriterTo to provide their own copy logic.
    82  //
    83  // The function returns any error it encounters reading or writing pages, except
    84  // for io.EOF from the reader which indicates that there were no more values to
    85  // read.
    86  func CopyValues(dst ValueWriter, src ValueReader) (int64, error) {
    87  	return copyValues(dst, src, nil)
    88  }
    89  
    90  func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, err error) {
    91  	if wt, ok := src.(ValueWriterTo); ok {
    92  		return wt.WriteValuesTo(dst)
    93  	}
    94  
    95  	if rf, ok := dst.(ValueReaderFrom); ok {
    96  		return rf.ReadValuesFrom(src)
    97  	}
    98  
    99  	if len(buf) == 0 {
   100  		buf = make([]Value, defaultValueBufferSize)
   101  	}
   102  
   103  	defer clearValues(buf)
   104  
   105  	for {
   106  		n, err := src.ReadValues(buf)
   107  
   108  		if n > 0 {
   109  			wn, werr := dst.WriteValues(buf[:n])
   110  			written += int64(wn)
   111  			if werr != nil {
   112  				return written, werr
   113  			}
   114  		}
   115  
   116  		if err != nil {
   117  			if err == io.EOF {
   118  				err = nil
   119  			}
   120  			return written, err
   121  		}
   122  
   123  		if n == 0 {
   124  			return written, io.ErrNoProgress
   125  		}
   126  	}
   127  }
   128  
   129  // ValueOf constructs a parquet value from a Go value v.
   130  //
   131  // The physical type of the value is assumed from the Go type of v using the
   132  // following conversion table:
   133  //
   134  //	Go type | Parquet physical type
   135  //	------- | ---------------------
   136  //	nil     | NULL
   137  //	bool    | BOOLEAN
   138  //	int8    | INT32
   139  //	int16   | INT32
   140  //	int32   | INT32
   141  //	int64   | INT64
   142  //	int     | INT64
   143  //	uint8   | INT32
   144  //	uint16  | INT32
   145  //	uint32  | INT32
   146  //	uint64  | INT64
   147  //	uintptr | INT64
   148  //	float32 | FLOAT
   149  //	float64 | DOUBLE
   150  //	string  | BYTE_ARRAY
   151  //	[]byte  | BYTE_ARRAY
   152  //	[*]byte | FIXED_LEN_BYTE_ARRAY
   153  //
   154  // When converting a []byte or [*]byte value, the underlying byte array is not
   155  // copied; instead, the returned parquet value holds a reference to it.
   156  //
   157  // The repetition and definition levels of the returned value are both zero.
   158  //
   159  // The function panics if the Go value cannot be represented in parquet.
   160  func ValueOf(v interface{}) Value {
   161  	switch value := v.(type) {
   162  	case nil:
   163  		return Value{}
   164  	case uuid.UUID:
   165  		return makeValueBytes(FixedLenByteArray, value[:])
   166  	case deprecated.Int96:
   167  		return makeValueInt96(value)
   168  	}
   169  
   170  	k := Kind(-1)
   171  	t := reflect.TypeOf(v)
   172  
   173  	switch t.Kind() {
   174  	case reflect.Bool:
   175  		k = Boolean
   176  	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Uint8, reflect.Uint16, reflect.Uint32:
   177  		k = Int32
   178  	case reflect.Int64, reflect.Int, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   179  		k = Int64
   180  	case reflect.Float32:
   181  		k = Float
   182  	case reflect.Float64:
   183  		k = Double
   184  	case reflect.String:
   185  		k = ByteArray
   186  	case reflect.Slice:
   187  		if t.Elem().Kind() == reflect.Uint8 {
   188  			k = ByteArray
   189  		}
   190  	case reflect.Array:
   191  		if t.Elem().Kind() == reflect.Uint8 {
   192  			k = FixedLenByteArray
   193  		}
   194  	}
   195  
   196  	if k < 0 {
   197  		panic("cannot create parquet value from go value of type " + t.String())
   198  	}
   199  
   200  	return makeValue(k, reflect.ValueOf(v))
   201  }
   202  
   203  func makeValue(k Kind, v reflect.Value) Value {
   204  	switch k {
   205  	case Boolean:
   206  		return makeValueBoolean(v.Bool())
   207  
   208  	case Int32:
   209  		switch v.Kind() {
   210  		case reflect.Int8, reflect.Int16, reflect.Int32:
   211  			return makeValueInt32(int32(v.Int()))
   212  		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
   213  			return makeValueInt32(int32(v.Uint()))
   214  		}
   215  
   216  	case Int64:
   217  		switch v.Kind() {
   218  		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
   219  			return makeValueInt64(v.Int())
   220  		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
   221  			return makeValueUint64(v.Uint())
   222  		}
   223  
   224  	case Int96:
   225  		switch v.Type() {
   226  		case reflect.TypeOf(deprecated.Int96{}):
   227  			return makeValueInt96(v.Interface().(deprecated.Int96))
   228  		}
   229  
   230  	case Float:
   231  		switch v.Kind() {
   232  		case reflect.Float32:
   233  			return makeValueFloat(float32(v.Float()))
   234  		}
   235  
   236  	case Double:
   237  		switch v.Kind() {
   238  		case reflect.Float32, reflect.Float64:
   239  			return makeValueDouble(v.Float())
   240  		}
   241  
   242  	case ByteArray:
   243  		switch v.Kind() {
   244  		case reflect.String:
   245  			return makeValueString(k, v.String())
   246  		case reflect.Slice:
   247  			if v.Type().Elem().Kind() == reflect.Uint8 {
   248  				return makeValueBytes(k, v.Bytes())
   249  			}
   250  		}
   251  
   252  	case FixedLenByteArray:
   253  		switch v.Kind() {
   254  		case reflect.String: // uuid
   255  			return makeValueString(k, v.String())
   256  		case reflect.Array:
   257  			if v.Type().Elem().Kind() == reflect.Uint8 {
   258  				return makeValueFixedLenByteArray(v)
   259  			}
   260  		case reflect.Slice:
   261  			if v.Type().Elem().Kind() == reflect.Uint8 {
   262  				return makeValueBytes(k, v.Bytes())
   263  			}
   264  		}
   265  	}
   266  
   267  	panic("cannot create parquet value of type " + k.String() + " from go value of type " + v.Type().String())
   268  }
   269  
   270  func makeValueBoolean(value bool) Value {
   271  	v := Value{kind: ^int8(Boolean)}
   272  	if value {
   273  		v.u64 = 1
   274  	}
   275  	return v
   276  }
   277  
   278  func makeValueInt32(value int32) Value {
   279  	return Value{
   280  		kind: ^int8(Int32),
   281  		u64:  uint64(value),
   282  	}
   283  }
   284  
   285  func makeValueInt64(value int64) Value {
   286  	return Value{
   287  		kind: ^int8(Int64),
   288  		u64:  uint64(value),
   289  	}
   290  }
   291  
   292  func makeValueInt96(value deprecated.Int96) Value {
   293  	// TODO: this is highly inefficient because we need a heap allocation to
   294  	// store the value; we don't expect INT96 to be used frequently since it
   295  	// is a deprecated feature of parquet, and it helps keep the Value type
   296  	// compact for all the other more common cases.
   297  	bits := [12]byte{}
   298  	binary.LittleEndian.PutUint32(bits[0:4], value[0])
   299  	binary.LittleEndian.PutUint32(bits[4:8], value[1])
   300  	binary.LittleEndian.PutUint32(bits[8:12], value[2])
   301  	return Value{
   302  		kind: ^int8(Int96),
   303  		ptr:  &bits[0],
   304  		u64:  12, // set the length so we can use the ByteArray method
   305  	}
   306  }
   307  
   308  func makeValueUint32(value uint32) Value {
   309  	return Value{
   310  		kind: ^int8(Int32),
   311  		u64:  uint64(value),
   312  	}
   313  }
   314  
   315  func makeValueUint64(value uint64) Value {
   316  	return Value{
   317  		kind: ^int8(Int64),
   318  		u64:  value,
   319  	}
   320  }
   321  
   322  func makeValueFloat(value float32) Value {
   323  	return Value{
   324  		kind: ^int8(Float),
   325  		u64:  uint64(math.Float32bits(value)),
   326  	}
   327  }
   328  
   329  func makeValueDouble(value float64) Value {
   330  	return Value{
   331  		kind: ^int8(Double),
   332  		u64:  math.Float64bits(value),
   333  	}
   334  }
   335  
   336  func makeValueBytes(kind Kind, value []byte) Value {
   337  	return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
   338  }
   339  
   340  func makeValueString(kind Kind, value string) Value {
   341  	return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
   342  }
   343  
   344  func makeValueFixedLenByteArray(v reflect.Value) Value {
   345  	t := v.Type()
   346  	// When the array is addressable, we take advantage of this
   347  	// condition to avoid the heap allocation otherwise needed
   348  	// to pack the reference into an interface{} value.
   349  	if v.CanAddr() {
   350  		v = v.Addr()
   351  	} else {
   352  		u := reflect.New(t)
   353  		u.Elem().Set(v)
   354  		v = u
   355  	}
   356  	return makeValueByteArray(FixedLenByteArray, (*byte)(unsafePointer(v)), t.Len())
   357  }
   358  
   359  func makeValueByteArray(kind Kind, data *byte, size int) Value {
   360  	return Value{
   361  		kind: ^int8(kind),
   362  		ptr:  data,
   363  		u64:  uint64(size),
   364  	}
   365  }
   366  
   367  // Kind returns the kind of v, which represents its parquet physical type.
   368  func (v Value) Kind() Kind { return ^Kind(v.kind) }
   369  
   370  // IsNull returns true if v is the null value.
   371  func (v Value) IsNull() bool { return v.kind == 0 }
   372  
   373  // Byte returns v as a byte, which may truncate the underlying byte.
   374  func (v Value) Byte() byte { return byte(v.u64) }
   375  
   376  // Boolean returns v as a bool, assuming the underlying type is BOOLEAN.
   377  func (v Value) Boolean() bool { return v.u64 != 0 }
   378  
   379  // Int32 returns v as a int32, assuming the underlying type is INT32.
   380  func (v Value) Int32() int32 { return int32(v.u64) }
   381  
   382  // Int64 returns v as a int64, assuming the underlying type is INT64.
   383  func (v Value) Int64() int64 { return int64(v.u64) }
   384  
   385  // Int96 returns v as a int96, assuming the underlying type is INT96.
   386  func (v Value) Int96() deprecated.Int96 { return makeInt96(v.ByteArray()) }
   387  
   388  // Float returns v as a float32, assuming the underlying type is FLOAT.
   389  func (v Value) Float() float32 { return math.Float32frombits(uint32(v.u64)) }
   390  
   391  // Double returns v as a float64, assuming the underlying type is DOUBLE.
   392  func (v Value) Double() float64 { return math.Float64frombits(v.u64) }
   393  
   394  // Uint32 returns v as a uint32, assuming the underlying type is INT32.
   395  func (v Value) Uint32() uint32 { return uint32(v.u64) }
   396  
   397  // Uint64 returns v as a uint64, assuming the underlying type is INT64.
   398  func (v Value) Uint64() uint64 { return v.u64 }
   399  
   400  // ByteArray returns v as a []byte, assuming the underlying type is either
   401  // BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY.
   402  //
   403  // The application must treat the returned byte slice as a read-only value,
   404  // mutating the content will result in undefined behaviors.
   405  func (v Value) ByteArray() []byte { return unsafe.Slice(v.ptr, int(v.u64)) }
   406  
   407  // RepetitionLevel returns the repetition level of v.
   408  func (v Value) RepetitionLevel() int { return int(v.repetitionLevel) }
   409  
   410  // DefinitionLevel returns the definition level of v.
   411  func (v Value) DefinitionLevel() int { return int(v.definitionLevel) }
   412  
   413  // Column returns the column index within the row that v was created from.
   414  //
   415  // Returns -1 if the value does not carry a column index.
   416  func (v Value) Column() int { return int(^v.columnIndex) }
   417  
   418  // Bytes returns the binary representation of v.
   419  //
   420  // If v is the null value, an nil byte slice is returned.
   421  func (v Value) Bytes() []byte { return v.AppendBytes(nil) }
   422  
   423  // AppendBytes appends the binary representation of v to b.
   424  //
   425  // If v is the null value, b is returned unchanged.
   426  func (v Value) AppendBytes(b []byte) []byte {
   427  	buf := [8]byte{}
   428  	switch v.Kind() {
   429  	case Boolean:
   430  		binary.LittleEndian.PutUint32(buf[:4], uint32(v.u64))
   431  		return append(b, buf[0])
   432  	case Int32, Float:
   433  		binary.LittleEndian.PutUint32(buf[:4], uint32(v.u64))
   434  		return append(b, buf[:4]...)
   435  	case Int64, Double:
   436  		binary.LittleEndian.PutUint64(buf[:8], v.u64)
   437  		return append(b, buf[:8]...)
   438  	case ByteArray, FixedLenByteArray, Int96:
   439  		return append(b, v.ByteArray()...)
   440  	default:
   441  		return b
   442  	}
   443  }
   444  
   445  // Format outputs a human-readable representation of v to w, using r as the
   446  // formatting verb to describe how the value should be printed.
   447  //
   448  // The following formatting options are supported:
   449  //
   450  //		%c	prints the column index
   451  //		%+c	prints the column index, prefixed with "C:"
   452  //		%d	prints the definition level
   453  //		%+d	prints the definition level, prefixed with "D:"
   454  //		%r	prints the repetition level
   455  //		%+r	prints the repetition level, prefixed with "R:"
   456  //		%q	prints the quoted representation of v
   457  //		%+q	prints the quoted representation of v, prefixed with "V:"
   458  //		%s	prints the string representation of v
   459  //		%+s	prints the string representation of v, prefixed with "V:"
   460  //		%v	same as %s
   461  //		%+v	prints a verbose representation of v
   462  //		%#v	prints a Go value representation of v
   463  //
   464  // Format satisfies the fmt.Formatter interface.
   465  func (v Value) Format(w fmt.State, r rune) {
   466  	switch r {
   467  	case 'c':
   468  		if w.Flag('+') {
   469  			io.WriteString(w, "C:")
   470  		}
   471  		fmt.Fprint(w, v.Column())
   472  
   473  	case 'd':
   474  		if w.Flag('+') {
   475  			io.WriteString(w, "D:")
   476  		}
   477  		fmt.Fprint(w, v.DefinitionLevel())
   478  
   479  	case 'r':
   480  		if w.Flag('+') {
   481  			io.WriteString(w, "R:")
   482  		}
   483  		fmt.Fprint(w, v.RepetitionLevel())
   484  
   485  	case 'q':
   486  		if w.Flag('+') {
   487  			io.WriteString(w, "V:")
   488  		}
   489  		switch v.Kind() {
   490  		case ByteArray, FixedLenByteArray:
   491  			fmt.Fprintf(w, "%q", v.ByteArray())
   492  		default:
   493  			fmt.Fprintf(w, `"%s"`, v)
   494  		}
   495  
   496  	case 's':
   497  		if w.Flag('+') {
   498  			io.WriteString(w, "V:")
   499  		}
   500  		switch v.Kind() {
   501  		case Boolean:
   502  			fmt.Fprint(w, v.Boolean())
   503  		case Int32:
   504  			fmt.Fprint(w, v.Int32())
   505  		case Int64:
   506  			fmt.Fprint(w, v.Int64())
   507  		case Int96:
   508  			fmt.Fprint(w, v.Int96())
   509  		case Float:
   510  			fmt.Fprint(w, v.Float())
   511  		case Double:
   512  			fmt.Fprint(w, v.Double())
   513  		case ByteArray, FixedLenByteArray:
   514  			w.Write(v.ByteArray())
   515  		default:
   516  			io.WriteString(w, "<null>")
   517  		}
   518  
   519  	case 'v':
   520  		switch {
   521  		case w.Flag('+'):
   522  			fmt.Fprintf(w, "%+[1]c %+[1]d %+[1]r %+[1]s", v)
   523  		case w.Flag('#'):
   524  			fmt.Fprintf(w, "parquet.Value{%+[1]c, %+[1]d, %+[1]r, %+[1]s}", v)
   525  		default:
   526  			v.Format(w, 's')
   527  		}
   528  	}
   529  }
   530  
   531  // String returns a string representation of v.
   532  func (v Value) String() string {
   533  	switch v.Kind() {
   534  	case Boolean:
   535  		return strconv.FormatBool(v.Boolean())
   536  	case Int32:
   537  		return strconv.FormatInt(int64(v.Int32()), 10)
   538  	case Int64:
   539  		return strconv.FormatInt(v.Int64(), 10)
   540  	case Int96:
   541  		return v.Int96().String()
   542  	case Float:
   543  		return strconv.FormatFloat(float64(v.Float()), 'g', -1, 32)
   544  	case Double:
   545  		return strconv.FormatFloat(v.Double(), 'g', -1, 32)
   546  	case ByteArray, FixedLenByteArray:
   547  		// As an optimizations for the common case of using String on UTF8
   548  		// columns we convert the byte array to a string without copying the
   549  		// underlying data to a new memory location. This is safe as long as the
   550  		// application respects the requirement to not mutate the byte slices
   551  		// returned when calling ByteArray.
   552  		return unsafecast.BytesToString(v.ByteArray())
   553  	default:
   554  		return "<null>"
   555  	}
   556  }
   557  
   558  // GoString returns a Go value string representation of v.
   559  func (v Value) GoString() string {
   560  	return fmt.Sprintf("%#v", v)
   561  }
   562  
   563  // Level returns v with the repetition level, definition level, and column index
   564  // set to the values passed as arguments.
   565  //
   566  // The method panics if either argument is negative.
   567  func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value {
   568  	v.repetitionLevel = makeRepetitionLevel(repetitionLevel)
   569  	v.definitionLevel = makeDefinitionLevel(definitionLevel)
   570  	v.columnIndex = ^makeColumnIndex(columnIndex)
   571  	return v
   572  }
   573  
   574  // Clone returns a copy of v which does not share any pointers with it.
   575  func (v Value) Clone() Value {
   576  	switch k := v.Kind(); k {
   577  	case ByteArray, FixedLenByteArray:
   578  		b := copyBytes(v.ByteArray())
   579  		v.ptr = unsafecast.AddressOfBytes(b)
   580  	}
   581  	return v
   582  }
   583  
   584  func makeInt96(bits []byte) (i96 deprecated.Int96) {
   585  	return deprecated.Int96{
   586  		2: binary.LittleEndian.Uint32(bits[8:12]),
   587  		1: binary.LittleEndian.Uint32(bits[4:8]),
   588  		0: binary.LittleEndian.Uint32(bits[0:4]),
   589  	}
   590  }
   591  
// assignValue stores the parquet value src into the Go value dst.
//
// A null src resets dst to the zero value of its type. Otherwise the
// conversion is selected from the physical kind of src and the reflect kind of
// dst. When no direct conversion applies, the decoded Go value is assigned as
// is, provided its type is assignable to the type of dst (for example when dst
// is an interface{}).
//
// An error is returned when no conversion is possible. dst is written through
// the reflect setters, so it must be settable.
func assignValue(dst reflect.Value, src Value) error {
	if src.IsNull() {
		dst.Set(reflect.Zero(dst.Type()))
		return nil
	}

	dstKind := dst.Kind()
	srcKind := src.Kind()

	// val receives the decoded Go value when none of the direct conversions
	// below applies; it stays invalid when there is nothing to fall back to.
	var val reflect.Value
	switch srcKind {
	case Boolean:
		v := src.Boolean()
		switch dstKind {
		case reflect.Bool:
			dst.SetBool(v)
			return nil
		default:
			val = reflect.ValueOf(v)
		}

	case Int32:
		// The value is widened to int64, which is also the type used by the
		// fallback assignment below.
		v := int64(src.Int32())
		switch dstKind {
		case reflect.Int8, reflect.Int16, reflect.Int32:
			// No overflow check is made for destinations narrower than 32 bits.
			dst.SetInt(int64(v))
			return nil
		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
			dst.SetUint(uint64(v))
			return nil
		default:
			val = reflect.ValueOf(v)
		}

	case Int64:
		v := src.Int64()
		switch dstKind {
		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
			// No overflow check is made for destinations narrower than 64 bits.
			dst.SetInt(v)
			return nil
		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
			dst.SetUint(uint64(v))
			return nil
		default:
			val = reflect.ValueOf(v)
		}

	case Int96:
		// INT96 has no direct conversion, it can only be assigned to
		// destinations accepting a deprecated.Int96.
		val = reflect.ValueOf(src.Int96())

	case Float:
		v := src.Float()
		switch dstKind {
		case reflect.Float32, reflect.Float64:
			dst.SetFloat(float64(v))
			return nil
		default:
			val = reflect.ValueOf(v)
		}

	case Double:
		v := src.Double()
		switch dstKind {
		case reflect.Float32, reflect.Float64:
			dst.SetFloat(v)
			return nil
		default:
			val = reflect.ValueOf(v)
		}

	case ByteArray:
		v := src.ByteArray()
		switch dstKind {
		case reflect.String:
			// The string conversion copies the bytes.
			dst.SetString(string(v))
			return nil
		case reflect.Slice:
			// Only byte slices are supported; the content is copied so dst
			// does not alias the memory referenced by src. Other slice types
			// leave val invalid and end up in the error at the bottom.
			if dst.Type().Elem().Kind() == reflect.Uint8 {
				dst.SetBytes(copyBytes(v))
				return nil
			}
		default:
			val = reflect.ValueOf(v)
		}

	case FixedLenByteArray:
		v := src.ByteArray()
		switch dstKind {
		case reflect.Array:
			// The destination must be a byte array of exactly the same
			// length, otherwise val stays invalid and an error is returned.
			if dst.Type().Elem().Kind() == reflect.Uint8 && dst.Len() == len(v) {
				// This code could be implemented as a call to reflect.Copy but
				// it would require creating a reflect.Value from v which causes
				// the heap allocation to pack the []byte value. To avoid this
				// overhead we instead convert the reflect.Value holding the
				// destination array into a byte slice which allows us to use
				// a more efficient call to copy.
				d := unsafe.Slice((*byte)(unsafecast.PointerOfValue(dst)), len(v))
				copy(d, v)
				return nil
			}
		case reflect.Slice:
			if dst.Type().Elem().Kind() == reflect.Uint8 {
				dst.SetBytes(copyBytes(v))
				return nil
			}
		default:
			val = reflect.ValueOf(v)
		}
	}

	// Fallback: assign the decoded Go value as is when the types allow it.
	if val.IsValid() && val.Type().AssignableTo(dst.Type()) {
		dst.Set(val)
		return nil
	}

	return fmt.Errorf("cannot assign parquet value of type %s to go value of type %s", srcKind.String(), dst.Type())
}
   709  
   710  func parseValue(kind Kind, data []byte) (val Value, err error) {
   711  	switch kind {
   712  	case Boolean:
   713  		if len(data) == 1 {
   714  			val = makeValueBoolean(data[0] != 0)
   715  		}
   716  	case Int32:
   717  		if len(data) == 4 {
   718  			val = makeValueInt32(int32(binary.LittleEndian.Uint32(data)))
   719  		}
   720  	case Int64:
   721  		if len(data) == 8 {
   722  			val = makeValueInt64(int64(binary.LittleEndian.Uint64(data)))
   723  		}
   724  	case Int96:
   725  		if len(data) == 12 {
   726  			val = makeValueInt96(makeInt96(data))
   727  		}
   728  	case Float:
   729  		if len(data) == 4 {
   730  			val = makeValueFloat(float32(math.Float32frombits(binary.LittleEndian.Uint32(data))))
   731  		}
   732  	case Double:
   733  		if len(data) == 8 {
   734  			val = makeValueDouble(float64(math.Float64frombits(binary.LittleEndian.Uint64(data))))
   735  		}
   736  	case ByteArray, FixedLenByteArray:
   737  		val = makeValueBytes(kind, data)
   738  	}
   739  	if val.IsNull() {
   740  		err = fmt.Errorf("cannot decode %s value from input of length %d", kind, len(data))
   741  	}
   742  	return val, err
   743  }
   744  
   745  func copyBytes(b []byte) []byte {
   746  	c := make([]byte, len(b))
   747  	copy(c, b)
   748  	return c
   749  }
   750  
   751  // Equal returns true if v1 and v2 are equal.
   752  //
   753  // Values are considered equal if they are of the same physical type and hold
   754  // the same Go values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY, the content of
   755  // the underlying byte arrays are tested for equality.
   756  //
   757  // Note that the repetition levels, definition levels, and column indexes are
   758  // not compared by this function, use DeepEqual instead.
   759  func Equal(v1, v2 Value) bool {
   760  	if v1.kind != v2.kind {
   761  		return false
   762  	}
   763  	switch v1.Kind() {
   764  	case Boolean:
   765  		return v1.Boolean() == v2.Boolean()
   766  	case Int32:
   767  		return v1.Int32() == v2.Int32()
   768  	case Int64:
   769  		return v1.Int64() == v2.Int64()
   770  	case Int96:
   771  		return v1.Int96() == v2.Int96()
   772  	case Float:
   773  		return v1.Float() == v2.Float()
   774  	case Double:
   775  		return v1.Double() == v2.Double()
   776  	case ByteArray, FixedLenByteArray:
   777  		return bytes.Equal(v1.ByteArray(), v2.ByteArray())
   778  	case -1: // null
   779  		return true
   780  	default:
   781  		return false
   782  	}
   783  }
   784  
   785  // DeepEqual returns true if v1 and v2 are equal, including their repetition
   786  // levels, definition levels, and column indexes.
   787  //
   788  // See Equal for details about how value equality is determined.
   789  func DeepEqual(v1, v2 Value) bool {
   790  	return Equal(v1, v2) &&
   791  		v1.repetitionLevel == v2.repetitionLevel &&
   792  		v1.definitionLevel == v2.definitionLevel &&
   793  		v1.columnIndex == v2.columnIndex
   794  }
   795  
// Compile-time checks that Value satisfies the formatting interfaces of the
// fmt package.
var (
	_ fmt.Formatter = Value{}
	_ fmt.Stringer  = Value{}
)
   800  
   801  func clearValues(values []Value) {
   802  	for i := range values {
   803  		values[i] = Value{}
   804  	}
   805  }
   806  
// BooleanReader is an interface implemented by ValueReader instances which
// expose the content of a column of boolean values.
type BooleanReader interface {
	// ReadBooleans reads boolean values into the buffer passed as argument
	// and returns the number of values read.
	//
	// The method returns io.EOF when all values have been read.
	ReadBooleans(values []bool) (int, error)
}
   815  
// BooleanWriter is an interface implemented by ValueWriter instances which
// support writing columns of boolean values.
type BooleanWriter interface {
	// WriteBooleans writes boolean values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteBooleans(values []bool) (int, error)
}
   825  
// Int32Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int32 values.
type Int32Reader interface {
	// ReadInt32s reads 32 bits integer values into the buffer passed as
	// argument and returns the number of values read.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt32s(values []int32) (int, error)
}
   834  
// Int32Writer is an interface implemented by ValueWriter instances which
// support writing columns of 32 bits signed integer values.
type Int32Writer interface {
	// WriteInt32s writes 32 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt32s(values []int32) (int, error)
}
   844  
// Int64Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int64 values.
type Int64Reader interface {
	// ReadInt64s reads 64 bits integer values into the buffer passed as
	// argument and returns the number of values read.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt64s(values []int64) (int, error)
}
   853  
// Int64Writer is an interface implemented by ValueWriter instances which
// support writing columns of 64 bits signed integer values.
type Int64Writer interface {
	// WriteInt64s writes 64 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt64s(values []int64) (int, error)
}
   863  
// Int96Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int96 values.
type Int96Reader interface {
	// ReadInt96s reads 96 bits integer values into the buffer passed as
	// argument and returns the number of values read.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt96s(values []deprecated.Int96) (int, error)
}
   872  
// Int96Writer is an interface implemented by ValueWriter instances which
// support writing columns of 96 bits signed integer values.
type Int96Writer interface {
	// WriteInt96s writes 96 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt96s(values []deprecated.Int96) (int, error)
}
   882  
// FloatReader is an interface implemented by ValueReader instances which expose
// the content of a column of single-precision floating point values.
type FloatReader interface {
	// ReadFloats reads single-precision floating point values into the buffer
	// passed as argument and returns the number of values read.
	//
	// The method returns io.EOF when all values have been read.
	ReadFloats(values []float32) (int, error)
}
   892  
// FloatWriter is an interface implemented by ValueWriter instances which
// support writing columns of single-precision floating point values.
type FloatWriter interface {
	// WriteFloats writes single-precision floating point values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteFloats(values []float32) (int, error)
}
   902  
// DoubleReader is an interface implemented by ValueReader instances which
// expose the content of a column of double-precision floating point values.
type DoubleReader interface {
	// ReadDoubles reads double-precision floating point values into the buffer
	// passed as argument and returns the number of values read.
	//
	// The method returns io.EOF when all values have been read.
	ReadDoubles(values []float64) (int, error)
}
   912  
// DoubleWriter is an interface implemented by ValueWriter instances which
// support writing columns of double-precision floating point values.
type DoubleWriter interface {
	// WriteDoubles writes double-precision floating point values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteDoubles(values []float64) (int, error)
}
   922  
// ByteArrayReader is an interface implemented by ValueReader instances which
// expose the content of a column of variable length byte array values.
type ByteArrayReader interface {
	// ReadByteArrays reads values into the byte buffer passed as argument,
	// returning the number of values written to the buffer (not the number of
	// bytes). Values are written using the PLAIN encoding, each byte array
	// prefixed with its length encoded as a 4 bytes little endian unsigned
	// integer.
	//
	// The method returns io.EOF when all values have been read.
	//
	// If the buffer was not empty, but too small to hold at least one value,
	// io.ErrShortBuffer is returned.
	ReadByteArrays(values []byte) (int, error)
}
   937  
// ByteArrayWriter is an interface implemented by ValueWriter instances which
// support writing columns of variable length byte array values.
type ByteArrayWriter interface {
	// WriteByteArrays writes variable length byte array values.
	//
	// The values passed as input must be laid out using the PLAIN encoding,
	// with each byte array prefixed with its length encoded as a 4 bytes
	// little endian unsigned integer.
	//
	// The method returns the number of values written to the underlying column
	// (not the number of bytes), or any error that occurred while attempting to
	// write the values.
	WriteByteArrays(values []byte) (int, error)
}
   952  
// FixedLenByteArrayReader is an interface implemented by ValueReader instances
// which expose the content of a column of fixed length byte array values.
type FixedLenByteArrayReader interface {
	// ReadFixedLenByteArrays reads values into the byte buffer passed as
	// argument, returning the number of values written to the buffer (not the
	// number of bytes).
	//
	// The method returns io.EOF when all values have been read.
	//
	// If the buffer was not empty, but too small to hold at least one value,
	// io.ErrShortBuffer is returned.
	ReadFixedLenByteArrays(values []byte) (int, error)
}
   965  
// FixedLenByteArrayWriter is an interface implemented by ValueWriter instances
// which support writing columns of fixed length byte array values.
type FixedLenByteArrayWriter interface {
	// WriteFixedLenByteArrays writes the fixed length byte array values.
	//
	// The size of the values is assumed to be the same as the expected size of
	// items in the column. The method errors if the length of the input values
	// is not a multiple of the expected item size.
	WriteFixedLenByteArrays(values []byte) (int, error)
}