github.com/parquet-go/parquet-go@v0.20.0/convert.go (about)

     1  package parquet
     2  
     3  import (
     4  	"encoding/binary"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"math/big"
    10  	"strconv"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/parquet-go/parquet-go/deprecated"
    15  	"github.com/parquet-go/parquet-go/encoding"
    16  	"github.com/parquet-go/parquet-go/format"
    17  )
    18  
// ConvertError is an error type returned by calls to Convert when the conversion
// of parquet schemas is impossible or the input row for the conversion is
// malformed.
type ConvertError struct {
	// Path is the column path reported in the error message.
	Path []string
	// From is the source node; its type and repetition appear in the message.
	From Node
	// To is the target node; its type and repetition appear in the message.
	To Node
}
    27  
    28  // Error satisfies the error interface.
    29  func (e *ConvertError) Error() string {
    30  	sourceType := e.From.Type()
    31  	targetType := e.To.Type()
    32  
    33  	sourceRepetition := fieldRepetitionTypeOf(e.From)
    34  	targetRepetition := fieldRepetitionTypeOf(e.To)
    35  
    36  	return fmt.Sprintf("cannot convert parquet column %q from %s %s to %s %s",
    37  		columnPath(e.Path),
    38  		sourceRepetition,
    39  		sourceType,
    40  		targetRepetition,
    41  		targetType,
    42  	)
    43  }
    44  
// Conversion is an interface implemented by types that provide conversion of
// parquet rows from one schema to another.
//
// Conversion instances must be safe to use concurrently from multiple goroutines.
type Conversion interface {
	// Applies the conversion logic to the given rows, replacing each element
	// of the slice with its converted form. The method returns the number of
	// rows that were converted, along with any error that interrupted the
	// conversion.
	Convert(rows []Row) (int, error)
	// Converts the given column index in the target schema to the original
	// column index in the source schema of the conversion.
	Column(int) int
	// Returns the target schema of the conversion.
	Schema() *Schema
}
    59  
// conversion is the Conversion implementation built by Convert when the source
// and target nodes differ.
type conversion struct {
	// columns holds one entry per target column, indexed by target column index.
	columns []conversionColumn
	// schema is the target schema returned by the Schema method.
	schema *Schema
	// buffers pools *conversionBuffer values reused across calls to Convert.
	buffers sync.Pool
	// This field is used to size the column buffers held in the sync.Pool since
	// they are intended to store the source rows being converted from.
	numberOfSourceColumns int
}
    68  
// conversionBuffer is the scratch space used by conversion.Convert to hold the
// values of the row being converted, grouped by source column index.
type conversionBuffer struct {
	columns [][]Value
}
    72  
// conversionColumn describes how the values of one target column are produced.
type conversionColumn struct {
	// sourceIndex is the index of the source column the values are copied
	// from; conversion.Convert treats a negative index as "no source column".
	sourceIndex int
	// convertValues rewrites, in place, the values copied to the output row.
	convertValues conversionFunc
}
    77  
// conversionFunc converts the values of a single column in place, returning a
// non-nil error if the conversion failed.
type conversionFunc func([]Value) error
    79  
    80  func convertToSelf(column []Value) error { return nil }
    81  
    82  //go:noinline
    83  func convertToType(targetType, sourceType Type) conversionFunc {
    84  	return func(column []Value) error {
    85  		for i, v := range column {
    86  			v, err := sourceType.ConvertValue(v, targetType)
    87  			if err != nil {
    88  				return err
    89  			}
    90  			column[i].ptr = v.ptr
    91  			column[i].u64 = v.u64
    92  			column[i].kind = v.kind
    93  		}
    94  		return nil
    95  	}
    96  }
    97  
    98  //go:noinline
    99  func convertToValue(value Value) conversionFunc {
   100  	return func(column []Value) error {
   101  		for i := range column {
   102  			column[i] = value
   103  		}
   104  		return nil
   105  	}
   106  }
   107  
   108  //go:noinline
   109  func convertToZero(kind Kind) conversionFunc {
   110  	return func(column []Value) error {
   111  		for i := range column {
   112  			column[i].ptr = nil
   113  			column[i].u64 = 0
   114  			column[i].kind = ^int8(kind)
   115  		}
   116  		return nil
   117  	}
   118  }
   119  
   120  //go:noinline
   121  func convertToLevels(repetitionLevels, definitionLevels []byte) conversionFunc {
   122  	return func(column []Value) error {
   123  		for i := range column {
   124  			r := column[i].repetitionLevel
   125  			d := column[i].definitionLevel
   126  			column[i].repetitionLevel = repetitionLevels[r]
   127  			column[i].definitionLevel = definitionLevels[d]
   128  		}
   129  		return nil
   130  	}
   131  }
   132  
   133  //go:noinline
   134  func multiConversionFunc(conversions []conversionFunc) conversionFunc {
   135  	switch len(conversions) {
   136  	case 0:
   137  		return convertToSelf
   138  	case 1:
   139  		return conversions[0]
   140  	default:
   141  		return func(column []Value) error {
   142  			for _, conv := range conversions {
   143  				if err := conv(column); err != nil {
   144  					return err
   145  				}
   146  			}
   147  			return nil
   148  		}
   149  	}
   150  }
   151  
   152  func (c *conversion) getBuffer() *conversionBuffer {
   153  	b, _ := c.buffers.Get().(*conversionBuffer)
   154  	if b == nil {
   155  		b = &conversionBuffer{
   156  			columns: make([][]Value, c.numberOfSourceColumns),
   157  		}
   158  		values := make([]Value, c.numberOfSourceColumns)
   159  		for i := range b.columns {
   160  			b.columns[i] = values[i : i : i+1]
   161  		}
   162  	}
   163  	return b
   164  }
   165  
   166  func (c *conversion) putBuffer(b *conversionBuffer) {
   167  	c.buffers.Put(b)
   168  }
   169  
   170  // Convert here satisfies the Conversion interface, and does the actual work
   171  // to convert between the source and target Rows.
   172  func (c *conversion) Convert(rows []Row) (int, error) {
   173  	source := c.getBuffer()
   174  	defer c.putBuffer(source)
   175  
   176  	for n, row := range rows {
   177  		for i, values := range source.columns {
   178  			source.columns[i] = values[:0]
   179  		}
   180  		row.Range(func(columnIndex int, columnValues []Value) bool {
   181  			source.columns[columnIndex] = append(source.columns[columnIndex], columnValues...)
   182  			return true
   183  		})
   184  		row = row[:0]
   185  
   186  		for columnIndex, conv := range c.columns {
   187  			columnOffset := len(row)
   188  			if conv.sourceIndex < 0 {
   189  				// When there is no source column, we put a single value as
   190  				// placeholder in the column. This is a condition where the
   191  				// target contained a column which did not exist at had not
   192  				// other columns existing at that same level.
   193  				row = append(row, Value{})
   194  			} else {
   195  				// We must copy to the output row first and not mutate the
   196  				// source columns because multiple target columns may map to
   197  				// the same source column.
   198  				row = append(row, source.columns[conv.sourceIndex]...)
   199  			}
   200  			columnValues := row[columnOffset:]
   201  
   202  			if err := conv.convertValues(columnValues); err != nil {
   203  				return n, err
   204  			}
   205  
   206  			// Since the column index may have changed between the source and
   207  			// taget columns we ensure that the right value is always written
   208  			// to the output row.
   209  			for i := range columnValues {
   210  				columnValues[i].columnIndex = ^int16(columnIndex)
   211  			}
   212  		}
   213  
   214  		rows[n] = row
   215  	}
   216  
   217  	return len(rows), nil
   218  }
   219  
   220  func (c *conversion) Column(i int) int {
   221  	return c.columns[i].sourceIndex
   222  }
   223  
   224  func (c *conversion) Schema() *Schema {
   225  	return c.schema
   226  }
   227  
   228  type identity struct{ schema *Schema }
   229  
   230  func (id identity) Convert(rows []Row) (int, error) { return len(rows), nil }
   231  func (id identity) Column(i int) int                { return i }
   232  func (id identity) Schema() *Schema                 { return id.schema }
   233  
// Convert constructs a conversion function from one parquet schema to another.
//
// The function supports converting between schemas where the source or target
// have extra columns; if there are more columns in the source, they will be
// stripped out of the rows. Extra columns in the target schema will be set to
// null or zero values.
//
// The returned Conversion transforms rows through its Convert method. When the
// two nodes are equal, the returned Conversion leaves rows unchanged.
func Convert(to, from Node) (conv Conversion, err error) {
	// Reuse the target node as schema when it already is one, otherwise wrap
	// it in a new schema with an empty name.
	schema, _ := to.(*Schema)
	if schema == nil {
		schema = NewSchema("", to)
	}

	if nodesAreEqual(to, from) {
		return identity{schema}, nil
	}

	targetMapping, targetColumns := columnMappingOf(to)
	sourceMapping, sourceColumns := columnMappingOf(from)
	columns := make([]conversionColumn, len(targetColumns))

	for i, path := range targetColumns {
		targetColumn := targetMapping.lookup(path)
		sourceColumn := sourceMapping.lookup(path)

		conversions := []conversionFunc{}
		if sourceColumn.node != nil {
			// The column exists on both sides: convert the value types if
			// they differ, then remap the levels if the nesting differs.
			targetType := targetColumn.node.Type()
			sourceType := sourceColumn.node.Type()
			if !typesAreEqual(targetType, sourceType) {
				conversions = append(conversions,
					convertToType(targetType, sourceType),
				)
			}

			// Lookup tables indexed by source level and holding the target
			// level reached at the same depth of the column path.
			repetitionLevels := make([]byte, len(path)+1)
			definitionLevels := make([]byte, len(path)+1)
			targetRepetitionLevel := byte(0)
			targetDefinitionLevel := byte(0)
			sourceRepetitionLevel := byte(0)
			sourceDefinitionLevel := byte(0)
			targetNode := to
			sourceNode := from

			// Walk down the column path on both schemas in lockstep,
			// accumulating the levels contributed by each field.
			for j := 0; j < len(path); j++ {
				targetNode = fieldByName(targetNode, path[j])
				sourceNode = fieldByName(sourceNode, path[j])

				targetRepetitionLevel, targetDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(targetNode),
					targetRepetitionLevel,
					targetDefinitionLevel,
				)
				sourceRepetitionLevel, sourceDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(sourceNode),
					sourceRepetitionLevel,
					sourceDefinitionLevel,
				)

				repetitionLevels[sourceRepetitionLevel] = targetRepetitionLevel
				definitionLevels[sourceDefinitionLevel] = targetDefinitionLevel
			}

			// Trim the tables to the highest source levels of the column.
			repetitionLevels = repetitionLevels[:sourceRepetitionLevel+1]
			definitionLevels = definitionLevels[:sourceDefinitionLevel+1]

			// Remapping is only needed when a table is not the identity.
			if !isDirectLevelMapping(repetitionLevels) || !isDirectLevelMapping(definitionLevels) {
				conversions = append(conversions,
					convertToLevels(repetitionLevels, definitionLevels),
				)
			}

		} else {
			// The column does not exist in the source: fall back to the
			// closest source column if there is one, zeroing the values
			// copied from it, otherwise fill the column with the zero value
			// of the target kind.
			targetType := targetColumn.node.Type()
			targetKind := targetType.Kind()
			sourceColumn = sourceMapping.lookupClosest(path)
			if sourceColumn.node != nil {
				conversions = append(conversions,
					convertToZero(targetKind),
				)
			} else {
				conversions = append(conversions,
					convertToValue(ZeroValue(targetKind)),
				)
			}
		}

		columns[i] = conversionColumn{
			sourceIndex:   int(sourceColumn.columnIndex),
			convertValues: multiConversionFunc(conversions),
		}
	}

	c := &conversion{
		columns:               columns,
		schema:                schema,
		numberOfSourceColumns: len(sourceColumns),
	}
	return c, nil
}
   336  
   337  func isDirectLevelMapping(levels []byte) bool {
   338  	for i, level := range levels {
   339  		if level != byte(i) {
   340  			return false
   341  		}
   342  	}
   343  	return true
   344  }
   345  
   346  // ConvertRowGroup constructs a wrapper of the given row group which applies
   347  // the given schema conversion to its rows.
   348  func ConvertRowGroup(rowGroup RowGroup, conv Conversion) RowGroup {
   349  	schema := conv.Schema()
   350  	numRows := rowGroup.NumRows()
   351  	rowGroupColumns := rowGroup.ColumnChunks()
   352  
   353  	columns := make([]ColumnChunk, numLeafColumnsOf(schema))
   354  	forEachLeafColumnOf(schema, func(leaf leafColumn) {
   355  		i := leaf.columnIndex
   356  		j := conv.Column(int(leaf.columnIndex))
   357  		if j < 0 {
   358  			columns[i] = &missingColumnChunk{
   359  				typ:    leaf.node.Type(),
   360  				column: i,
   361  				// TODO: we assume the number of values is the same as the
   362  				// number of rows, which may not be accurate when the column is
   363  				// part of a repeated group; neighbor columns may be repeated in
   364  				// which case it would be impossible for this chunk not to be.
   365  				numRows:   numRows,
   366  				numValues: numRows,
   367  				numNulls:  numRows,
   368  			}
   369  		} else {
   370  			columns[i] = rowGroupColumns[j]
   371  		}
   372  	})
   373  
   374  	// Sorting columns must exist on the conversion schema in order to be
   375  	// advertised on the converted row group otherwise the resulting rows
   376  	// would not be in the right order.
   377  	sorting := []SortingColumn{}
   378  	for _, col := range rowGroup.SortingColumns() {
   379  		if !hasColumnPath(schema, col.Path()) {
   380  			break
   381  		}
   382  		sorting = append(sorting, col)
   383  	}
   384  
   385  	return &convertedRowGroup{
   386  		// The pair of rowGroup+conv is retained to construct a converted row
   387  		// reader by wrapping the underlying row reader of the row group because
   388  		// it allows proper reconstruction of the repetition and definition
   389  		// levels.
   390  		//
   391  		// TODO: can we figure out how to set the repetition and definition
   392  		// levels when reading values from missing column pages? At first sight
   393  		// it appears complex to do, however:
   394  		//
   395  		// * It is possible that having these levels when reading values of
   396  		//   missing column pages is not necessary in some scenarios (e.g. when
   397  		//   merging row groups).
   398  		//
   399  		// * We may be able to assume the repetition and definition levels at
   400  		//   the call site (e.g. in the functions reading rows from columns).
   401  		//
   402  		// Columns of the source row group which do not exist in the target are
   403  		// masked to prevent loading unneeded pages when reading rows from the
   404  		// converted row group.
   405  		rowGroup: maskMissingRowGroupColumns(rowGroup, len(columns), conv),
   406  		columns:  columns,
   407  		sorting:  sorting,
   408  		conv:     conv,
   409  	}
   410  }
   411  
   412  func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Conversion) RowGroup {
   413  	rowGroupColumns := r.ColumnChunks()
   414  	columns := make([]ColumnChunk, len(rowGroupColumns))
   415  	missing := make([]missingColumnChunk, len(columns))
   416  	numRows := r.NumRows()
   417  
   418  	for i := range missing {
   419  		missing[i] = missingColumnChunk{
   420  			typ:       rowGroupColumns[i].Type(),
   421  			column:    int16(i),
   422  			numRows:   numRows,
   423  			numValues: numRows,
   424  			numNulls:  numRows,
   425  		}
   426  	}
   427  
   428  	for i := range columns {
   429  		columns[i] = &missing[i]
   430  	}
   431  
   432  	for i := 0; i < numColumns; i++ {
   433  		j := conv.Column(i)
   434  		if j >= 0 && j < len(columns) {
   435  			columns[j] = rowGroupColumns[j]
   436  		}
   437  	}
   438  
   439  	return &rowGroup{
   440  		schema:  r.Schema(),
   441  		numRows: numRows,
   442  		columns: columns,
   443  	}
   444  }
   445  
   446  type missingColumnChunk struct {
   447  	typ       Type
   448  	column    int16
   449  	numRows   int64
   450  	numValues int64
   451  	numNulls  int64
   452  }
   453  
   454  func (c *missingColumnChunk) Type() Type                        { return c.typ }
   455  func (c *missingColumnChunk) Column() int                       { return int(c.column) }
   456  func (c *missingColumnChunk) Pages() Pages                      { return onePage(missingPage{c}) }
   457  func (c *missingColumnChunk) ColumnIndex() (ColumnIndex, error) { return missingColumnIndex{c}, nil }
   458  func (c *missingColumnChunk) OffsetIndex() (OffsetIndex, error) { return missingOffsetIndex{}, nil }
   459  func (c *missingColumnChunk) BloomFilter() BloomFilter          { return missingBloomFilter{} }
   460  func (c *missingColumnChunk) NumValues() int64                  { return c.numValues }
   461  
   462  type missingColumnIndex struct{ *missingColumnChunk }
   463  
   464  func (i missingColumnIndex) NumPages() int       { return 1 }
   465  func (i missingColumnIndex) NullCount(int) int64 { return i.numNulls }
   466  func (i missingColumnIndex) NullPage(int) bool   { return true }
   467  func (i missingColumnIndex) MinValue(int) Value  { return Value{} }
   468  func (i missingColumnIndex) MaxValue(int) Value  { return Value{} }
   469  func (i missingColumnIndex) IsAscending() bool   { return true }
   470  func (i missingColumnIndex) IsDescending() bool  { return false }
   471  
   472  type missingOffsetIndex struct{}
   473  
   474  func (missingOffsetIndex) NumPages() int                { return 1 }
   475  func (missingOffsetIndex) Offset(int) int64             { return 0 }
   476  func (missingOffsetIndex) CompressedPageSize(int) int64 { return 0 }
   477  func (missingOffsetIndex) FirstRowIndex(int) int64      { return 0 }
   478  
   479  type missingBloomFilter struct{}
   480  
   481  func (missingBloomFilter) ReadAt([]byte, int64) (int, error) { return 0, io.EOF }
   482  func (missingBloomFilter) Size() int64                       { return 0 }
   483  func (missingBloomFilter) Check(Value) (bool, error)         { return false, nil }
   484  
   485  type missingPage struct{ *missingColumnChunk }
   486  
   487  func (p missingPage) Column() int                       { return int(p.column) }
   488  func (p missingPage) Dictionary() Dictionary            { return nil }
   489  func (p missingPage) NumRows() int64                    { return p.numRows }
   490  func (p missingPage) NumValues() int64                  { return p.numValues }
   491  func (p missingPage) NumNulls() int64                   { return p.numNulls }
   492  func (p missingPage) Bounds() (min, max Value, ok bool) { return }
   493  func (p missingPage) Slice(i, j int64) Page {
   494  	return missingPage{
   495  		&missingColumnChunk{
   496  			typ:       p.typ,
   497  			column:    p.column,
   498  			numRows:   j - i,
   499  			numValues: j - i,
   500  			numNulls:  j - i,
   501  		},
   502  	}
   503  }
   504  func (p missingPage) Size() int64              { return 0 }
   505  func (p missingPage) RepetitionLevels() []byte { return nil }
   506  func (p missingPage) DefinitionLevels() []byte { return nil }
   507  func (p missingPage) Data() encoding.Values    { return p.typ.NewValues(nil, nil) }
   508  func (p missingPage) Values() ValueReader      { return &missingPageValues{page: p} }
   509  
   510  type missingPageValues struct {
   511  	page missingPage
   512  	read int64
   513  }
   514  
   515  func (r *missingPageValues) ReadValues(values []Value) (int, error) {
   516  	remain := r.page.numValues - r.read
   517  	if int64(len(values)) > remain {
   518  		values = values[:remain]
   519  	}
   520  	for i := range values {
   521  		// TODO: how do we set the repetition and definition levels here?
   522  		values[i] = Value{columnIndex: ^r.page.column}
   523  	}
   524  	if r.read += int64(len(values)); r.read == r.page.numValues {
   525  		return len(values), io.EOF
   526  	}
   527  	return len(values), nil
   528  }
   529  
   530  func (r *missingPageValues) Close() error {
   531  	r.read = r.page.numValues
   532  	return nil
   533  }
   534  
   535  type convertedRowGroup struct {
   536  	rowGroup RowGroup
   537  	columns  []ColumnChunk
   538  	sorting  []SortingColumn
   539  	conv     Conversion
   540  }
   541  
   542  func (c *convertedRowGroup) NumRows() int64                  { return c.rowGroup.NumRows() }
   543  func (c *convertedRowGroup) ColumnChunks() []ColumnChunk     { return c.columns }
   544  func (c *convertedRowGroup) Schema() *Schema                 { return c.conv.Schema() }
   545  func (c *convertedRowGroup) SortingColumns() []SortingColumn { return c.sorting }
   546  func (c *convertedRowGroup) Rows() Rows {
   547  	rows := c.rowGroup.Rows()
   548  	return &convertedRows{
   549  		Closer: rows,
   550  		rows:   rows,
   551  		conv:   c.conv,
   552  	}
   553  }
   554  
   555  // ConvertRowReader constructs a wrapper of the given row reader which applies
   556  // the given schema conversion to the rows.
   557  func ConvertRowReader(rows RowReader, conv Conversion) RowReaderWithSchema {
   558  	return &convertedRows{rows: &forwardRowSeeker{rows: rows}, conv: conv}
   559  }
   560  
   561  type convertedRows struct {
   562  	io.Closer
   563  	rows RowReadSeeker
   564  	conv Conversion
   565  }
   566  
   567  func (c *convertedRows) ReadRows(rows []Row) (int, error) {
   568  	n, err := c.rows.ReadRows(rows)
   569  	if n > 0 {
   570  		var convErr error
   571  		n, convErr = c.conv.Convert(rows[:n])
   572  		if convErr != nil {
   573  			err = convErr
   574  		}
   575  	}
   576  	return n, err
   577  }
   578  
   579  func (c *convertedRows) Schema() *Schema {
   580  	return c.conv.Schema()
   581  }
   582  
   583  func (c *convertedRows) SeekToRow(rowIndex int64) error {
   584  	return c.rows.SeekToRow(rowIndex)
   585  }
   586  
var (
	// trueBytes and falseBytes are the textual representations used when
	// converting boolean values to strings.
	trueBytes  = []byte(`true`)
	falseBytes = []byte(`false`)
	// unixEpoch is January 1st 1970 at midnight UTC.
	unixEpoch  = time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)
)
   592  
   593  func convertBooleanToInt32(v Value) (Value, error) {
   594  	return v.convertToInt32(int32(v.byte())), nil
   595  }
   596  
   597  func convertBooleanToInt64(v Value) (Value, error) {
   598  	return v.convertToInt64(int64(v.byte())), nil
   599  }
   600  
   601  func convertBooleanToInt96(v Value) (Value, error) {
   602  	return v.convertToInt96(deprecated.Int96{0: uint32(v.byte())}), nil
   603  }
   604  
   605  func convertBooleanToFloat(v Value) (Value, error) {
   606  	return v.convertToFloat(float32(v.byte())), nil
   607  }
   608  
   609  func convertBooleanToDouble(v Value) (Value, error) {
   610  	return v.convertToDouble(float64(v.byte())), nil
   611  }
   612  
   613  func convertBooleanToByteArray(v Value) (Value, error) {
   614  	return v.convertToByteArray([]byte{v.byte()}), nil
   615  }
   616  
   617  func convertBooleanToFixedLenByteArray(v Value, size int) (Value, error) {
   618  	b := []byte{v.byte()}
   619  	c := make([]byte, size)
   620  	copy(c, b)
   621  	return v.convertToFixedLenByteArray(c), nil
   622  }
   623  
   624  func convertBooleanToString(v Value) (Value, error) {
   625  	b := ([]byte)(nil)
   626  	if v.boolean() {
   627  		b = trueBytes
   628  	} else {
   629  		b = falseBytes
   630  	}
   631  	return v.convertToByteArray(b), nil
   632  }
   633  
   634  func convertInt32ToBoolean(v Value) (Value, error) {
   635  	return v.convertToBoolean(v.int32() != 0), nil
   636  }
   637  
   638  func convertInt32ToInt64(v Value) (Value, error) {
   639  	return v.convertToInt64(int64(v.int32())), nil
   640  }
   641  
   642  func convertInt32ToInt96(v Value) (Value, error) {
   643  	return v.convertToInt96(deprecated.Int32ToInt96(v.int32())), nil
   644  }
   645  
   646  func convertInt32ToFloat(v Value) (Value, error) {
   647  	return v.convertToFloat(float32(v.int32())), nil
   648  }
   649  
   650  func convertInt32ToDouble(v Value) (Value, error) {
   651  	return v.convertToDouble(float64(v.int32())), nil
   652  }
   653  
   654  func convertInt32ToByteArray(v Value) (Value, error) {
   655  	b := make([]byte, 4)
   656  	binary.LittleEndian.PutUint32(b, v.uint32())
   657  	return v.convertToByteArray(b), nil
   658  }
   659  
   660  func convertInt32ToFixedLenByteArray(v Value, size int) (Value, error) {
   661  	b := make([]byte, 4)
   662  	c := make([]byte, size)
   663  	binary.LittleEndian.PutUint32(b, v.uint32())
   664  	copy(c, b)
   665  	return v.convertToFixedLenByteArray(c), nil
   666  }
   667  
   668  func convertInt32ToString(v Value) (Value, error) {
   669  	return v.convertToByteArray(strconv.AppendInt(nil, int64(v.int32()), 10)), nil
   670  }
   671  
   672  func convertInt64ToBoolean(v Value) (Value, error) {
   673  	return v.convertToBoolean(v.int64() != 0), nil
   674  }
   675  
   676  func convertInt64ToInt32(v Value) (Value, error) {
   677  	return v.convertToInt32(int32(v.int64())), nil
   678  }
   679  
   680  func convertInt64ToInt96(v Value) (Value, error) {
   681  	return v.convertToInt96(deprecated.Int64ToInt96(v.int64())), nil
   682  }
   683  
   684  func convertInt64ToFloat(v Value) (Value, error) {
   685  	return v.convertToFloat(float32(v.int64())), nil
   686  }
   687  
   688  func convertInt64ToDouble(v Value) (Value, error) {
   689  	return v.convertToDouble(float64(v.int64())), nil
   690  }
   691  
   692  func convertInt64ToByteArray(v Value) (Value, error) {
   693  	b := make([]byte, 8)
   694  	binary.LittleEndian.PutUint64(b, v.uint64())
   695  	return v.convertToByteArray(b), nil
   696  }
   697  
   698  func convertInt64ToFixedLenByteArray(v Value, size int) (Value, error) {
   699  	b := make([]byte, 8)
   700  	c := make([]byte, size)
   701  	binary.LittleEndian.PutUint64(b, v.uint64())
   702  	copy(c, b)
   703  	return v.convertToFixedLenByteArray(c), nil
   704  }
   705  
   706  func convertInt64ToString(v Value) (Value, error) {
   707  	return v.convertToByteArray(strconv.AppendInt(nil, v.int64(), 10)), nil
   708  }
   709  
   710  func convertInt96ToBoolean(v Value) (Value, error) {
   711  	return v.convertToBoolean(!v.int96().IsZero()), nil
   712  }
   713  
   714  func convertInt96ToInt32(v Value) (Value, error) {
   715  	return v.convertToInt32(v.int96().Int32()), nil
   716  }
   717  
   718  func convertInt96ToInt64(v Value) (Value, error) {
   719  	return v.convertToInt64(v.int96().Int64()), nil
   720  }
   721  
   722  func convertInt96ToFloat(v Value) (Value, error) {
   723  	return v, invalidConversion(v, "INT96", "FLOAT")
   724  }
   725  
   726  func convertInt96ToDouble(v Value) (Value, error) {
   727  	return v, invalidConversion(v, "INT96", "DOUBLE")
   728  }
   729  
   730  func convertInt96ToByteArray(v Value) (Value, error) {
   731  	return v.convertToByteArray(v.byteArray()), nil
   732  }
   733  
   734  func convertInt96ToFixedLenByteArray(v Value, size int) (Value, error) {
   735  	b := v.byteArray()
   736  	if len(b) < size {
   737  		c := make([]byte, size)
   738  		copy(c, b)
   739  		b = c
   740  	} else {
   741  		b = b[:size]
   742  	}
   743  	return v.convertToFixedLenByteArray(b), nil
   744  }
   745  
   746  func convertInt96ToString(v Value) (Value, error) {
   747  	return v.convertToByteArray([]byte(v.String())), nil
   748  }
   749  
   750  func convertFloatToBoolean(v Value) (Value, error) {
   751  	return v.convertToBoolean(v.float() != 0), nil
   752  }
   753  
   754  func convertFloatToInt32(v Value) (Value, error) {
   755  	return v.convertToInt32(int32(v.float())), nil
   756  }
   757  
   758  func convertFloatToInt64(v Value) (Value, error) {
   759  	return v.convertToInt64(int64(v.float())), nil
   760  }
   761  
   762  func convertFloatToInt96(v Value) (Value, error) {
   763  	return v, invalidConversion(v, "FLOAT", "INT96")
   764  }
   765  
   766  func convertFloatToDouble(v Value) (Value, error) {
   767  	return v.convertToDouble(float64(v.float())), nil
   768  }
   769  
   770  func convertFloatToByteArray(v Value) (Value, error) {
   771  	b := make([]byte, 4)
   772  	binary.LittleEndian.PutUint32(b, v.uint32())
   773  	return v.convertToByteArray(b), nil
   774  }
   775  
   776  func convertFloatToFixedLenByteArray(v Value, size int) (Value, error) {
   777  	b := make([]byte, 4)
   778  	c := make([]byte, size)
   779  	binary.LittleEndian.PutUint32(b, v.uint32())
   780  	copy(c, b)
   781  	return v.convertToFixedLenByteArray(c), nil
   782  }
   783  
   784  func convertFloatToString(v Value) (Value, error) {
   785  	return v.convertToByteArray(strconv.AppendFloat(nil, float64(v.float()), 'g', -1, 32)), nil
   786  }
   787  
   788  func convertDoubleToBoolean(v Value) (Value, error) {
   789  	return v.convertToBoolean(v.double() != 0), nil
   790  }
   791  
   792  func convertDoubleToInt32(v Value) (Value, error) {
   793  	return v.convertToInt32(int32(v.double())), nil
   794  }
   795  
   796  func convertDoubleToInt64(v Value) (Value, error) {
   797  	return v.convertToInt64(int64(v.double())), nil
   798  }
   799  
   800  func convertDoubleToInt96(v Value) (Value, error) {
   801  	return v, invalidConversion(v, "FLOAT", "INT96")
   802  }
   803  
   804  func convertDoubleToFloat(v Value) (Value, error) {
   805  	return v.convertToFloat(float32(v.double())), nil
   806  }
   807  
   808  func convertDoubleToByteArray(v Value) (Value, error) {
   809  	b := make([]byte, 8)
   810  	binary.LittleEndian.PutUint64(b, v.uint64())
   811  	return v.convertToByteArray(b), nil
   812  }
   813  
   814  func convertDoubleToFixedLenByteArray(v Value, size int) (Value, error) {
   815  	b := make([]byte, 8)
   816  	c := make([]byte, size)
   817  	binary.LittleEndian.PutUint64(b, v.uint64())
   818  	copy(c, b)
   819  	return v.convertToFixedLenByteArray(c), nil
   820  }
   821  
   822  func convertDoubleToString(v Value) (Value, error) {
   823  	return v.convertToByteArray(strconv.AppendFloat(nil, v.double(), 'g', -1, 64)), nil
   824  }
   825  
   826  func convertByteArrayToBoolean(v Value) (Value, error) {
   827  	return v.convertToBoolean(!isZero(v.byteArray())), nil
   828  }
   829  
   830  func convertByteArrayToInt32(v Value) (Value, error) {
   831  	b := make([]byte, 4)
   832  	copy(b, v.byteArray())
   833  	return v.convertToInt32(int32(binary.LittleEndian.Uint32(b))), nil
   834  }
   835  
   836  func convertByteArrayToInt64(v Value) (Value, error) {
   837  	b := make([]byte, 8)
   838  	copy(b, v.byteArray())
   839  	return v.convertToInt64(int64(binary.LittleEndian.Uint64(b))), nil
   840  }
   841  
   842  func convertByteArrayToInt96(v Value) (Value, error) {
   843  	b := make([]byte, 12)
   844  	copy(b, v.byteArray())
   845  	return v.convertToInt96(deprecated.Int96{
   846  		0: binary.LittleEndian.Uint32(b[0:4]),
   847  		1: binary.LittleEndian.Uint32(b[4:8]),
   848  		2: binary.LittleEndian.Uint32(b[8:12]),
   849  	}), nil
   850  }
   851  
   852  func convertByteArrayToFloat(v Value) (Value, error) {
   853  	b := make([]byte, 4)
   854  	copy(b, v.byteArray())
   855  	return v.convertToFloat(math.Float32frombits(binary.LittleEndian.Uint32(b))), nil
   856  }
   857  
   858  func convertByteArrayToDouble(v Value) (Value, error) {
   859  	b := make([]byte, 8)
   860  	copy(b, v.byteArray())
   861  	return v.convertToDouble(math.Float64frombits(binary.LittleEndian.Uint64(b))), nil
   862  }
   863  
   864  func convertByteArrayToFixedLenByteArray(v Value, size int) (Value, error) {
   865  	b := v.byteArray()
   866  	if len(b) < size {
   867  		c := make([]byte, size)
   868  		copy(c, b)
   869  		b = c
   870  	} else {
   871  		b = b[:size]
   872  	}
   873  	return v.convertToFixedLenByteArray(b), nil
   874  }
   875  
   876  func convertFixedLenByteArrayToString(v Value) (Value, error) {
   877  	b := v.byteArray()
   878  	c := make([]byte, hex.EncodedLen(len(b)))
   879  	hex.Encode(c, b)
   880  	return v.convertToByteArray(c), nil
   881  }
   882  
   883  func convertStringToBoolean(v Value) (Value, error) {
   884  	b, err := strconv.ParseBool(v.string())
   885  	if err != nil {
   886  		return v, conversionError(v, "STRING", "BOOLEAN", err)
   887  	}
   888  	return v.convertToBoolean(b), nil
   889  }
   890  
   891  func convertStringToInt32(v Value) (Value, error) {
   892  	i, err := strconv.ParseInt(v.string(), 10, 32)
   893  	if err != nil {
   894  		return v, conversionError(v, "STRING", "INT32", err)
   895  	}
   896  	return v.convertToInt32(int32(i)), nil
   897  }
   898  
   899  func convertStringToInt64(v Value) (Value, error) {
   900  	i, err := strconv.ParseInt(v.string(), 10, 64)
   901  	if err != nil {
   902  		return v, conversionError(v, "STRING", "INT64", err)
   903  	}
   904  	return v.convertToInt64(i), nil
   905  }
   906  
   907  func convertStringToInt96(v Value) (Value, error) {
   908  	i, ok := new(big.Int).SetString(v.string(), 10)
   909  	if !ok {
   910  		return v, conversionError(v, "STRING", "INT96", strconv.ErrSyntax)
   911  	}
   912  	b := i.Bytes()
   913  	c := make([]byte, 12)
   914  	copy(c, b)
   915  	i96 := deprecated.BytesToInt96(c)
   916  	return v.convertToInt96(i96[0]), nil
   917  }
   918  
   919  func convertStringToFloat(v Value) (Value, error) {
   920  	f, err := strconv.ParseFloat(v.string(), 32)
   921  	if err != nil {
   922  		return v, conversionError(v, "STRING", "FLOAT", err)
   923  	}
   924  	return v.convertToFloat(float32(f)), nil
   925  }
   926  
   927  func convertStringToDouble(v Value) (Value, error) {
   928  	f, err := strconv.ParseFloat(v.string(), 64)
   929  	if err != nil {
   930  		return v, conversionError(v, "STRING", "DOUBLE", err)
   931  	}
   932  	return v.convertToDouble(f), nil
   933  }
   934  
   935  func convertStringToFixedLenByteArray(v Value, size int) (Value, error) {
   936  	b := v.byteArray()
   937  	c := make([]byte, size)
   938  	_, err := hex.Decode(c, b)
   939  	if err != nil {
   940  		return v, conversionError(v, "STRING", "BYTE_ARRAY", err)
   941  	}
   942  	return v.convertToFixedLenByteArray(c), nil
   943  }
   944  
   945  func convertStringToDate(v Value, tz *time.Location) (Value, error) {
   946  	t, err := time.ParseInLocation("2006-01-02", v.string(), tz)
   947  	if err != nil {
   948  		return v, conversionError(v, "STRING", "DATE", err)
   949  	}
   950  	d := daysSinceUnixEpoch(t)
   951  	return v.convertToInt32(int32(d)), nil
   952  }
   953  
   954  func convertStringToTimeMillis(v Value, tz *time.Location) (Value, error) {
   955  	t, err := time.ParseInLocation("15:04:05.999", v.string(), tz)
   956  	if err != nil {
   957  		return v, conversionError(v, "STRING", "TIME", err)
   958  	}
   959  	m := nearestMidnightLessThan(t)
   960  	milliseconds := t.Sub(m).Milliseconds()
   961  	return v.convertToInt32(int32(milliseconds)), nil
   962  }
   963  
   964  func convertStringToTimeMicros(v Value, tz *time.Location) (Value, error) {
   965  	t, err := time.ParseInLocation("15:04:05.999999", v.string(), tz)
   966  	if err != nil {
   967  		return v, conversionError(v, "STRING", "TIME", err)
   968  	}
   969  	m := nearestMidnightLessThan(t)
   970  	microseconds := t.Sub(m).Microseconds()
   971  	return v.convertToInt64(microseconds), nil
   972  }
   973  
   974  func convertDateToTimestamp(v Value, u format.TimeUnit, tz *time.Location) (Value, error) {
   975  	t := unixEpoch.AddDate(0, 0, int(v.int32()))
   976  	d := timeUnitDuration(u)
   977  	return v.convertToInt64(int64(t.In(tz).Sub(unixEpoch) / d)), nil
   978  }
   979  
   980  func convertDateToString(v Value) (Value, error) {
   981  	t := unixEpoch.AddDate(0, 0, int(v.int32()))
   982  	b := t.AppendFormat(make([]byte, 0, 10), "2006-01-02")
   983  	return v.convertToByteArray(b), nil
   984  }
   985  
   986  func convertTimeMillisToString(v Value, tz *time.Location) (Value, error) {
   987  	t := time.UnixMilli(int64(v.int32())).In(tz)
   988  	b := t.AppendFormat(make([]byte, 0, 12), "15:04:05.999")
   989  	return v.convertToByteArray(b), nil
   990  }
   991  
   992  func convertTimeMicrosToString(v Value, tz *time.Location) (Value, error) {
   993  	t := time.UnixMicro(v.int64()).In(tz)
   994  	b := t.AppendFormat(make([]byte, 0, 15), "15:04:05.999999")
   995  	return v.convertToByteArray(b), nil
   996  }
   997  
   998  func convertTimestampToDate(v Value, u format.TimeUnit, tz *time.Location) (Value, error) {
   999  	t := timestamp(v, u, tz)
  1000  	d := daysSinceUnixEpoch(t)
  1001  	return v.convertToInt32(int32(d)), nil
  1002  }
  1003  
  1004  func convertTimestampToTimeMillis(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) {
  1005  	t := timestamp(v, u, sourceZone)
  1006  	m := nearestMidnightLessThan(t)
  1007  	milliseconds := t.In(targetZone).Sub(m).Milliseconds()
  1008  	return v.convertToInt32(int32(milliseconds)), nil
  1009  }
  1010  
  1011  func convertTimestampToTimeMicros(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) {
  1012  	t := timestamp(v, u, sourceZone)
  1013  	m := nearestMidnightLessThan(t)
  1014  	microseconds := t.In(targetZone).Sub(m).Microseconds()
  1015  	return v.convertToInt64(int64(microseconds)), nil
  1016  }
  1017  
  1018  func convertTimestampToTimestamp(v Value, sourceUnit, targetUnit format.TimeUnit) (Value, error) {
  1019  	sourceScale := timeUnitDuration(sourceUnit).Nanoseconds()
  1020  	targetScale := timeUnitDuration(targetUnit).Nanoseconds()
  1021  	targetValue := (v.int64() * sourceScale) / targetScale
  1022  	return v.convertToInt64(targetValue), nil
  1023  }
  1024  
// nanosecondsPerDay is the number of nanoseconds in a 24-hour day. Because of
// the 1e9 factor it is an untyped floating-point constant.
const nanosecondsPerDay = 24 * 60 * 60 * 1e9
  1026  
  1027  func daysSinceUnixEpoch(t time.Time) int {
  1028  	return int(t.Sub(unixEpoch).Hours()) / 24
  1029  }
  1030  
  1031  func nearestMidnightLessThan(t time.Time) time.Time {
  1032  	y, m, d := t.Date()
  1033  	return time.Date(y, m, d, 0, 0, 0, 0, t.Location())
  1034  }
  1035  
  1036  func timestamp(v Value, u format.TimeUnit, tz *time.Location) time.Time {
  1037  	return unixEpoch.In(tz).Add(time.Duration(v.int64()) * timeUnitDuration(u))
  1038  }
  1039  
  1040  func timeUnitDuration(unit format.TimeUnit) time.Duration {
  1041  	switch {
  1042  	case unit.Millis != nil:
  1043  		return time.Millisecond
  1044  	case unit.Micros != nil:
  1045  		return time.Microsecond
  1046  	default:
  1047  		return time.Nanosecond
  1048  	}
  1049  }
  1050  
  1051  func invalidConversion(value Value, from, to string) error {
  1052  	return fmt.Errorf("%s to %s: %s: %w", from, to, value, ErrInvalidConversion)
  1053  }
  1054  
  1055  func conversionError(value Value, from, to string, err error) error {
  1056  	return fmt.Errorf("%s to %s: %q: %s: %w", from, to, value.string(), err, ErrInvalidConversion)
  1057  }