github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/convert.go (about)

     1  package parquet
     2  
     3  import (
     4  	"encoding/binary"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  	"math/big"
    10  	"strconv"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/segmentio/parquet-go/deprecated"
    15  	"github.com/segmentio/parquet-go/encoding"
    16  	"github.com/segmentio/parquet-go/format"
    17  )
    18  
    19  // ConvertError is an error type returned by calls to Convert when the conversion
    20  // of parquet schemas is impossible or the input row for the conversion is
    21  // malformed.
    22  type ConvertError struct {
    23  	Path []string
    24  	From Node
    25  	To   Node
    26  }
    27  
    28  // Error satisfies the error interface.
    29  func (e *ConvertError) Error() string {
    30  	sourceType := e.From.Type()
    31  	targetType := e.To.Type()
    32  
    33  	sourceRepetition := fieldRepetitionTypeOf(e.From)
    34  	targetRepetition := fieldRepetitionTypeOf(e.To)
    35  
    36  	return fmt.Sprintf("cannot convert parquet column %q from %s %s to %s %s",
    37  		columnPath(e.Path),
    38  		sourceRepetition,
    39  		sourceType,
    40  		targetRepetition,
    41  		targetType,
    42  	)
    43  }
    44  
// Conversion is an interface implemented by types that provide conversion of
// parquet rows from one schema to another.
//
// Conversion instances must be safe to use concurrently from multiple goroutines.
type Conversion interface {
	// Convert applies the conversion logic to the given rows, replacing each
	// element of the slice with its converted form.
	//
	// It returns the number of rows that were converted, along with the
	// error that interrupted the conversion, if any.
	Convert(rows []Row) (int, error)
	// Column converts the given column index in the target schema to the
	// original column index in the source schema of the conversion.
	//
	// Callers in this file treat a negative result as "the target column has
	// no counterpart in the source schema".
	Column(int) int
	// Schema returns the target schema of the conversion.
	Schema() *Schema
}
    59  
// conversion is the Conversion implementation constructed by Convert when the
// source and target nodes are not equal.
type conversion struct {
	// One entry per column of the target schema, indexed by target column
	// index.
	columns []conversionColumn
	// Target schema, as returned by the Schema method.
	schema  *Schema
	// Pool of *conversionBuffer values reused between calls to Convert.
	buffers sync.Pool
	// This field is used to size the column buffers held in the sync.Pool since
	// they are intended to store the source rows being converted from.
	numberOfSourceColumns int
}

// conversionBuffer is the scratch space used while converting rows: columns
// is indexed by source column index and holds the values of that column for
// the row currently being converted.
type conversionBuffer struct {
	columns [][]Value
}

// conversionColumn describes how one column of the target schema is produced.
type conversionColumn struct {
	// Index of the source column the values are copied from; a negative
	// index means there is no source column to copy from.
	sourceIndex   int
	// Function applied in place to the values copied into the output row.
	convertValues conversionFunc
}

// conversionFunc rewrites the values of a single column in place, returning
// an error if a value cannot be converted.
type conversionFunc func([]Value) error
    79  
    80  func convertToSelf(column []Value) error { return nil }
    81  
    82  //go:noinline
    83  func convertToType(targetType, sourceType Type) conversionFunc {
    84  	return func(column []Value) error {
    85  		for i, v := range column {
    86  			v, err := sourceType.ConvertValue(v, targetType)
    87  			if err != nil {
    88  				return err
    89  			}
    90  			column[i].ptr = v.ptr
    91  			column[i].u64 = v.u64
    92  			column[i].kind = v.kind
    93  		}
    94  		return nil
    95  	}
    96  }
    97  
    98  //go:noinline
    99  func convertToValue(value Value) conversionFunc {
   100  	return func(column []Value) error {
   101  		for i := range column {
   102  			column[i] = value
   103  		}
   104  		return nil
   105  	}
   106  }
   107  
   108  //go:noinline
   109  func convertToZero(kind Kind) conversionFunc {
   110  	return func(column []Value) error {
   111  		for i := range column {
   112  			column[i].ptr = nil
   113  			column[i].u64 = 0
   114  			column[i].kind = ^int8(kind)
   115  		}
   116  		return nil
   117  	}
   118  }
   119  
   120  //go:noinline
   121  func convertToLevels(repetitionLevels, definitionLevels []byte) conversionFunc {
   122  	return func(column []Value) error {
   123  		for i := range column {
   124  			r := column[i].repetitionLevel
   125  			d := column[i].definitionLevel
   126  			column[i].repetitionLevel = repetitionLevels[r]
   127  			column[i].definitionLevel = definitionLevels[d]
   128  		}
   129  		return nil
   130  	}
   131  }
   132  
   133  //go:noinline
   134  func multiConversionFunc(conversions []conversionFunc) conversionFunc {
   135  	switch len(conversions) {
   136  	case 0:
   137  		return convertToSelf
   138  	case 1:
   139  		return conversions[0]
   140  	default:
   141  		return func(column []Value) error {
   142  			for _, conv := range conversions {
   143  				if err := conv(column); err != nil {
   144  					return err
   145  				}
   146  			}
   147  			return nil
   148  		}
   149  	}
   150  }
   151  
   152  func (c *conversion) getBuffer() *conversionBuffer {
   153  	b, _ := c.buffers.Get().(*conversionBuffer)
   154  	if b == nil {
   155  		b = &conversionBuffer{
   156  			columns: make([][]Value, c.numberOfSourceColumns),
   157  		}
   158  		values := make([]Value, c.numberOfSourceColumns)
   159  		for i := range b.columns {
   160  			b.columns[i] = values[i : i : i+1]
   161  		}
   162  	}
   163  	return b
   164  }
   165  
   166  func (c *conversion) putBuffer(b *conversionBuffer) {
   167  	c.buffers.Put(b)
   168  }
   169  
// Convert here satisfies the Conversion interface, and does the actual work
// to convert between the source and target Rows.
//
// Each row is first split into per-source-column buffers, then rebuilt in
// target column order on top of its own backing array, and finally written
// back to rows. On error, the index of the row that failed is returned along
// with the error; that row is not written back to rows.
func (c *conversion) Convert(rows []Row) (int, error) {
	source := c.getBuffer()
	defer c.putBuffer(source)

	for n, row := range rows {
		// Reset the buffers while keeping their capacity, then copy the
		// values of the row into them, grouped by source column.
		for i, values := range source.columns {
			source.columns[i] = values[:0]
		}
		row.Range(func(columnIndex int, columnValues []Value) bool {
			source.columns[columnIndex] = append(source.columns[columnIndex], columnValues...)
			return true
		})
		// The values were copied out, the row can be rebuilt from scratch
		// while reusing its backing array.
		row = row[:0]

		for columnIndex, conv := range c.columns {
			columnOffset := len(row)
			if conv.sourceIndex < 0 {
				// When there is no source column, we put a single value as
				// placeholder in the column. This is a condition where the
				// target contained a column which did not exist in the
				// source and had no other columns existing at that same
				// level.
				row = append(row, Value{})
			} else {
				// We must copy to the output row first and not mutate the
				// source columns because multiple target columns may map to
				// the same source column.
				row = append(row, source.columns[conv.sourceIndex]...)
			}
			columnValues := row[columnOffset:]

			if err := conv.convertValues(columnValues); err != nil {
				return n, err
			}

			// Since the column index may have changed between the source and
			// target columns we ensure that the right value is always written
			// to the output row.
			for i := range columnValues {
				columnValues[i].columnIndex = ^int16(columnIndex)
			}
		}

		rows[n] = row
	}

	return len(rows), nil
}
   219  
   220  func (c *conversion) Column(i int) int {
   221  	return c.columns[i].sourceIndex
   222  }
   223  
   224  func (c *conversion) Schema() *Schema {
   225  	return c.schema
   226  }
   227  
   228  type identity struct{ schema *Schema }
   229  
   230  func (id identity) Convert(rows []Row) (int, error) { return len(rows), nil }
   231  func (id identity) Column(i int) int                { return i }
   232  func (id identity) Schema() *Schema                 { return id.schema }
   233  
// Convert constructs a conversion function from one parquet schema to another.
//
// The function supports converting between schemas where the source or target
// have extra columns; if there are more columns in the source, they will be
// stripped out of the rows. Extra columns in the target schema will be set to
// null or zero values.
//
// The first argument is the target of the conversion and the second is its
// source. When both nodes are equal, the returned Conversion leaves rows
// untouched; otherwise rows are rewritten through its Convert method.
//
// As written, every return path of this function returns a nil error.
func Convert(to, from Node) (conv Conversion, err error) {
	// The target schema is the target node itself when it already is a
	// *Schema, otherwise the node is wrapped in a schema with an empty name.
	schema, _ := to.(*Schema)
	if schema == nil {
		schema = NewSchema("", to)
	}

	if nodesAreEqual(to, from) {
		return identity{schema}, nil
	}

	targetMapping, targetColumns := columnMappingOf(to)
	sourceMapping, sourceColumns := columnMappingOf(from)
	columns := make([]conversionColumn, len(targetColumns))

	for i, path := range targetColumns {
		targetColumn := targetMapping.lookup(path)
		sourceColumn := sourceMapping.lookup(path)

		conversions := []conversionFunc{}
		if sourceColumn.node != nil {
			// The column exists in both schemas: convert the value type if
			// it differs, then remap the levels if they differ.
			targetType := targetColumn.node.Type()
			sourceType := sourceColumn.node.Type()
			if !typesAreEqual(targetType, sourceType) {
				conversions = append(conversions,
					convertToType(targetType, sourceType),
				)
			}

			// These tables are indexed by source level and hold the
			// matching target level; they are filled while walking down
			// the column path in both schemas at the same time.
			repetitionLevels := make([]byte, len(path)+1)
			definitionLevels := make([]byte, len(path)+1)
			targetRepetitionLevel := byte(0)
			targetDefinitionLevel := byte(0)
			sourceRepetitionLevel := byte(0)
			sourceDefinitionLevel := byte(0)
			targetNode := to
			sourceNode := from

			for j := 0; j < len(path); j++ {
				targetNode = fieldByName(targetNode, path[j])
				sourceNode = fieldByName(sourceNode, path[j])

				targetRepetitionLevel, targetDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(targetNode),
					targetRepetitionLevel,
					targetDefinitionLevel,
				)
				sourceRepetitionLevel, sourceDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(sourceNode),
					sourceRepetitionLevel,
					sourceDefinitionLevel,
				)

				repetitionLevels[sourceRepetitionLevel] = targetRepetitionLevel
				definitionLevels[sourceDefinitionLevel] = targetDefinitionLevel
			}

			// Only keep the entries up to the maximum source levels.
			repetitionLevels = repetitionLevels[:sourceRepetitionLevel+1]
			definitionLevels = definitionLevels[:sourceDefinitionLevel+1]

			// The remapping is skipped when both tables map every level to
			// itself.
			if !isDirectLevelMapping(repetitionLevels) || !isDirectLevelMapping(definitionLevels) {
				conversions = append(conversions,
					convertToLevels(repetitionLevels, definitionLevels),
				)
			}

		} else {
			// The column only exists in the target schema. If the source
			// has a closest column for this path, its values are copied
			// and zeroed; otherwise the column is filled with the zero
			// value of the target kind.
			targetType := targetColumn.node.Type()
			targetKind := targetType.Kind()
			sourceColumn = sourceMapping.lookupClosest(path)
			if sourceColumn.node != nil {
				conversions = append(conversions,
					convertToZero(targetKind),
				)
			} else {
				conversions = append(conversions,
					convertToValue(ZeroValue(targetKind)),
				)
			}
		}

		columns[i] = conversionColumn{
			sourceIndex:   int(sourceColumn.columnIndex),
			convertValues: multiConversionFunc(conversions),
		}
	}

	c := &conversion{
		columns:               columns,
		schema:                schema,
		numberOfSourceColumns: len(sourceColumns),
	}
	return c, nil
}
   336  
   337  func isDirectLevelMapping(levels []byte) bool {
   338  	for i, level := range levels {
   339  		if level != byte(i) {
   340  			return false
   341  		}
   342  	}
   343  	return true
   344  }
   345  
// ConvertRowGroup constructs a wrapper of the given row group which applies
// the given schema conversion to its rows.
//
// The column chunks of the returned row group follow the target schema of the
// conversion: target columns which map to a source column reuse the chunk of
// the source row group, the others are backed by a placeholder chunk.
func ConvertRowGroup(rowGroup RowGroup, conv Conversion) RowGroup {
	schema := conv.Schema()
	numRows := rowGroup.NumRows()
	rowGroupColumns := rowGroup.ColumnChunks()

	columns := make([]ColumnChunk, numLeafColumnsOf(schema))
	forEachLeafColumnOf(schema, func(leaf leafColumn) {
		// i is the column index in the target schema, j is the matching
		// column index in the source schema (negative when missing).
		i := leaf.columnIndex
		j := conv.Column(int(leaf.columnIndex))
		if j < 0 {
			columns[i] = &missingColumnChunk{
				typ:    leaf.node.Type(),
				column: i,
				// TODO: we assume the number of values is the same as the
				// number of rows, which may not be accurate when the column is
				// part of a repeated group; neighbor columns may be repeated in
				// which case it would be impossible for this chunk not to be.
				numRows:   numRows,
				numValues: numRows,
				numNulls:  numRows,
			}
		} else {
			columns[i] = rowGroupColumns[j]
		}
	})

	// Sorting columns must exist on the conversion schema in order to be
	// advertised on the converted row group otherwise the resulting rows
	// would not be in the right order.
	//
	// The loop stops at the first sorting column missing from the schema, so
	// only the leading sorting columns are retained.
	sorting := []SortingColumn{}
	for _, col := range rowGroup.SortingColumns() {
		if !hasColumnPath(schema, col.Path()) {
			break
		}
		sorting = append(sorting, col)
	}

	return &convertedRowGroup{
		// The pair of rowGroup+conv is retained to construct a converted row
		// reader by wrapping the underlying row reader of the row group because
		// it allows proper reconstruction of the repetition and definition
		// levels.
		//
		// TODO: can we figure out how to set the repetition and definition
		// levels when reading values from missing column pages? At first sight
		// it appears complex to do, however:
		//
		// * It is possible that having these levels when reading values of
		//   missing column pages is not necessary in some scenarios (e.g. when
		//   merging row groups).
		//
		// * We may be able to assume the repetition and definition levels at
		//   the call site (e.g. in the functions reading rows from columns).
		//
		// Columns of the source row group which do not exist in the target are
		// masked to prevent loading unneeded pages when reading rows from the
		// converted row group.
		rowGroup: maskMissingRowGroupColumns(rowGroup, len(columns), conv),
		columns:  columns,
		sorting:  sorting,
		conv:     conv,
	}
}
   411  
   412  func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Conversion) RowGroup {
   413  	rowGroupColumns := r.ColumnChunks()
   414  	columns := make([]ColumnChunk, len(rowGroupColumns))
   415  	missing := make([]missingColumnChunk, len(columns))
   416  	numRows := r.NumRows()
   417  
   418  	for i := range missing {
   419  		missing[i] = missingColumnChunk{
   420  			typ:       rowGroupColumns[i].Type(),
   421  			column:    int16(i),
   422  			numRows:   numRows,
   423  			numValues: numRows,
   424  			numNulls:  numRows,
   425  		}
   426  	}
   427  
   428  	for i := range columns {
   429  		columns[i] = &missing[i]
   430  	}
   431  
   432  	for i := 0; i < numColumns; i++ {
   433  		j := conv.Column(i)
   434  		if j >= 0 && j < len(columns) {
   435  			columns[j] = rowGroupColumns[j]
   436  		}
   437  	}
   438  
   439  	return &rowGroup{
   440  		schema:  r.Schema(),
   441  		numRows: numRows,
   442  		columns: columns,
   443  	}
   444  }
   445  
   446  type missingColumnChunk struct {
   447  	typ       Type
   448  	column    int16
   449  	numRows   int64
   450  	numValues int64
   451  	numNulls  int64
   452  }
   453  
   454  func (c *missingColumnChunk) Type() Type               { return c.typ }
   455  func (c *missingColumnChunk) Column() int              { return int(c.column) }
   456  func (c *missingColumnChunk) Pages() Pages             { return onePage(missingPage{c}) }
   457  func (c *missingColumnChunk) ColumnIndex() ColumnIndex { return missingColumnIndex{c} }
   458  func (c *missingColumnChunk) OffsetIndex() OffsetIndex { return missingOffsetIndex{} }
   459  func (c *missingColumnChunk) BloomFilter() BloomFilter { return missingBloomFilter{} }
   460  func (c *missingColumnChunk) NumValues() int64         { return 0 }
   461  
   462  type missingColumnIndex struct{ *missingColumnChunk }
   463  
   464  func (i missingColumnIndex) NumPages() int       { return 1 }
   465  func (i missingColumnIndex) NullCount(int) int64 { return i.numNulls }
   466  func (i missingColumnIndex) NullPage(int) bool   { return true }
   467  func (i missingColumnIndex) MinValue(int) Value  { return Value{} }
   468  func (i missingColumnIndex) MaxValue(int) Value  { return Value{} }
   469  func (i missingColumnIndex) IsAscending() bool   { return true }
   470  func (i missingColumnIndex) IsDescending() bool  { return false }
   471  
   472  type missingOffsetIndex struct{}
   473  
   474  func (missingOffsetIndex) NumPages() int                { return 1 }
   475  func (missingOffsetIndex) Offset(int) int64             { return 0 }
   476  func (missingOffsetIndex) CompressedPageSize(int) int64 { return 0 }
   477  func (missingOffsetIndex) FirstRowIndex(int) int64      { return 0 }
   478  
   479  type missingBloomFilter struct{}
   480  
   481  func (missingBloomFilter) ReadAt([]byte, int64) (int, error) { return 0, io.EOF }
   482  func (missingBloomFilter) Size() int64                       { return 0 }
   483  func (missingBloomFilter) Check(Value) (bool, error)         { return false, nil }
   484  
   485  type missingPage struct{ *missingColumnChunk }
   486  
   487  func (p missingPage) Column() int                       { return int(p.column) }
   488  func (p missingPage) Dictionary() Dictionary            { return nil }
   489  func (p missingPage) NumRows() int64                    { return p.numRows }
   490  func (p missingPage) NumValues() int64                  { return p.numValues }
   491  func (p missingPage) NumNulls() int64                   { return p.numNulls }
   492  func (p missingPage) Bounds() (min, max Value, ok bool) { return }
   493  func (p missingPage) Slice(i, j int64) Page             { return p }
   494  func (p missingPage) Size() int64                       { return 0 }
   495  func (p missingPage) RepetitionLevels() []byte          { return nil }
   496  func (p missingPage) DefinitionLevels() []byte          { return nil }
   497  func (p missingPage) Data() encoding.Values             { return p.typ.NewValues(nil, nil) }
   498  func (p missingPage) Values() ValueReader               { return &missingPageValues{page: p} }
   499  
   500  type missingPageValues struct {
   501  	page missingPage
   502  	read int64
   503  }
   504  
   505  func (r *missingPageValues) ReadValues(values []Value) (int, error) {
   506  	remain := r.page.numValues - r.read
   507  	if int64(len(values)) > remain {
   508  		values = values[:remain]
   509  	}
   510  	for i := range values {
   511  		// TODO: how do we set the repetition and definition levels here?
   512  		values[i] = Value{columnIndex: ^r.page.column}
   513  	}
   514  	if r.read += int64(len(values)); r.read == r.page.numValues {
   515  		return len(values), io.EOF
   516  	}
   517  	return len(values), nil
   518  }
   519  
   520  func (r *missingPageValues) Close() error {
   521  	r.read = r.page.numValues
   522  	return nil
   523  }
   524  
   525  type convertedRowGroup struct {
   526  	rowGroup RowGroup
   527  	columns  []ColumnChunk
   528  	sorting  []SortingColumn
   529  	conv     Conversion
   530  }
   531  
   532  func (c *convertedRowGroup) NumRows() int64                  { return c.rowGroup.NumRows() }
   533  func (c *convertedRowGroup) ColumnChunks() []ColumnChunk     { return c.columns }
   534  func (c *convertedRowGroup) Schema() *Schema                 { return c.conv.Schema() }
   535  func (c *convertedRowGroup) SortingColumns() []SortingColumn { return c.sorting }
   536  func (c *convertedRowGroup) Rows() Rows {
   537  	rows := c.rowGroup.Rows()
   538  	return &convertedRows{
   539  		Closer: rows,
   540  		rows:   rows,
   541  		conv:   c.conv,
   542  	}
   543  }
   544  
   545  // ConvertRowReader constructs a wrapper of the given row reader which applies
   546  // the given schema conversion to the rows.
   547  func ConvertRowReader(rows RowReader, conv Conversion) RowReaderWithSchema {
   548  	return &convertedRows{rows: &forwardRowSeeker{rows: rows}, conv: conv}
   549  }
   550  
   551  type convertedRows struct {
   552  	io.Closer
   553  	rows RowReadSeeker
   554  	conv Conversion
   555  }
   556  
   557  func (c *convertedRows) ReadRows(rows []Row) (int, error) {
   558  	n, err := c.rows.ReadRows(rows)
   559  	if n > 0 {
   560  		var convErr error
   561  		n, convErr = c.conv.Convert(rows[:n])
   562  		if convErr != nil {
   563  			err = convErr
   564  		}
   565  	}
   566  	return n, err
   567  }
   568  
   569  func (c *convertedRows) Schema() *Schema {
   570  	return c.conv.Schema()
   571  }
   572  
   573  func (c *convertedRows) SeekToRow(rowIndex int64) error {
   574  	return c.rows.SeekToRow(rowIndex)
   575  }
   576  
var (
	// Textual representations of booleans, used by convertBooleanToString.
	// The slices are shared between values and must not be modified.
	trueBytes  = []byte(`true`)
	falseBytes = []byte(`false`)
	// January 1st 1970 at midnight UTC.
	// NOTE(review): nothing in the visible part of this file references it;
	// its users are presumably defined elsewhere in the package.
	unixEpoch  = time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)
)
   582  
   583  func convertBooleanToInt32(v Value) (Value, error) {
   584  	return v.convertToInt32(int32(v.byte())), nil
   585  }
   586  
   587  func convertBooleanToInt64(v Value) (Value, error) {
   588  	return v.convertToInt64(int64(v.byte())), nil
   589  }
   590  
   591  func convertBooleanToInt96(v Value) (Value, error) {
   592  	return v.convertToInt96(deprecated.Int96{0: uint32(v.byte())}), nil
   593  }
   594  
   595  func convertBooleanToFloat(v Value) (Value, error) {
   596  	return v.convertToFloat(float32(v.byte())), nil
   597  }
   598  
   599  func convertBooleanToDouble(v Value) (Value, error) {
   600  	return v.convertToDouble(float64(v.byte())), nil
   601  }
   602  
   603  func convertBooleanToByteArray(v Value) (Value, error) {
   604  	return v.convertToByteArray([]byte{v.byte()}), nil
   605  }
   606  
   607  func convertBooleanToFixedLenByteArray(v Value, size int) (Value, error) {
   608  	b := []byte{v.byte()}
   609  	c := make([]byte, size)
   610  	copy(c, b)
   611  	return v.convertToFixedLenByteArray(c), nil
   612  }
   613  
   614  func convertBooleanToString(v Value) (Value, error) {
   615  	b := ([]byte)(nil)
   616  	if v.boolean() {
   617  		b = trueBytes
   618  	} else {
   619  		b = falseBytes
   620  	}
   621  	return v.convertToByteArray(b), nil
   622  }
   623  
   624  func convertInt32ToBoolean(v Value) (Value, error) {
   625  	return v.convertToBoolean(v.int32() != 0), nil
   626  }
   627  
   628  func convertInt32ToInt64(v Value) (Value, error) {
   629  	return v.convertToInt64(int64(v.int32())), nil
   630  }
   631  
   632  func convertInt32ToInt96(v Value) (Value, error) {
   633  	return v.convertToInt96(deprecated.Int32ToInt96(v.int32())), nil
   634  }
   635  
   636  func convertInt32ToFloat(v Value) (Value, error) {
   637  	return v.convertToFloat(float32(v.int32())), nil
   638  }
   639  
   640  func convertInt32ToDouble(v Value) (Value, error) {
   641  	return v.convertToDouble(float64(v.int32())), nil
   642  }
   643  
   644  func convertInt32ToByteArray(v Value) (Value, error) {
   645  	b := make([]byte, 4)
   646  	binary.LittleEndian.PutUint32(b, v.uint32())
   647  	return v.convertToByteArray(b), nil
   648  }
   649  
   650  func convertInt32ToFixedLenByteArray(v Value, size int) (Value, error) {
   651  	b := make([]byte, 4)
   652  	c := make([]byte, size)
   653  	binary.LittleEndian.PutUint32(b, v.uint32())
   654  	copy(c, b)
   655  	return v.convertToFixedLenByteArray(c), nil
   656  }
   657  
   658  func convertInt32ToString(v Value) (Value, error) {
   659  	return v.convertToByteArray(strconv.AppendInt(nil, int64(v.int32()), 10)), nil
   660  }
   661  
   662  func convertInt64ToBoolean(v Value) (Value, error) {
   663  	return v.convertToBoolean(v.int64() != 0), nil
   664  }
   665  
   666  func convertInt64ToInt32(v Value) (Value, error) {
   667  	return v.convertToInt32(int32(v.int64())), nil
   668  }
   669  
   670  func convertInt64ToInt96(v Value) (Value, error) {
   671  	return v.convertToInt96(deprecated.Int64ToInt96(v.int64())), nil
   672  }
   673  
   674  func convertInt64ToFloat(v Value) (Value, error) {
   675  	return v.convertToFloat(float32(v.int64())), nil
   676  }
   677  
   678  func convertInt64ToDouble(v Value) (Value, error) {
   679  	return v.convertToDouble(float64(v.int64())), nil
   680  }
   681  
   682  func convertInt64ToByteArray(v Value) (Value, error) {
   683  	b := make([]byte, 8)
   684  	binary.LittleEndian.PutUint64(b, v.uint64())
   685  	return v.convertToByteArray(b), nil
   686  }
   687  
   688  func convertInt64ToFixedLenByteArray(v Value, size int) (Value, error) {
   689  	b := make([]byte, 8)
   690  	c := make([]byte, size)
   691  	binary.LittleEndian.PutUint64(b, v.uint64())
   692  	copy(c, b)
   693  	return v.convertToFixedLenByteArray(c), nil
   694  }
   695  
   696  func convertInt64ToString(v Value) (Value, error) {
   697  	return v.convertToByteArray(strconv.AppendInt(nil, v.int64(), 10)), nil
   698  }
   699  
   700  func convertInt96ToBoolean(v Value) (Value, error) {
   701  	return v.convertToBoolean(!v.int96().IsZero()), nil
   702  }
   703  
   704  func convertInt96ToInt32(v Value) (Value, error) {
   705  	return v.convertToInt32(v.int96().Int32()), nil
   706  }
   707  
   708  func convertInt96ToInt64(v Value) (Value, error) {
   709  	return v.convertToInt64(v.int96().Int64()), nil
   710  }
   711  
   712  func convertInt96ToFloat(v Value) (Value, error) {
   713  	return v, invalidConversion(v, "INT96", "FLOAT")
   714  }
   715  
   716  func convertInt96ToDouble(v Value) (Value, error) {
   717  	return v, invalidConversion(v, "INT96", "DOUBLE")
   718  }
   719  
   720  func convertInt96ToByteArray(v Value) (Value, error) {
   721  	return v.convertToByteArray(v.byteArray()), nil
   722  }
   723  
   724  func convertInt96ToFixedLenByteArray(v Value, size int) (Value, error) {
   725  	b := v.byteArray()
   726  	if len(b) < size {
   727  		c := make([]byte, size)
   728  		copy(c, b)
   729  		b = c
   730  	} else {
   731  		b = b[:size]
   732  	}
   733  	return v.convertToFixedLenByteArray(b), nil
   734  }
   735  
   736  func convertInt96ToString(v Value) (Value, error) {
   737  	return v.convertToByteArray([]byte(v.String())), nil
   738  }
   739  
   740  func convertFloatToBoolean(v Value) (Value, error) {
   741  	return v.convertToBoolean(v.float() != 0), nil
   742  }
   743  
   744  func convertFloatToInt32(v Value) (Value, error) {
   745  	return v.convertToInt32(int32(v.float())), nil
   746  }
   747  
   748  func convertFloatToInt64(v Value) (Value, error) {
   749  	return v.convertToInt64(int64(v.float())), nil
   750  }
   751  
   752  func convertFloatToInt96(v Value) (Value, error) {
   753  	return v, invalidConversion(v, "FLOAT", "INT96")
   754  }
   755  
   756  func convertFloatToDouble(v Value) (Value, error) {
   757  	return v.convertToDouble(float64(v.float())), nil
   758  }
   759  
   760  func convertFloatToByteArray(v Value) (Value, error) {
   761  	b := make([]byte, 4)
   762  	binary.LittleEndian.PutUint32(b, v.uint32())
   763  	return v.convertToByteArray(b), nil
   764  }
   765  
   766  func convertFloatToFixedLenByteArray(v Value, size int) (Value, error) {
   767  	b := make([]byte, 4)
   768  	c := make([]byte, size)
   769  	binary.LittleEndian.PutUint32(b, v.uint32())
   770  	copy(c, b)
   771  	return v.convertToFixedLenByteArray(c), nil
   772  }
   773  
   774  func convertFloatToString(v Value) (Value, error) {
   775  	return v.convertToByteArray(strconv.AppendFloat(nil, float64(v.float()), 'g', -1, 32)), nil
   776  }
   777  
   778  func convertDoubleToBoolean(v Value) (Value, error) {
   779  	return v.convertToBoolean(v.double() != 0), nil
   780  }
   781  
   782  func convertDoubleToInt32(v Value) (Value, error) {
   783  	return v.convertToInt32(int32(v.double())), nil
   784  }
   785  
   786  func convertDoubleToInt64(v Value) (Value, error) {
   787  	return v.convertToInt64(int64(v.double())), nil
   788  }
   789  
   790  func convertDoubleToInt96(v Value) (Value, error) {
   791  	return v, invalidConversion(v, "FLOAT", "INT96")
   792  }
   793  
   794  func convertDoubleToFloat(v Value) (Value, error) {
   795  	return v.convertToFloat(float32(v.double())), nil
   796  }
   797  
   798  func convertDoubleToByteArray(v Value) (Value, error) {
   799  	b := make([]byte, 8)
   800  	binary.LittleEndian.PutUint64(b, v.uint64())
   801  	return v.convertToByteArray(b), nil
   802  }
   803  
   804  func convertDoubleToFixedLenByteArray(v Value, size int) (Value, error) {
   805  	b := make([]byte, 8)
   806  	c := make([]byte, size)
   807  	binary.LittleEndian.PutUint64(b, v.uint64())
   808  	copy(c, b)
   809  	return v.convertToFixedLenByteArray(c), nil
   810  }
   811  
   812  func convertDoubleToString(v Value) (Value, error) {
   813  	return v.convertToByteArray(strconv.AppendFloat(nil, v.double(), 'g', -1, 64)), nil
   814  }
   815  
   816  func convertByteArrayToBoolean(v Value) (Value, error) {
   817  	return v.convertToBoolean(!isZero(v.byteArray())), nil
   818  }
   819  
   820  func convertByteArrayToInt32(v Value) (Value, error) {
   821  	b := make([]byte, 4)
   822  	copy(b, v.byteArray())
   823  	return v.convertToInt32(int32(binary.LittleEndian.Uint32(b))), nil
   824  }
   825  
   826  func convertByteArrayToInt64(v Value) (Value, error) {
   827  	b := make([]byte, 8)
   828  	copy(b, v.byteArray())
   829  	return v.convertToInt64(int64(binary.LittleEndian.Uint64(b))), nil
   830  }
   831  
   832  func convertByteArrayToInt96(v Value) (Value, error) {
   833  	b := make([]byte, 12)
   834  	copy(b, v.byteArray())
   835  	return v.convertToInt96(deprecated.Int96{
   836  		0: binary.LittleEndian.Uint32(b[0:4]),
   837  		1: binary.LittleEndian.Uint32(b[4:8]),
   838  		2: binary.LittleEndian.Uint32(b[8:12]),
   839  	}), nil
   840  }
   841  
   842  func convertByteArrayToFloat(v Value) (Value, error) {
   843  	b := make([]byte, 4)
   844  	copy(b, v.byteArray())
   845  	return v.convertToFloat(math.Float32frombits(binary.LittleEndian.Uint32(b))), nil
   846  }
   847  
   848  func convertByteArrayToDouble(v Value) (Value, error) {
   849  	b := make([]byte, 8)
   850  	copy(b, v.byteArray())
   851  	return v.convertToDouble(math.Float64frombits(binary.LittleEndian.Uint64(b))), nil
   852  }
   853  
   854  func convertByteArrayToFixedLenByteArray(v Value, size int) (Value, error) {
   855  	b := v.byteArray()
   856  	if len(b) < size {
   857  		c := make([]byte, size)
   858  		copy(c, b)
   859  		b = c
   860  	} else {
   861  		b = b[:size]
   862  	}
   863  	return v.convertToFixedLenByteArray(b), nil
   864  }
   865  
   866  func convertFixedLenByteArrayToString(v Value) (Value, error) {
   867  	b := v.byteArray()
   868  	c := make([]byte, hex.EncodedLen(len(b)))
   869  	hex.Encode(c, b)
   870  	return v.convertToByteArray(c), nil
   871  }
   872  
   873  func convertStringToBoolean(v Value) (Value, error) {
   874  	b, err := strconv.ParseBool(v.string())
   875  	if err != nil {
   876  		return v, conversionError(v, "STRING", "BOOLEAN", err)
   877  	}
   878  	return v.convertToBoolean(b), nil
   879  }
   880  
   881  func convertStringToInt32(v Value) (Value, error) {
   882  	i, err := strconv.ParseInt(v.string(), 10, 32)
   883  	if err != nil {
   884  		return v, conversionError(v, "STRING", "INT32", err)
   885  	}
   886  	return v.convertToInt32(int32(i)), nil
   887  }
   888  
   889  func convertStringToInt64(v Value) (Value, error) {
   890  	i, err := strconv.ParseInt(v.string(), 10, 64)
   891  	if err != nil {
   892  		return v, conversionError(v, "STRING", "INT64", err)
   893  	}
   894  	return v.convertToInt64(i), nil
   895  }
   896  
   897  func convertStringToInt96(v Value) (Value, error) {
   898  	i, ok := new(big.Int).SetString(v.string(), 10)
   899  	if !ok {
   900  		return v, conversionError(v, "STRING", "INT96", strconv.ErrSyntax)
   901  	}
   902  	b := i.Bytes()
   903  	c := make([]byte, 12)
   904  	copy(c, b)
   905  	i96 := deprecated.BytesToInt96(c)
   906  	return v.convertToInt96(i96[0]), nil
   907  }
   908  
   909  func convertStringToFloat(v Value) (Value, error) {
   910  	f, err := strconv.ParseFloat(v.string(), 32)
   911  	if err != nil {
   912  		return v, conversionError(v, "STRING", "FLOAT", err)
   913  	}
   914  	return v.convertToFloat(float32(f)), nil
   915  }
   916  
   917  func convertStringToDouble(v Value) (Value, error) {
   918  	f, err := strconv.ParseFloat(v.string(), 64)
   919  	if err != nil {
   920  		return v, conversionError(v, "STRING", "DOUBLE", err)
   921  	}
   922  	return v.convertToDouble(f), nil
   923  }
   924  
   925  func convertStringToFixedLenByteArray(v Value, size int) (Value, error) {
   926  	b := v.byteArray()
   927  	c := make([]byte, size)
   928  	_, err := hex.Decode(c, b)
   929  	if err != nil {
   930  		return v, conversionError(v, "STRING", "BYTE_ARRAY", err)
   931  	}
   932  	return v.convertToFixedLenByteArray(c), nil
   933  }
   934  
   935  func convertStringToDate(v Value, tz *time.Location) (Value, error) {
   936  	t, err := time.ParseInLocation("2006-01-02", v.string(), tz)
   937  	if err != nil {
   938  		return v, conversionError(v, "STRING", "DATE", err)
   939  	}
   940  	d := daysSinceUnixEpoch(t)
   941  	return v.convertToInt32(int32(d)), nil
   942  }
   943  
   944  func convertStringToTimeMillis(v Value, tz *time.Location) (Value, error) {
   945  	t, err := time.ParseInLocation("15:04:05.999", v.string(), tz)
   946  	if err != nil {
   947  		return v, conversionError(v, "STRING", "TIME", err)
   948  	}
   949  	m := nearestMidnightLessThan(t)
   950  	milliseconds := t.Sub(m).Milliseconds()
   951  	return v.convertToInt32(int32(milliseconds)), nil
   952  }
   953  
   954  func convertStringToTimeMicros(v Value, tz *time.Location) (Value, error) {
   955  	t, err := time.ParseInLocation("15:04:05.999999", v.string(), tz)
   956  	if err != nil {
   957  		return v, conversionError(v, "STRING", "TIME", err)
   958  	}
   959  	m := nearestMidnightLessThan(t)
   960  	microseconds := t.Sub(m).Microseconds()
   961  	return v.convertToInt64(microseconds), nil
   962  }
   963  
   964  func convertDateToTimestamp(v Value, u format.TimeUnit, tz *time.Location) (Value, error) {
   965  	t := unixEpoch.AddDate(0, 0, int(v.int32()))
   966  	d := timeUnitDuration(u)
   967  	return v.convertToInt64(int64(t.In(tz).Sub(unixEpoch) / d)), nil
   968  }
   969  
   970  func convertDateToString(v Value) (Value, error) {
   971  	t := unixEpoch.AddDate(0, 0, int(v.int32()))
   972  	b := t.AppendFormat(make([]byte, 0, 10), "2006-01-02")
   973  	return v.convertToByteArray(b), nil
   974  }
   975  
   976  func convertTimeMillisToString(v Value, tz *time.Location) (Value, error) {
   977  	t := time.UnixMilli(int64(v.int32())).In(tz)
   978  	b := t.AppendFormat(make([]byte, 0, 12), "15:04:05.999")
   979  	return v.convertToByteArray(b), nil
   980  }
   981  
   982  func convertTimeMicrosToString(v Value, tz *time.Location) (Value, error) {
   983  	t := time.UnixMicro(v.int64()).In(tz)
   984  	b := t.AppendFormat(make([]byte, 0, 15), "15:04:05.999999")
   985  	return v.convertToByteArray(b), nil
   986  }
   987  
   988  func convertTimestampToDate(v Value, u format.TimeUnit, tz *time.Location) (Value, error) {
   989  	t := timestamp(v, u, tz)
   990  	d := daysSinceUnixEpoch(t)
   991  	return v.convertToInt32(int32(d)), nil
   992  }
   993  
   994  func convertTimestampToTimeMillis(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) {
   995  	t := timestamp(v, u, sourceZone)
   996  	m := nearestMidnightLessThan(t)
   997  	milliseconds := t.In(targetZone).Sub(m).Milliseconds()
   998  	return v.convertToInt32(int32(milliseconds)), nil
   999  }
  1000  
  1001  func convertTimestampToTimeMicros(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) {
  1002  	t := timestamp(v, u, sourceZone)
  1003  	m := nearestMidnightLessThan(t)
  1004  	microseconds := t.In(targetZone).Sub(m).Microseconds()
  1005  	return v.convertToInt64(int64(microseconds)), nil
  1006  }
  1007  
  1008  func convertTimestampToTimestamp(v Value, sourceUnit, targetUnit format.TimeUnit) (Value, error) {
  1009  	sourceScale := timeUnitDuration(sourceUnit).Nanoseconds()
  1010  	targetScale := timeUnitDuration(targetUnit).Nanoseconds()
  1011  	targetValue := (v.int64() * sourceScale) / targetScale
  1012  	return v.convertToInt64(targetValue), nil
  1013  }
  1014  
// nanosecondsPerDay is the number of nanoseconds in a 24-hour day. Because
// the expression contains the floating-point literal 1e9, this is an untyped
// floating-point constant.
const nanosecondsPerDay = 24 * 60 * 60 * 1e9
  1016  
  1017  func daysSinceUnixEpoch(t time.Time) int {
  1018  	return int(t.Sub(unixEpoch).Hours()) / 24
  1019  }
  1020  
  1021  func nearestMidnightLessThan(t time.Time) time.Time {
  1022  	y, m, d := t.Date()
  1023  	return time.Date(y, m, d, 0, 0, 0, 0, t.Location())
  1024  }
  1025  
  1026  func timestamp(v Value, u format.TimeUnit, tz *time.Location) time.Time {
  1027  	return unixEpoch.In(tz).Add(time.Duration(v.int64()) * timeUnitDuration(u))
  1028  }
  1029  
  1030  func timeUnitDuration(unit format.TimeUnit) time.Duration {
  1031  	switch {
  1032  	case unit.Millis != nil:
  1033  		return time.Millisecond
  1034  	case unit.Micros != nil:
  1035  		return time.Microsecond
  1036  	default:
  1037  		return time.Nanosecond
  1038  	}
  1039  }
  1040  
  1041  func invalidConversion(value Value, from, to string) error {
  1042  	return fmt.Errorf("%s to %s: %s: %w", from, to, value, ErrInvalidConversion)
  1043  }
  1044  
  1045  func conversionError(value Value, from, to string, err error) error {
  1046  	return fmt.Errorf("%s to %s: %q: %s: %w", from, to, value.string(), err, ErrInvalidConversion)
  1047  }