github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/convert.go (about) 1 package parquet 2 3 import ( 4 "encoding/binary" 5 "encoding/hex" 6 "fmt" 7 "io" 8 "math" 9 "math/big" 10 "strconv" 11 "sync" 12 "time" 13 14 "github.com/segmentio/parquet-go/deprecated" 15 "github.com/segmentio/parquet-go/encoding" 16 "github.com/segmentio/parquet-go/format" 17 ) 18 19 // ConvertError is an error type returned by calls to Convert when the conversion 20 // of parquet schemas is impossible or the input row for the conversion is 21 // malformed. 22 type ConvertError struct { 23 Path []string 24 From Node 25 To Node 26 } 27 28 // Error satisfies the error interface. 29 func (e *ConvertError) Error() string { 30 sourceType := e.From.Type() 31 targetType := e.To.Type() 32 33 sourceRepetition := fieldRepetitionTypeOf(e.From) 34 targetRepetition := fieldRepetitionTypeOf(e.To) 35 36 return fmt.Sprintf("cannot convert parquet column %q from %s %s to %s %s", 37 columnPath(e.Path), 38 sourceRepetition, 39 sourceType, 40 targetRepetition, 41 targetType, 42 ) 43 } 44 45 // Conversion is an interface implemented by types that provide conversion of 46 // parquet rows from one schema to another. 47 // 48 // Conversion instances must be safe to use concurrently from multiple goroutines. 49 type Conversion interface { 50 // Applies the conversion logic on the src row, returning the result 51 // appended to dst. 52 Convert(rows []Row) (int, error) 53 // Converts the given column index in the target schema to the original 54 // column index in the source schema of the conversion. 55 Column(int) int 56 // Returns the target schema of the conversion. 57 Schema() *Schema 58 } 59 60 type conversion struct { 61 columns []conversionColumn 62 schema *Schema 63 buffers sync.Pool 64 // This field is used to size the column buffers held in the sync.Pool since 65 // they are intended to store the source rows being converted from. 66 numberOfSourceColumns int 67 } 68 69 type conversionBuffer struct { 70 columns [][]Value 71 } 72 73 type conversionColumn struct { 74 sourceIndex int 75 convertValues conversionFunc 76 } 77 78 type conversionFunc func([]Value) error 79 80 func convertToSelf(column []Value) error { return nil } 81 82 //go:noinline 83 func convertToType(targetType, sourceType Type) conversionFunc { 84 return func(column []Value) error { 85 for i, v := range column { 86 v, err := sourceType.ConvertValue(v, targetType) 87 if err != nil { 88 return err 89 } 90 column[i].ptr = v.ptr 91 column[i].u64 = v.u64 92 column[i].kind = v.kind 93 } 94 return nil 95 } 96 } 97 98 //go:noinline 99 func convertToValue(value Value) conversionFunc { 100 return func(column []Value) error { 101 for i := range column { 102 column[i] = value 103 } 104 return nil 105 } 106 } 107 108 //go:noinline 109 func convertToZero(kind Kind) conversionFunc { 110 return func(column []Value) error { 111 for i := range column { 112 column[i].ptr = nil 113 column[i].u64 = 0 114 column[i].kind = ^int8(kind) 115 } 116 return nil 117 } 118 } 119 120 //go:noinline 121 func convertToLevels(repetitionLevels, definitionLevels []byte) conversionFunc { 122 return func(column []Value) error { 123 for i := range column { 124 r := column[i].repetitionLevel 125 d := column[i].definitionLevel 126 column[i].repetitionLevel = repetitionLevels[r] 127 column[i].definitionLevel = definitionLevels[d] 128 } 129 return nil 130 } 131 } 132 133 //go:noinline 134 func multiConversionFunc(conversions []conversionFunc) conversionFunc { 135 switch len(conversions) { 136 case 0: 137 return convertToSelf 138 case 1: 139 return conversions[0] 140 default: 141 return func(column []Value) error { 142 for _, conv := range conversions { 143 if err := conv(column); err != nil { 144 return err 145 } 146 } 147 return nil 148 } 149 } 150 } 151 152 func (c *conversion) getBuffer() *conversionBuffer { 153 b, _ := c.buffers.Get().(*conversionBuffer) 154 if b == nil { 155 b = &conversionBuffer{ 156 columns: make([][]Value, c.numberOfSourceColumns), 157 } 158 values := make([]Value, c.numberOfSourceColumns) 159 for i := range b.columns { 160 b.columns[i] = values[i : i : i+1] 161 } 162 } 163 return b 164 } 165 166 func (c *conversion) putBuffer(b *conversionBuffer) { 167 c.buffers.Put(b) 168 } 169 170 // Convert here satisfies the Conversion interface, and does the actual work 171 // to convert between the source and target Rows. 172 func (c *conversion) Convert(rows []Row) (int, error) { 173 source := c.getBuffer() 174 defer c.putBuffer(source) 175 176 for n, row := range rows { 177 for i, values := range source.columns { 178 source.columns[i] = values[:0] 179 } 180 row.Range(func(columnIndex int, columnValues []Value) bool { 181 source.columns[columnIndex] = append(source.columns[columnIndex], columnValues...) 182 return true 183 }) 184 row = row[:0] 185 186 for columnIndex, conv := range c.columns { 187 columnOffset := len(row) 188 if conv.sourceIndex < 0 { 189 // When there is no source column, we put a single value as 190 // placeholder in the column. This is a condition where the 191 // target contained a column which did not exist at had not 192 // other columns existing at that same level. 193 row = append(row, Value{}) 194 } else { 195 // We must copy to the output row first and not mutate the 196 // source columns because multiple target columns may map to 197 // the same source column. 198 row = append(row, source.columns[conv.sourceIndex]...) 199 } 200 columnValues := row[columnOffset:] 201 202 if err := conv.convertValues(columnValues); err != nil { 203 return n, err 204 } 205 206 // Since the column index may have changed between the source and 207 // taget columns we ensure that the right value is always written 208 // to the output row. 209 for i := range columnValues { 210 columnValues[i].columnIndex = ^int16(columnIndex) 211 } 212 } 213 214 rows[n] = row 215 } 216 217 return len(rows), nil 218 } 219 220 func (c *conversion) Column(i int) int { 221 return c.columns[i].sourceIndex 222 } 223 224 func (c *conversion) Schema() *Schema { 225 return c.schema 226 } 227 228 type identity struct{ schema *Schema } 229 230 func (id identity) Convert(rows []Row) (int, error) { return len(rows), nil } 231 func (id identity) Column(i int) int { return i } 232 func (id identity) Schema() *Schema { return id.schema } 233 234 // Convert constructs a conversion function from one parquet schema to another. 235 // 236 // The function supports converting between schemas where the source or target 237 // have extra columns; if there are more columns in the source, they will be 238 // stripped out of the rows. Extra columns in the target schema will be set to 239 // null or zero values. 240 // 241 // The returned function is intended to be used to append the converted source 242 // row to the destination buffer. 243 func Convert(to, from Node) (conv Conversion, err error) { 244 schema, _ := to.(*Schema) 245 if schema == nil { 246 schema = NewSchema("", to) 247 } 248 249 if nodesAreEqual(to, from) { 250 return identity{schema}, nil 251 } 252 253 targetMapping, targetColumns := columnMappingOf(to) 254 sourceMapping, sourceColumns := columnMappingOf(from) 255 columns := make([]conversionColumn, len(targetColumns)) 256 257 for i, path := range targetColumns { 258 targetColumn := targetMapping.lookup(path) 259 sourceColumn := sourceMapping.lookup(path) 260 261 conversions := []conversionFunc{} 262 if sourceColumn.node != nil { 263 targetType := targetColumn.node.Type() 264 sourceType := sourceColumn.node.Type() 265 if !typesAreEqual(targetType, sourceType) { 266 conversions = append(conversions, 267 convertToType(targetType, sourceType), 268 ) 269 } 270 271 repetitionLevels := make([]byte, len(path)+1) 272 definitionLevels := make([]byte, len(path)+1) 273 targetRepetitionLevel := byte(0) 274 targetDefinitionLevel := byte(0) 275 sourceRepetitionLevel := byte(0) 276 sourceDefinitionLevel := byte(0) 277 targetNode := to 278 sourceNode := from 279 280 for j := 0; j < len(path); j++ { 281 targetNode = fieldByName(targetNode, path[j]) 282 sourceNode = fieldByName(sourceNode, path[j]) 283 284 targetRepetitionLevel, targetDefinitionLevel = applyFieldRepetitionType( 285 fieldRepetitionTypeOf(targetNode), 286 targetRepetitionLevel, 287 targetDefinitionLevel, 288 ) 289 sourceRepetitionLevel, sourceDefinitionLevel = applyFieldRepetitionType( 290 fieldRepetitionTypeOf(sourceNode), 291 sourceRepetitionLevel, 292 sourceDefinitionLevel, 293 ) 294 295 repetitionLevels[sourceRepetitionLevel] = targetRepetitionLevel 296 definitionLevels[sourceDefinitionLevel] = targetDefinitionLevel 297 } 298 299 repetitionLevels = repetitionLevels[:sourceRepetitionLevel+1] 300 definitionLevels = definitionLevels[:sourceDefinitionLevel+1] 301 302 if !isDirectLevelMapping(repetitionLevels) || !isDirectLevelMapping(definitionLevels) { 303 conversions = append(conversions, 304 convertToLevels(repetitionLevels, definitionLevels), 305 ) 306 } 307 308 } else { 309 targetType := targetColumn.node.Type() 310 targetKind := targetType.Kind() 311 sourceColumn = sourceMapping.lookupClosest(path) 312 if sourceColumn.node != nil { 313 conversions = append(conversions, 314 convertToZero(targetKind), 315 ) 316 } else { 317 conversions = append(conversions, 318 convertToValue(ZeroValue(targetKind)), 319 ) 320 } 321 } 322 323 columns[i] = conversionColumn{ 324 sourceIndex: int(sourceColumn.columnIndex), 325 convertValues: multiConversionFunc(conversions), 326 } 327 } 328 329 c := &conversion{ 330 columns: columns, 331 schema: schema, 332 numberOfSourceColumns: len(sourceColumns), 333 } 334 return c, nil 335 } 336 337 func isDirectLevelMapping(levels []byte) bool { 338 for i, level := range levels { 339 if level != byte(i) { 340 return false 341 } 342 } 343 return true 344 } 345 346 // ConvertRowGroup constructs a wrapper of the given row group which applies 347 // the given schema conversion to its rows. 348 func ConvertRowGroup(rowGroup RowGroup, conv Conversion) RowGroup { 349 schema := conv.Schema() 350 numRows := rowGroup.NumRows() 351 rowGroupColumns := rowGroup.ColumnChunks() 352 353 columns := make([]ColumnChunk, numLeafColumnsOf(schema)) 354 forEachLeafColumnOf(schema, func(leaf leafColumn) { 355 i := leaf.columnIndex 356 j := conv.Column(int(leaf.columnIndex)) 357 if j < 0 { 358 columns[i] = &missingColumnChunk{ 359 typ: leaf.node.Type(), 360 column: i, 361 // TODO: we assume the number of values is the same as the 362 // number of rows, which may not be accurate when the column is 363 // part of a repeated group; neighbor columns may be repeated in 364 // which case it would be impossible for this chunk not to be. 365 numRows: numRows, 366 numValues: numRows, 367 numNulls: numRows, 368 } 369 } else { 370 columns[i] = rowGroupColumns[j] 371 } 372 }) 373 374 // Sorting columns must exist on the conversion schema in order to be 375 // advertised on the converted row group otherwise the resulting rows 376 // would not be in the right order. 377 sorting := []SortingColumn{} 378 for _, col := range rowGroup.SortingColumns() { 379 if !hasColumnPath(schema, col.Path()) { 380 break 381 } 382 sorting = append(sorting, col) 383 } 384 385 return &convertedRowGroup{ 386 // The pair of rowGroup+conv is retained to construct a converted row 387 // reader by wrapping the underlying row reader of the row group because 388 // it allows proper reconstruction of the repetition and definition 389 // levels. 390 // 391 // TODO: can we figure out how to set the repetition and definition 392 // levels when reading values from missing column pages? At first sight 393 // it appears complex to do, however: 394 // 395 // * It is possible that having these levels when reading values of 396 // missing column pages is not necessary in some scenarios (e.g. when 397 // merging row groups). 398 // 399 // * We may be able to assume the repetition and definition levels at 400 // the call site (e.g. in the functions reading rows from columns). 401 // 402 // Columns of the source row group which do not exist in the target are 403 // masked to prevent loading unneeded pages when reading rows from the 404 // converted row group. 405 rowGroup: maskMissingRowGroupColumns(rowGroup, len(columns), conv), 406 columns: columns, 407 sorting: sorting, 408 conv: conv, 409 } 410 } 411 412 func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Conversion) RowGroup { 413 rowGroupColumns := r.ColumnChunks() 414 columns := make([]ColumnChunk, len(rowGroupColumns)) 415 missing := make([]missingColumnChunk, len(columns)) 416 numRows := r.NumRows() 417 418 for i := range missing { 419 missing[i] = missingColumnChunk{ 420 typ: rowGroupColumns[i].Type(), 421 column: int16(i), 422 numRows: numRows, 423 numValues: numRows, 424 numNulls: numRows, 425 } 426 } 427 428 for i := range columns { 429 columns[i] = &missing[i] 430 } 431 432 for i := 0; i < numColumns; i++ { 433 j := conv.Column(i) 434 if j >= 0 && j < len(columns) { 435 columns[j] = rowGroupColumns[j] 436 } 437 } 438 439 return &rowGroup{ 440 schema: r.Schema(), 441 numRows: numRows, 442 columns: columns, 443 } 444 } 445 446 type missingColumnChunk struct { 447 typ Type 448 column int16 449 numRows int64 450 numValues int64 451 numNulls int64 452 } 453 454 func (c *missingColumnChunk) Type() Type { return c.typ } 455 func (c *missingColumnChunk) Column() int { return int(c.column) } 456 func (c *missingColumnChunk) Pages() Pages { return onePage(missingPage{c}) } 457 func (c *missingColumnChunk) ColumnIndex() ColumnIndex { return missingColumnIndex{c} } 458 func (c *missingColumnChunk) OffsetIndex() OffsetIndex { return missingOffsetIndex{} } 459 func (c *missingColumnChunk) BloomFilter() BloomFilter { return missingBloomFilter{} } 460 func (c *missingColumnChunk) NumValues() int64 { return 0 } 461 462 type missingColumnIndex struct{ *missingColumnChunk } 463 464 func (i missingColumnIndex) NumPages() int { return 1 } 465 func (i missingColumnIndex) NullCount(int) int64 { return i.numNulls } 466 func (i missingColumnIndex) NullPage(int) bool { return true } 467 func (i missingColumnIndex) MinValue(int) Value { return Value{} } 468 func (i missingColumnIndex) MaxValue(int) Value { return Value{} } 469 func (i missingColumnIndex) IsAscending() bool { return true } 470 func (i missingColumnIndex) IsDescending() bool { return false } 471 472 type missingOffsetIndex struct{} 473 474 func (missingOffsetIndex) NumPages() int { return 1 } 475 func (missingOffsetIndex) Offset(int) int64 { return 0 } 476 func (missingOffsetIndex) CompressedPageSize(int) int64 { return 0 } 477 func (missingOffsetIndex) FirstRowIndex(int) int64 { return 0 } 478 479 type missingBloomFilter struct{} 480 481 func (missingBloomFilter) ReadAt([]byte, int64) (int, error) { return 0, io.EOF } 482 func (missingBloomFilter) Size() int64 { return 0 } 483 func (missingBloomFilter) Check(Value) (bool, error) { return false, nil } 484 485 type missingPage struct{ *missingColumnChunk } 486 487 func (p missingPage) Column() int { return int(p.column) } 488 func (p missingPage) Dictionary() Dictionary { return nil } 489 func (p missingPage) NumRows() int64 { return p.numRows } 490 func (p missingPage) NumValues() int64 { return p.numValues } 491 func (p missingPage) NumNulls() int64 { return p.numNulls } 492 func (p missingPage) Bounds() (min, max Value, ok bool) { return } 493 func (p missingPage) Slice(i, j int64) Page { return p } 494 func (p missingPage) Size() int64 { return 0 } 495 func (p missingPage) RepetitionLevels() []byte { return nil } 496 func (p missingPage) DefinitionLevels() []byte { return nil } 497 func (p missingPage) Data() encoding.Values { return p.typ.NewValues(nil, nil) } 498 func (p missingPage) Values() ValueReader { return &missingPageValues{page: p} } 499 500 type missingPageValues struct { 501 page missingPage 502 read int64 503 } 504 505 func (r *missingPageValues) ReadValues(values []Value) (int, error) { 506 remain := r.page.numValues - r.read 507 if int64(len(values)) > remain { 508 values = values[:remain] 509 } 510 for i := range values { 511 // TODO: how do we set the repetition and definition levels here? 512 values[i] = Value{columnIndex: ^r.page.column} 513 } 514 if r.read += int64(len(values)); r.read == r.page.numValues { 515 return len(values), io.EOF 516 } 517 return len(values), nil 518 } 519 520 func (r *missingPageValues) Close() error { 521 r.read = r.page.numValues 522 return nil 523 } 524 525 type convertedRowGroup struct { 526 rowGroup RowGroup 527 columns []ColumnChunk 528 sorting []SortingColumn 529 conv Conversion 530 } 531 532 func (c *convertedRowGroup) NumRows() int64 { return c.rowGroup.NumRows() } 533 func (c *convertedRowGroup) ColumnChunks() []ColumnChunk { return c.columns } 534 func (c *convertedRowGroup) Schema() *Schema { return c.conv.Schema() } 535 func (c *convertedRowGroup) SortingColumns() []SortingColumn { return c.sorting } 536 func (c *convertedRowGroup) Rows() Rows { 537 rows := c.rowGroup.Rows() 538 return &convertedRows{ 539 Closer: rows, 540 rows: rows, 541 conv: c.conv, 542 } 543 } 544 545 // ConvertRowReader constructs a wrapper of the given row reader which applies 546 // the given schema conversion to the rows. 547 func ConvertRowReader(rows RowReader, conv Conversion) RowReaderWithSchema { 548 return &convertedRows{rows: &forwardRowSeeker{rows: rows}, conv: conv} 549 } 550 551 type convertedRows struct { 552 io.Closer 553 rows RowReadSeeker 554 conv Conversion 555 } 556 557 func (c *convertedRows) ReadRows(rows []Row) (int, error) { 558 n, err := c.rows.ReadRows(rows) 559 if n > 0 { 560 var convErr error 561 n, convErr = c.conv.Convert(rows[:n]) 562 if convErr != nil { 563 err = convErr 564 } 565 } 566 return n, err 567 } 568 569 func (c *convertedRows) Schema() *Schema { 570 return c.conv.Schema() 571 } 572 573 func (c *convertedRows) SeekToRow(rowIndex int64) error { 574 return c.rows.SeekToRow(rowIndex) 575 } 576 577 var ( 578 trueBytes = []byte(`true`) 579 falseBytes = []byte(`false`) 580 unixEpoch = time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC) 581 ) 582 583 func convertBooleanToInt32(v Value) (Value, error) { 584 return v.convertToInt32(int32(v.byte())), nil 585 } 586 587 func convertBooleanToInt64(v Value) (Value, error) { 588 return v.convertToInt64(int64(v.byte())), nil 589 } 590 591 func convertBooleanToInt96(v Value) (Value, error) { 592 return v.convertToInt96(deprecated.Int96{0: uint32(v.byte())}), nil 593 } 594 595 func convertBooleanToFloat(v Value) (Value, error) { 596 return v.convertToFloat(float32(v.byte())), nil 597 } 598 599 func convertBooleanToDouble(v Value) (Value, error) { 600 return v.convertToDouble(float64(v.byte())), nil 601 } 602 603 func convertBooleanToByteArray(v Value) (Value, error) { 604 return v.convertToByteArray([]byte{v.byte()}), nil 605 } 606 607 func convertBooleanToFixedLenByteArray(v Value, size int) (Value, error) { 608 b := []byte{v.byte()} 609 c := make([]byte, size) 610 copy(c, b) 611 return v.convertToFixedLenByteArray(c), nil 612 } 613 614 func convertBooleanToString(v Value) (Value, error) { 615 b := ([]byte)(nil) 616 if v.boolean() { 617 b = trueBytes 618 } else { 619 b = falseBytes 620 } 621 return v.convertToByteArray(b), nil 622 } 623 624 func convertInt32ToBoolean(v Value) (Value, error) { 625 return v.convertToBoolean(v.int32() != 0), nil 626 } 627 628 func convertInt32ToInt64(v Value) (Value, error) { 629 return v.convertToInt64(int64(v.int32())), nil 630 } 631 632 func convertInt32ToInt96(v Value) (Value, error) { 633 return v.convertToInt96(deprecated.Int32ToInt96(v.int32())), nil 634 } 635 636 func convertInt32ToFloat(v Value) (Value, error) { 637 return v.convertToFloat(float32(v.int32())), nil 638 } 639 640 func convertInt32ToDouble(v Value) (Value, error) { 641 return v.convertToDouble(float64(v.int32())), nil 642 } 643 644 func convertInt32ToByteArray(v Value) (Value, error) { 645 b := make([]byte, 4) 646 binary.LittleEndian.PutUint32(b, v.uint32()) 647 return v.convertToByteArray(b), nil 648 } 649 650 func convertInt32ToFixedLenByteArray(v Value, size int) (Value, error) { 651 b := make([]byte, 4) 652 c := make([]byte, size) 653 binary.LittleEndian.PutUint32(b, v.uint32()) 654 copy(c, b) 655 return v.convertToFixedLenByteArray(c), nil 656 } 657 658 func convertInt32ToString(v Value) (Value, error) { 659 return v.convertToByteArray(strconv.AppendInt(nil, int64(v.int32()), 10)), nil 660 } 661 662 func convertInt64ToBoolean(v Value) (Value, error) { 663 return v.convertToBoolean(v.int64() != 0), nil 664 } 665 666 func convertInt64ToInt32(v Value) (Value, error) { 667 return v.convertToInt32(int32(v.int64())), nil 668 } 669 670 func convertInt64ToInt96(v Value) (Value, error) { 671 return v.convertToInt96(deprecated.Int64ToInt96(v.int64())), nil 672 } 673 674 func convertInt64ToFloat(v Value) (Value, error) { 675 return v.convertToFloat(float32(v.int64())), nil 676 } 677 678 func convertInt64ToDouble(v Value) (Value, error) { 679 return v.convertToDouble(float64(v.int64())), nil 680 } 681 682 func convertInt64ToByteArray(v Value) (Value, error) { 683 b := make([]byte, 8) 684 binary.LittleEndian.PutUint64(b, v.uint64()) 685 return v.convertToByteArray(b), nil 686 } 687 688 func convertInt64ToFixedLenByteArray(v Value, size int) (Value, error) { 689 b := make([]byte, 8) 690 c := make([]byte, size) 691 binary.LittleEndian.PutUint64(b, v.uint64()) 692 copy(c, b) 693 return v.convertToFixedLenByteArray(c), nil 694 } 695 696 func convertInt64ToString(v Value) (Value, error) { 697 return v.convertToByteArray(strconv.AppendInt(nil, v.int64(), 10)), nil 698 } 699 700 func convertInt96ToBoolean(v Value) (Value, error) { 701 return v.convertToBoolean(!v.int96().IsZero()), nil 702 } 703 704 func convertInt96ToInt32(v Value) (Value, error) { 705 return v.convertToInt32(v.int96().Int32()), nil 706 } 707 708 func convertInt96ToInt64(v Value) (Value, error) { 709 return v.convertToInt64(v.int96().Int64()), nil 710 } 711 712 func convertInt96ToFloat(v Value) (Value, error) { 713 return v, invalidConversion(v, "INT96", "FLOAT") 714 } 715 716 func convertInt96ToDouble(v Value) (Value, error) { 717 return v, invalidConversion(v, "INT96", "DOUBLE") 718 } 719 720 func convertInt96ToByteArray(v Value) (Value, error) { 721 return v.convertToByteArray(v.byteArray()), nil 722 } 723 724 func convertInt96ToFixedLenByteArray(v Value, size int) (Value, error) { 725 b := v.byteArray() 726 if len(b) < size { 727 c := make([]byte, size) 728 copy(c, b) 729 b = c 730 } else { 731 b = b[:size] 732 } 733 return v.convertToFixedLenByteArray(b), nil 734 } 735 736 func convertInt96ToString(v Value) (Value, error) { 737 return v.convertToByteArray([]byte(v.String())), nil 738 } 739 740 func convertFloatToBoolean(v Value) (Value, error) { 741 return v.convertToBoolean(v.float() != 0), nil 742 } 743 744 func convertFloatToInt32(v Value) (Value, error) { 745 return v.convertToInt32(int32(v.float())), nil 746 } 747 748 func convertFloatToInt64(v Value) (Value, error) { 749 return v.convertToInt64(int64(v.float())), nil 750 } 751 752 func convertFloatToInt96(v Value) (Value, error) { 753 return v, invalidConversion(v, "FLOAT", "INT96") 754 } 755 756 func convertFloatToDouble(v Value) (Value, error) { 757 return v.convertToDouble(float64(v.float())), nil 758 } 759 760 func convertFloatToByteArray(v Value) (Value, error) { 761 b := make([]byte, 4) 762 binary.LittleEndian.PutUint32(b, v.uint32()) 763 return v.convertToByteArray(b), nil 764 } 765 766 func convertFloatToFixedLenByteArray(v Value, size int) (Value, error) { 767 b := make([]byte, 4) 768 c := make([]byte, size) 769 binary.LittleEndian.PutUint32(b, v.uint32()) 770 copy(c, b) 771 return v.convertToFixedLenByteArray(c), nil 772 } 773 774 func convertFloatToString(v Value) (Value, error) { 775 return v.convertToByteArray(strconv.AppendFloat(nil, float64(v.float()), 'g', -1, 32)), nil 776 } 777 778 func convertDoubleToBoolean(v Value) (Value, error) { 779 return v.convertToBoolean(v.double() != 0), nil 780 } 781 782 func convertDoubleToInt32(v Value) (Value, error) { 783 return v.convertToInt32(int32(v.double())), nil 784 } 785 786 func convertDoubleToInt64(v Value) (Value, error) { 787 return v.convertToInt64(int64(v.double())), nil 788 } 789 790 func convertDoubleToInt96(v Value) (Value, error) { 791 return v, invalidConversion(v, "FLOAT", "INT96") 792 } 793 794 func convertDoubleToFloat(v Value) (Value, error) { 795 return v.convertToFloat(float32(v.double())), nil 796 } 797 798 func convertDoubleToByteArray(v Value) (Value, error) { 799 b := make([]byte, 8) 800 binary.LittleEndian.PutUint64(b, v.uint64()) 801 return v.convertToByteArray(b), nil 802 } 803 804 func convertDoubleToFixedLenByteArray(v Value, size int) (Value, error) { 805 b := make([]byte, 8) 806 c := make([]byte, size) 807 binary.LittleEndian.PutUint64(b, v.uint64()) 808 copy(c, b) 809 return v.convertToFixedLenByteArray(c), nil 810 } 811 812 func convertDoubleToString(v Value) (Value, error) { 813 return v.convertToByteArray(strconv.AppendFloat(nil, v.double(), 'g', -1, 64)), nil 814 } 815 816 func convertByteArrayToBoolean(v Value) (Value, error) { 817 return v.convertToBoolean(!isZero(v.byteArray())), nil 818 } 819 820 func convertByteArrayToInt32(v Value) (Value, error) { 821 b := make([]byte, 4) 822 copy(b, v.byteArray()) 823 return v.convertToInt32(int32(binary.LittleEndian.Uint32(b))), nil 824 } 825 826 func convertByteArrayToInt64(v Value) (Value, error) { 827 b := make([]byte, 8) 828 copy(b, v.byteArray()) 829 return v.convertToInt64(int64(binary.LittleEndian.Uint64(b))), nil 830 } 831 832 func convertByteArrayToInt96(v Value) (Value, error) { 833 b := make([]byte, 12) 834 copy(b, v.byteArray()) 835 return v.convertToInt96(deprecated.Int96{ 836 0: binary.LittleEndian.Uint32(b[0:4]), 837 1: binary.LittleEndian.Uint32(b[4:8]), 838 2: binary.LittleEndian.Uint32(b[8:12]), 839 }), nil 840 } 841 842 func convertByteArrayToFloat(v Value) (Value, error) { 843 b := make([]byte, 4) 844 copy(b, v.byteArray()) 845 return v.convertToFloat(math.Float32frombits(binary.LittleEndian.Uint32(b))), nil 846 } 847 848 func convertByteArrayToDouble(v Value) (Value, error) { 849 b := make([]byte, 8) 850 copy(b, v.byteArray()) 851 return v.convertToDouble(math.Float64frombits(binary.LittleEndian.Uint64(b))), nil 852 } 853 854 func convertByteArrayToFixedLenByteArray(v Value, size int) (Value, error) { 855 b := v.byteArray() 856 if len(b) < size { 857 c := make([]byte, size) 858 copy(c, b) 859 b = c 860 } else { 861 b = b[:size] 862 } 863 return v.convertToFixedLenByteArray(b), nil 864 } 865 866 func convertFixedLenByteArrayToString(v Value) (Value, error) { 867 b := v.byteArray() 868 c := make([]byte, hex.EncodedLen(len(b))) 869 hex.Encode(c, b) 870 return v.convertToByteArray(c), nil 871 } 872 873 func convertStringToBoolean(v Value) (Value, error) { 874 b, err := strconv.ParseBool(v.string()) 875 if err != nil { 876 return v, conversionError(v, "STRING", "BOOLEAN", err) 877 } 878 return v.convertToBoolean(b), nil 879 } 880 881 func convertStringToInt32(v Value) (Value, error) { 882 i, err := strconv.ParseInt(v.string(), 10, 32) 883 if err != nil { 884 return v, conversionError(v, "STRING", "INT32", err) 885 } 886 return v.convertToInt32(int32(i)), nil 887 } 888 889 func convertStringToInt64(v Value) (Value, error) { 890 i, err := strconv.ParseInt(v.string(), 10, 64) 891 if err != nil { 892 return v, conversionError(v, "STRING", "INT64", err) 893 } 894 return v.convertToInt64(i), nil 895 } 896 897 func convertStringToInt96(v Value) (Value, error) { 898 i, ok := new(big.Int).SetString(v.string(), 10) 899 if !ok { 900 return v, conversionError(v, "STRING", "INT96", strconv.ErrSyntax) 901 } 902 b := i.Bytes() 903 c := make([]byte, 12) 904 copy(c, b) 905 i96 := deprecated.BytesToInt96(c) 906 return v.convertToInt96(i96[0]), nil 907 } 908 909 func convertStringToFloat(v Value) (Value, error) { 910 f, err := strconv.ParseFloat(v.string(), 32) 911 if err != nil { 912 return v, conversionError(v, "STRING", "FLOAT", err) 913 } 914 return v.convertToFloat(float32(f)), nil 915 } 916 917 func convertStringToDouble(v Value) (Value, error) { 918 f, err := strconv.ParseFloat(v.string(), 64) 919 if err != nil { 920 return v, conversionError(v, "STRING", "DOUBLE", err) 921 } 922 return v.convertToDouble(f), nil 923 } 924 925 func convertStringToFixedLenByteArray(v Value, size int) (Value, error) { 926 b := v.byteArray() 927 c := make([]byte, size) 928 _, err := hex.Decode(c, b) 929 if err != nil { 930 return v, conversionError(v, "STRING", "BYTE_ARRAY", err) 931 } 932 return v.convertToFixedLenByteArray(c), nil 933 } 934 935 func convertStringToDate(v Value, tz *time.Location) (Value, error) { 936 t, err := time.ParseInLocation("2006-01-02", v.string(), tz) 937 if err != nil { 938 return v, conversionError(v, "STRING", "DATE", err) 939 } 940 d := daysSinceUnixEpoch(t) 941 return v.convertToInt32(int32(d)), nil 942 } 943 944 func convertStringToTimeMillis(v Value, tz *time.Location) (Value, error) { 945 t, err := time.ParseInLocation("15:04:05.999", v.string(), tz) 946 if err != nil { 947 return v, conversionError(v, "STRING", "TIME", err) 948 } 949 m := nearestMidnightLessThan(t) 950 milliseconds := t.Sub(m).Milliseconds() 951 return v.convertToInt32(int32(milliseconds)), nil 952 } 953 954 func convertStringToTimeMicros(v Value, tz *time.Location) (Value, error) { 955 t, err := time.ParseInLocation("15:04:05.999999", v.string(), tz) 956 if err != nil { 957 return v, conversionError(v, "STRING", "TIME", err) 958 } 959 m := nearestMidnightLessThan(t) 960 microseconds := t.Sub(m).Microseconds() 961 return v.convertToInt64(microseconds), nil 962 } 963 964 func convertDateToTimestamp(v Value, u format.TimeUnit, tz *time.Location) (Value, error) { 965 t := unixEpoch.AddDate(0, 0, int(v.int32())) 966 d := timeUnitDuration(u) 967 return v.convertToInt64(int64(t.In(tz).Sub(unixEpoch) / d)), nil 968 } 969 970 func convertDateToString(v Value) (Value, error) { 971 t := unixEpoch.AddDate(0, 0, int(v.int32())) 972 b := t.AppendFormat(make([]byte, 0, 10), "2006-01-02") 973 return v.convertToByteArray(b), nil 974 } 975 976 func convertTimeMillisToString(v Value, tz *time.Location) (Value, error) { 977 t := time.UnixMilli(int64(v.int32())).In(tz) 978 b := t.AppendFormat(make([]byte, 0, 12), "15:04:05.999") 979 return v.convertToByteArray(b), nil 980 } 981 982 func convertTimeMicrosToString(v Value, tz *time.Location) (Value, error) { 983 t := time.UnixMicro(v.int64()).In(tz) 984 b := t.AppendFormat(make([]byte, 0, 15), "15:04:05.999999") 985 return v.convertToByteArray(b), nil 986 } 987 988 func convertTimestampToDate(v Value, u format.TimeUnit, tz *time.Location) (Value, error) { 989 t := timestamp(v, u, tz) 990 d := daysSinceUnixEpoch(t) 991 return v.convertToInt32(int32(d)), nil 992 } 993 994 func convertTimestampToTimeMillis(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) { 995 t := timestamp(v, u, sourceZone) 996 m := nearestMidnightLessThan(t) 997 milliseconds := t.In(targetZone).Sub(m).Milliseconds() 998 return v.convertToInt32(int32(milliseconds)), nil 999 } 1000 1001 func convertTimestampToTimeMicros(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) { 1002 t := timestamp(v, u, sourceZone) 1003 m := nearestMidnightLessThan(t) 1004 microseconds := t.In(targetZone).Sub(m).Microseconds() 1005 return v.convertToInt64(int64(microseconds)), nil 1006 } 1007 1008 func convertTimestampToTimestamp(v Value, sourceUnit, targetUnit format.TimeUnit) (Value, error) { 1009 sourceScale := timeUnitDuration(sourceUnit).Nanoseconds() 1010 targetScale := timeUnitDuration(targetUnit).Nanoseconds() 1011 targetValue := (v.int64() * sourceScale) / targetScale 1012 return v.convertToInt64(targetValue), nil 1013 } 1014 1015 const nanosecondsPerDay = 24 * 60 * 60 * 1e9 1016 1017 func daysSinceUnixEpoch(t time.Time) int { 1018 return int(t.Sub(unixEpoch).Hours()) / 24 1019 } 1020 1021 func nearestMidnightLessThan(t time.Time) time.Time { 1022 y, m, d := t.Date() 1023 return time.Date(y, m, d, 0, 0, 0, 0, t.Location()) 1024 } 1025 1026 func timestamp(v Value, u format.TimeUnit, tz *time.Location) time.Time { 1027 return unixEpoch.In(tz).Add(time.Duration(v.int64()) * timeUnitDuration(u)) 1028 } 1029 1030 func timeUnitDuration(unit format.TimeUnit) time.Duration { 1031 switch { 1032 case unit.Millis != nil: 1033 return time.Millisecond 1034 case unit.Micros != nil: 1035 return time.Microsecond 1036 default: 1037 return time.Nanosecond 1038 } 1039 } 1040 1041 func invalidConversion(value Value, from, to string) error { 1042 return fmt.Errorf("%s to %s: %s: %w", from, to, value, ErrInvalidConversion) 1043 } 1044 1045 func conversionError(value Value, from, to string, err error) error { 1046 return fmt.Errorf("%s to %s: %q: %s: %w", from, to, value.string(), err, ErrInvalidConversion) 1047 }