// Source: github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/convert.go

package parquet

import (
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"io"
	"math"
	"math/big"
	"strconv"
	"sync"
	"time"

	"github.com/parquet-go/parquet-go/deprecated"
	"github.com/parquet-go/parquet-go/encoding"
	"github.com/parquet-go/parquet-go/format"
)

// ConvertError is an error type returned by calls to Convert when the conversion
// of parquet schemas is impossible or the input row for the conversion is
// malformed.
type ConvertError struct {
	Path []string
	From Node
	To   Node
}

// Error satisfies the error interface.
//
// The message names the column path along with the repetition and type of
// both the source (From) and target (To) nodes. Both nodes are dereferenced,
// so they must be non-nil.
func (e *ConvertError) Error() string {
	sourceType := e.From.Type()
	targetType := e.To.Type()

	sourceRepetition := fieldRepetitionTypeOf(e.From)
	targetRepetition := fieldRepetitionTypeOf(e.To)

	return fmt.Sprintf("cannot convert parquet column %q from %s %s to %s %s",
		columnPath(e.Path),
		sourceRepetition,
		sourceType,
		targetRepetition,
		targetType,
	)
}

// Conversion is an interface implemented by types that provide conversion of
// parquet rows from one schema to another.
//
// Conversion instances must be safe to use concurrently from multiple goroutines.
type Conversion interface {
	// Convert applies the conversion logic to the given rows, replacing each
	// element of the slice with its converted form. It returns the number of
	// rows that were converted, along with any error that interrupted the
	// conversion.
	Convert(rows []Row) (int, error)
	// Column converts the given column index in the target schema to the
	// original column index in the source schema of the conversion. Callers in
	// this file treat a negative result as "no source column".
	Column(int) int
	// Schema returns the target schema of the conversion.
	Schema() *Schema
}

// conversion is the Conversion implementation returned by Convert when the
// source and target nodes differ.
type conversion struct {
	columns []conversionColumn
	schema  *Schema
	buffers sync.Pool
	// This field is used to size the column buffers held in the sync.Pool since
	// they are intended to store the source rows being converted from.
	numberOfSourceColumns int
}

// conversionBuffer holds, for one row at a time, the values of the source row
// grouped by source column index.
type conversionBuffer struct {
	columns [][]Value
}

// conversionColumn describes how one target column is produced: the index of
// the source column it is copied from and the function applied to the copied
// values.
type conversionColumn struct {
	sourceIndex   int
	convertValues conversionFunc
}

// conversionFunc rewrites the values of a single column in place.
type conversionFunc func([]Value) error

// convertToSelf is the no-op conversion; it leaves the column untouched.
func convertToSelf(column []Value) error { return nil }

// convertToType returns a conversion which passes every value through
// sourceType.ConvertValue and stores the resulting ptr, u64 and kind fields
// back into the column. The other fields of each value are left as they were.
//
//go:noinline
func convertToType(targetType, sourceType Type) conversionFunc {
	return func(column []Value) error {
		for i, v := range column {
			v, err := sourceType.ConvertValue(v, targetType)
			if err != nil {
				return err
			}
			column[i].ptr = v.ptr
			column[i].u64 = v.u64
			column[i].kind = v.kind
		}
		return nil
	}
}

// convertToValue returns a conversion which overwrites every element of the
// column with the given value (all fields included).
//
//go:noinline
func convertToValue(value Value) conversionFunc {
	return func(column []Value) error {
		for i := range column {
			column[i] = value
		}
		return nil
	}
}

// convertToZero returns a conversion which clears the ptr and u64 fields of
// every value and sets its kind field to the bitwise complement of kind. The
// other fields of each value are left as they were.
//
//go:noinline
func convertToZero(kind Kind) conversionFunc {
	return func(column []Value) error {
		for i := range column {
			column[i].ptr = nil
			column[i].u64 = 0
			column[i].kind = ^int8(kind)
		}
		return nil
	}
}

// convertToLevels returns a conversion which remaps the repetition and
// definition levels of every value through the given lookup tables, indexed
// by the source level. The tables must be long enough to be indexed by every
// level found in the column.
//
//go:noinline
func convertToLevels(repetitionLevels, definitionLevels []byte) conversionFunc {
	return func(column []Value) error {
		for i := range column {
			r := column[i].repetitionLevel
			d := column[i].definitionLevel
			column[i].repetitionLevel = repetitionLevels[r]
			column[i].definitionLevel = definitionLevels[d]
		}
		return nil
	}
}

// multiConversionFunc combines conversions into a single function applying
// them in order and stopping at the first error. Zero conversions yield
// convertToSelf and a single conversion is returned unwrapped.
//
//go:noinline
func multiConversionFunc(conversions []conversionFunc) conversionFunc {
	switch len(conversions) {
	case 0:
		return convertToSelf
	case 1:
		return conversions[0]
	default:
		return func(column []Value) error {
			for _, conv := range conversions {
				if err := conv(column); err != nil {
					return err
				}
			}
			return nil
		}
	}
}

// getBuffer takes a conversionBuffer from the pool, allocating a new one when
// the pool has none to offer. A new buffer has one entry per source column;
// each entry starts as an empty slice with capacity for one value, all carved
// out of a single allocation.
func (c *conversion) getBuffer() *conversionBuffer {
	b, _ := c.buffers.Get().(*conversionBuffer)
	if b == nil {
		b = &conversionBuffer{
			columns: make([][]Value, c.numberOfSourceColumns),
		}
		values := make([]Value, c.numberOfSourceColumns)
		for i := range b.columns {
			b.columns[i] = values[i : i : i+1]
		}
	}
	return b
}

// putBuffer returns b to the pool for reuse.
func (c *conversion) putBuffer(b *conversionBuffer) {
	c.buffers.Put(b)
}

// Convert here satisfies the Conversion interface, and does the actual work
// to convert between the source and target Rows.
//
// Each row is first split by source column into a pooled buffer, then rebuilt
// target column by target column reusing the row's own backing array. On
// error, the index of the row being converted is returned with the error.
func (c *conversion) Convert(rows []Row) (int, error) {
	source := c.getBuffer()
	defer c.putBuffer(source)

	for n, row := range rows {
		// Reset the per-column buffers while retaining their capacity.
		for i, values := range source.columns {
			source.columns[i] = values[:0]
		}
		row.Range(func(columnIndex int, columnValues []Value) bool {
			source.columns[columnIndex] = append(source.columns[columnIndex], columnValues...)
			return true
		})
		// The source values now live in the buffer, the row can be rebuilt
		// from scratch on top of its existing backing array.
		row = row[:0]

		for columnIndex, conv := range c.columns {
			columnOffset := len(row)
			if conv.sourceIndex < 0 {
				// When there is no source column, we put a single value as
				// placeholder in the column. This is a condition where the
				// target contained a column which did not exist in the source
				// and had no other columns existing at that same level.
				row = append(row, Value{})
			} else {
				// We must copy to the output row first and not mutate the
				// source columns because multiple target columns may map to
				// the same source column.
				row = append(row, source.columns[conv.sourceIndex]...)
			}
			columnValues := row[columnOffset:]

			if err := conv.convertValues(columnValues); err != nil {
				return n, err
			}

			// Since the column index may have changed between the source and
			// target columns we ensure that the right value is always written
			// to the output row. The index is stored as its bitwise complement.
			for i := range columnValues {
				columnValues[i].columnIndex = ^int16(columnIndex)
			}
		}

		rows[n] = row
	}

	return len(rows), nil
}

// Column returns the source column index recorded for target column i.
func (c *conversion) Column(i int) int {
	return c.columns[i].sourceIndex
}

// Schema returns the target schema of the conversion.
func (c *conversion) Schema() *Schema {
	return c.schema
}

// identity is the Conversion used when the source and target nodes are equal:
// rows are left untouched and column indexes map to themselves.
type identity struct{ schema *Schema }

func (id identity) Convert(rows []Row) (int, error) { return len(rows), nil }
func (id identity) Column(i int) int                { return i }
func (id identity) Schema() *Schema                 { return id.schema }

// Convert constructs a conversion function from one parquet schema to another.
//
// The function supports converting between schemas where the source or target
// have extra columns; if there are more columns in the source, they will be
// stripped out of the rows. Extra columns in the target schema will be set to
// null or zero values.
//
// The returned Conversion is intended to be applied to slices of source rows
// through its Convert method, which replaces each row with its converted form.
func Convert(to, from Node) (conv Conversion, err error) {
	// The target schema is `to` itself when it already is a *Schema, otherwise
	// the node is wrapped in a new schema with an empty name.
	schema, _ := to.(*Schema)
	if schema == nil {
		schema = NewSchema("", to)
	}

	if nodesAreEqual(to, from) {
		return identity{schema}, nil
	}

	targetMapping, targetColumns := columnMappingOf(to)
	sourceMapping, sourceColumns := columnMappingOf(from)
	columns := make([]conversionColumn, len(targetColumns))

	for i, path := range targetColumns {
		targetColumn := targetMapping.lookup(path)
		sourceColumn := sourceMapping.lookup(path)

		conversions := []conversionFunc{}
		if sourceColumn.node != nil {
			// The column exists on both sides: convert the values when the
			// types differ, and remap the levels when the nesting differs.
			targetType := targetColumn.node.Type()
			sourceType := sourceColumn.node.Type()
			if !typesAreEqual(targetType, sourceType) {
				conversions = append(conversions,
					convertToType(targetType, sourceType),
				)
			}

			// Lookup tables indexed by source level, holding the target level
			// reached at the same depth of the column path.
			repetitionLevels := make([]byte, len(path)+1)
			definitionLevels := make([]byte, len(path)+1)
			targetRepetitionLevel := byte(0)
			targetDefinitionLevel := byte(0)
			sourceRepetitionLevel := byte(0)
			sourceDefinitionLevel := byte(0)
			targetNode := to
			sourceNode := from

			// Walk the path in both schemas simultaneously, tracking the
			// levels accumulated on each side.
			for j := 0; j < len(path); j++ {
				targetNode = fieldByName(targetNode, path[j])
				sourceNode = fieldByName(sourceNode, path[j])

				targetRepetitionLevel, targetDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(targetNode),
					targetRepetitionLevel,
					targetDefinitionLevel,
				)
				sourceRepetitionLevel, sourceDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(sourceNode),
					sourceRepetitionLevel,
					sourceDefinitionLevel,
				)

				repetitionLevels[sourceRepetitionLevel] = targetRepetitionLevel
				definitionLevels[sourceDefinitionLevel] = targetDefinitionLevel
			}

			// Trim the tables to the levels that the source can produce.
			repetitionLevels = repetitionLevels[:sourceRepetitionLevel+1]
			definitionLevels = definitionLevels[:sourceDefinitionLevel+1]

			// Skip the level conversion when both tables map every level to
			// itself.
			if !isDirectLevelMapping(repetitionLevels) || !isDirectLevelMapping(definitionLevels) {
				conversions = append(conversions,
					convertToLevels(repetitionLevels, definitionLevels),
				)
			}

		} else {
			// The column does not exist in the source: fall back to the
			// closest source column if there is one, zeroing its values;
			// otherwise fill the column with the zero value of the target kind.
			targetType := targetColumn.node.Type()
			targetKind := targetType.Kind()
			sourceColumn = sourceMapping.lookupClosest(path)
			if sourceColumn.node != nil {
				conversions = append(conversions,
					convertToZero(targetKind),
				)
			} else {
				conversions = append(conversions,
					convertToValue(ZeroValue(targetKind)),
				)
			}
		}

		columns[i] = conversionColumn{
			sourceIndex:   int(sourceColumn.columnIndex),
			convertValues: multiConversionFunc(conversions),
		}
	}

	c := &conversion{
		columns:               columns,
		schema:                schema,
		numberOfSourceColumns: len(sourceColumns),
	}
	return c, nil
}

// isDirectLevelMapping reports whether every entry of levels is equal to its
// own index, in which case remapping through the table would be a no-op.
func isDirectLevelMapping(levels []byte) bool {
	for i, level := range levels {
		if level != byte(i) {
			return false
		}
	}
	return true
}

// ConvertRowGroup constructs a wrapper of the given row group which applies
// the given schema conversion to its rows.
func ConvertRowGroup(rowGroup RowGroup, conv Conversion) RowGroup {
	schema := conv.Schema()
	numRows := rowGroup.NumRows()
	rowGroupColumns := rowGroup.ColumnChunks()

	// One column chunk per leaf column of the target schema: either the source
	// chunk the conversion maps it to, or a placeholder when there is none.
	columns := make([]ColumnChunk, numLeafColumnsOf(schema))
	forEachLeafColumnOf(schema, func(leaf leafColumn) {
		i := leaf.columnIndex
		j := conv.Column(int(leaf.columnIndex))
		if j < 0 {
			columns[i] = &missingColumnChunk{
				typ:    leaf.node.Type(),
				column: i,
				// TODO: we assume the number of values is the same as the
				// number of rows, which may not be accurate when the column is
				// part of a repeated group; neighbor columns may be repeated in
				// which case it would be impossible for this chunk not to be.
				numRows:   numRows,
				numValues: numRows,
				numNulls:  numRows,
			}
		} else {
			columns[i] = rowGroupColumns[j]
		}
	})

	// Sorting columns must exist on the conversion schema in order to be
	// advertised on the converted row group otherwise the resulting rows
	// would not be in the right order.
	//
	// Only the leading sorting columns are retained: the loop stops at the
	// first one which is missing from the target schema.
	sorting := []SortingColumn{}
	for _, col := range rowGroup.SortingColumns() {
		if !hasColumnPath(schema, col.Path()) {
			break
		}
		sorting = append(sorting, col)
	}

	return &convertedRowGroup{
		// The pair of rowGroup+conv is retained to construct a converted row
		// reader by wrapping the underlying row reader of the row group because
		// it allows proper reconstruction of the repetition and definition
		// levels.
		//
		// TODO: can we figure out how to set the repetition and definition
		// levels when reading values from missing column pages? At first sight
		// it appears complex to do, however:
		//
		// * It is possible that having these levels when reading values of
		//   missing column pages is not necessary in some scenarios (e.g. when
		//   merging row groups).
		//
		// * We may be able to assume the repetition and definition levels at
		//   the call site (e.g. in the functions reading rows from columns).
		//
		// Columns of the source row group which do not exist in the target are
		// masked to prevent loading unneeded pages when reading rows from the
		// converted row group.
		rowGroup: maskMissingRowGroupColumns(rowGroup, len(columns), conv),
		columns:  columns,
		sorting:  sorting,
		conv:     conv,
	}
}

// maskMissingRowGroupColumns returns a row group with the schema and number
// of rows of r, where every column chunk is replaced by a missingColumnChunk
// placeholder except for the source columns that conv maps one of the first
// numColumns target columns to.
func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Conversion) RowGroup {
	rowGroupColumns := r.ColumnChunks()
	columns := make([]ColumnChunk, len(rowGroupColumns))
	missing := make([]missingColumnChunk, len(columns))
	numRows := r.NumRows()

	for i := range missing {
		missing[i] = missingColumnChunk{
			typ:       rowGroupColumns[i].Type(),
			column:    int16(i),
			numRows:   numRows,
			numValues: numRows,
			numNulls:  numRows,
		}
	}

	// Start with every column masked...
	for i := range columns {
		columns[i] = &missing[i]
	}

	// ...then restore the source columns referenced by the conversion. Source
	// indexes that are negative or out of range are ignored.
	for i := 0; i < numColumns; i++ {
		j := conv.Column(i)
		if j >= 0 && j < len(columns) {
			columns[j] = rowGroupColumns[j]
		}
	}

	return &rowGroup{
		schema:  r.Schema(),
		numRows: numRows,
		columns: columns,
	}
}

// missingColumnChunk is a ColumnChunk placeholder for a column which has no
// data to read from. It only carries the column type and index, along with
// the counts it reports.
type missingColumnChunk struct {
	typ       Type
	column    int16
	numRows   int64
	numValues int64
	numNulls  int64
}

// The chunk exposes a single page, along with placeholder column index,
// offset index and bloom filter.
func (c *missingColumnChunk) Type() Type                        { return c.typ }
func (c *missingColumnChunk) Column() int                       { return int(c.column) }
func (c *missingColumnChunk) Pages() Pages                      { return onePage(missingPage{c}) }
func (c *missingColumnChunk) ColumnIndex() (ColumnIndex, error) { return missingColumnIndex{c}, nil }
func (c *missingColumnChunk) OffsetIndex() (OffsetIndex, error) { return missingOffsetIndex{}, nil }
func (c *missingColumnChunk) BloomFilter() BloomFilter          { return missingBloomFilter{} }
func (c *missingColumnChunk) NumValues() int64                  { return c.numValues }

// missingColumnIndex is the column index of a missingColumnChunk: a single
// null page with empty min and max values.
type missingColumnIndex struct{ *missingColumnChunk }

func (i missingColumnIndex) NumPages() int       { return 1 }
func (i missingColumnIndex) NullCount(int) int64 { return i.numNulls }
func (i missingColumnIndex) NullPage(int) bool   { return true }
func (i missingColumnIndex) MinValue(int) Value  { return Value{} }
func (i missingColumnIndex) MaxValue(int) Value  { return Value{} }
func (i missingColumnIndex) IsAscending() bool   { return true }
func (i missingColumnIndex) IsDescending() bool  { return false }

// missingOffsetIndex is the offset index of a missingColumnChunk: a single
// page reporting zero for its offset, compressed size and first row index.
type missingOffsetIndex struct{}

func (missingOffsetIndex) NumPages() int                { return 1 }
func (missingOffsetIndex) Offset(int) int64             { return 0 }
func (missingOffsetIndex) CompressedPageSize(int) int64 { return 0 }
func (missingOffsetIndex) FirstRowIndex(int) int64      { return 0 }

// missingBloomFilter is the bloom filter of a missingColumnChunk: it has no
// content to read and reports every value as absent.
type missingBloomFilter struct{}

func (missingBloomFilter) ReadAt([]byte, int64) (int, error) { return 0, io.EOF }
func (missingBloomFilter) Size() int64                       { return 0 }
func (missingBloomFilter) Check(Value) (bool, error)         { return false, nil }

// missingPage is the only page of a missingColumnChunk; its counts are the
// ones of the chunk it wraps.
type missingPage struct{ *missingColumnChunk }

func (p missingPage) Column() int                       { return int(p.column) }
func (p missingPage) Dictionary() Dictionary            { return nil }
func (p missingPage) NumRows() int64                    { return p.numRows }
func (p missingPage) NumValues() int64                  { return p.numValues }
func (p missingPage) NumNulls() int64                   { return p.numNulls }
func (p missingPage) Bounds() (min, max Value, ok bool) { return }

// Slice returns a missing page of the same type and column which reports
// j - i rows, values and nulls.
func (p missingPage) Slice(i, j int64) Page {
	return missingPage{
		&missingColumnChunk{
			typ:       p.typ,
			column:    p.column,
			numRows:   j - i,
			numValues: j - i,
			numNulls:  j - i,
		},
	}
}
func (p missingPage) Size() int64              { return 0 }
func (p missingPage) RepetitionLevels() []byte { return nil }
func (p missingPage) DefinitionLevels() []byte { return nil }
func (p missingPage) Data() encoding.Values    { return p.typ.NewValues(nil, nil) }
func (p missingPage) Values() ValueReader      { return &missingPageValues{page: p} }

// missingPageValues is the ValueReader of a missingPage; read counts the
// values handed out so far.
type missingPageValues struct {
	page missingPage
	read int64
}

// ReadValues fills values with at most the remaining number of values of the
// page. Each value is zero except for its column index, stored as the bitwise
// complement of the page's column. io.EOF is returned along with the last
// values of the page.
func (r *missingPageValues) ReadValues(values []Value) (int, error) {
	remain := r.page.numValues - r.read
	if int64(len(values)) > remain {
		values = values[:remain]
	}
	for i := range values {
		// TODO: how do we set the repetition and definition levels here?
		values[i] = Value{columnIndex: ^r.page.column}
	}
	if r.read += int64(len(values)); r.read == r.page.numValues {
		return len(values), io.EOF
	}
	return len(values), nil
}

// Close marks all the values of the page as read, so that subsequent calls to
// ReadValues return no values and io.EOF.
func (r *missingPageValues) Close() error {
	r.read = r.page.numValues
	return nil
}

// convertedRowGroup is the RowGroup returned by ConvertRowGroup. It exposes
// the target schema, column chunks and sorting columns, and reads its rows
// from the wrapped source row group through the conversion.
type convertedRowGroup struct {
	rowGroup RowGroup
	columns  []ColumnChunk
	sorting  []SortingColumn
	conv     Conversion
}

func (c *convertedRowGroup) NumRows() int64                  { return c.rowGroup.NumRows() }
func (c *convertedRowGroup) ColumnChunks() []ColumnChunk     { return c.columns }
func (c *convertedRowGroup) Schema() *Schema                 { return c.conv.Schema() }
func (c *convertedRowGroup) SortingColumns() []SortingColumn { return c.sorting }

// Rows returns a reader which converts the rows read from the wrapped row
// group; closing it closes the underlying rows.
func (c *convertedRowGroup) Rows() Rows {
	rows := c.rowGroup.Rows()
	return &convertedRows{
		Closer: rows,
		rows:   rows,
		conv:   c.conv,
	}
}

// ConvertRowReader constructs a wrapper of the given row reader which applies
// the given schema conversion to the rows.
557 func ConvertRowReader(rows RowReader, conv Conversion) RowReaderWithSchema { 558 return &convertedRows{rows: &forwardRowSeeker{rows: rows}, conv: conv} 559 } 560 561 type convertedRows struct { 562 io.Closer 563 rows RowReadSeeker 564 conv Conversion 565 } 566 567 func (c *convertedRows) ReadRows(rows []Row) (int, error) { 568 n, err := c.rows.ReadRows(rows) 569 if n > 0 { 570 var convErr error 571 n, convErr = c.conv.Convert(rows[:n]) 572 if convErr != nil { 573 err = convErr 574 } 575 } 576 return n, err 577 } 578 579 func (c *convertedRows) Schema() *Schema { 580 return c.conv.Schema() 581 } 582 583 func (c *convertedRows) SeekToRow(rowIndex int64) error { 584 return c.rows.SeekToRow(rowIndex) 585 } 586 587 var ( 588 trueBytes = []byte(`true`) 589 falseBytes = []byte(`false`) 590 unixEpoch = time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC) 591 ) 592 593 func convertBooleanToInt32(v Value) (Value, error) { 594 return v.convertToInt32(int32(v.byte())), nil 595 } 596 597 func convertBooleanToInt64(v Value) (Value, error) { 598 return v.convertToInt64(int64(v.byte())), nil 599 } 600 601 func convertBooleanToInt96(v Value) (Value, error) { 602 return v.convertToInt96(deprecated.Int96{0: uint32(v.byte())}), nil 603 } 604 605 func convertBooleanToFloat(v Value) (Value, error) { 606 return v.convertToFloat(float32(v.byte())), nil 607 } 608 609 func convertBooleanToDouble(v Value) (Value, error) { 610 return v.convertToDouble(float64(v.byte())), nil 611 } 612 613 func convertBooleanToByteArray(v Value) (Value, error) { 614 return v.convertToByteArray([]byte{v.byte()}), nil 615 } 616 617 func convertBooleanToFixedLenByteArray(v Value, size int) (Value, error) { 618 b := []byte{v.byte()} 619 c := make([]byte, size) 620 copy(c, b) 621 return v.convertToFixedLenByteArray(c), nil 622 } 623 624 func convertBooleanToString(v Value) (Value, error) { 625 b := ([]byte)(nil) 626 if v.boolean() { 627 b = trueBytes 628 } else { 629 b = falseBytes 630 } 631 return 
v.convertToByteArray(b), nil 632 } 633 634 func convertInt32ToBoolean(v Value) (Value, error) { 635 return v.convertToBoolean(v.int32() != 0), nil 636 } 637 638 func convertInt32ToInt64(v Value) (Value, error) { 639 return v.convertToInt64(int64(v.int32())), nil 640 } 641 642 func convertInt32ToInt96(v Value) (Value, error) { 643 return v.convertToInt96(deprecated.Int32ToInt96(v.int32())), nil 644 } 645 646 func convertInt32ToFloat(v Value) (Value, error) { 647 return v.convertToFloat(float32(v.int32())), nil 648 } 649 650 func convertInt32ToDouble(v Value) (Value, error) { 651 return v.convertToDouble(float64(v.int32())), nil 652 } 653 654 func convertInt32ToByteArray(v Value) (Value, error) { 655 b := make([]byte, 4) 656 binary.LittleEndian.PutUint32(b, v.uint32()) 657 return v.convertToByteArray(b), nil 658 } 659 660 func convertInt32ToFixedLenByteArray(v Value, size int) (Value, error) { 661 b := make([]byte, 4) 662 c := make([]byte, size) 663 binary.LittleEndian.PutUint32(b, v.uint32()) 664 copy(c, b) 665 return v.convertToFixedLenByteArray(c), nil 666 } 667 668 func convertInt32ToString(v Value) (Value, error) { 669 return v.convertToByteArray(strconv.AppendInt(nil, int64(v.int32()), 10)), nil 670 } 671 672 func convertInt64ToBoolean(v Value) (Value, error) { 673 return v.convertToBoolean(v.int64() != 0), nil 674 } 675 676 func convertInt64ToInt32(v Value) (Value, error) { 677 return v.convertToInt32(int32(v.int64())), nil 678 } 679 680 func convertInt64ToInt96(v Value) (Value, error) { 681 return v.convertToInt96(deprecated.Int64ToInt96(v.int64())), nil 682 } 683 684 func convertInt64ToFloat(v Value) (Value, error) { 685 return v.convertToFloat(float32(v.int64())), nil 686 } 687 688 func convertInt64ToDouble(v Value) (Value, error) { 689 return v.convertToDouble(float64(v.int64())), nil 690 } 691 692 func convertInt64ToByteArray(v Value) (Value, error) { 693 b := make([]byte, 8) 694 binary.LittleEndian.PutUint64(b, v.uint64()) 695 return 
v.convertToByteArray(b), nil 696 } 697 698 func convertInt64ToFixedLenByteArray(v Value, size int) (Value, error) { 699 b := make([]byte, 8) 700 c := make([]byte, size) 701 binary.LittleEndian.PutUint64(b, v.uint64()) 702 copy(c, b) 703 return v.convertToFixedLenByteArray(c), nil 704 } 705 706 func convertInt64ToString(v Value) (Value, error) { 707 return v.convertToByteArray(strconv.AppendInt(nil, v.int64(), 10)), nil 708 } 709 710 func convertInt96ToBoolean(v Value) (Value, error) { 711 return v.convertToBoolean(!v.int96().IsZero()), nil 712 } 713 714 func convertInt96ToInt32(v Value) (Value, error) { 715 return v.convertToInt32(v.int96().Int32()), nil 716 } 717 718 func convertInt96ToInt64(v Value) (Value, error) { 719 return v.convertToInt64(v.int96().Int64()), nil 720 } 721 722 func convertInt96ToFloat(v Value) (Value, error) { 723 return v, invalidConversion(v, "INT96", "FLOAT") 724 } 725 726 func convertInt96ToDouble(v Value) (Value, error) { 727 return v, invalidConversion(v, "INT96", "DOUBLE") 728 } 729 730 func convertInt96ToByteArray(v Value) (Value, error) { 731 return v.convertToByteArray(v.byteArray()), nil 732 } 733 734 func convertInt96ToFixedLenByteArray(v Value, size int) (Value, error) { 735 b := v.byteArray() 736 if len(b) < size { 737 c := make([]byte, size) 738 copy(c, b) 739 b = c 740 } else { 741 b = b[:size] 742 } 743 return v.convertToFixedLenByteArray(b), nil 744 } 745 746 func convertInt96ToString(v Value) (Value, error) { 747 return v.convertToByteArray([]byte(v.String())), nil 748 } 749 750 func convertFloatToBoolean(v Value) (Value, error) { 751 return v.convertToBoolean(v.float() != 0), nil 752 } 753 754 func convertFloatToInt32(v Value) (Value, error) { 755 return v.convertToInt32(int32(v.float())), nil 756 } 757 758 func convertFloatToInt64(v Value) (Value, error) { 759 return v.convertToInt64(int64(v.float())), nil 760 } 761 762 func convertFloatToInt96(v Value) (Value, error) { 763 return v, invalidConversion(v, "FLOAT", "INT96") 
764 } 765 766 func convertFloatToDouble(v Value) (Value, error) { 767 return v.convertToDouble(float64(v.float())), nil 768 } 769 770 func convertFloatToByteArray(v Value) (Value, error) { 771 b := make([]byte, 4) 772 binary.LittleEndian.PutUint32(b, v.uint32()) 773 return v.convertToByteArray(b), nil 774 } 775 776 func convertFloatToFixedLenByteArray(v Value, size int) (Value, error) { 777 b := make([]byte, 4) 778 c := make([]byte, size) 779 binary.LittleEndian.PutUint32(b, v.uint32()) 780 copy(c, b) 781 return v.convertToFixedLenByteArray(c), nil 782 } 783 784 func convertFloatToString(v Value) (Value, error) { 785 return v.convertToByteArray(strconv.AppendFloat(nil, float64(v.float()), 'g', -1, 32)), nil 786 } 787 788 func convertDoubleToBoolean(v Value) (Value, error) { 789 return v.convertToBoolean(v.double() != 0), nil 790 } 791 792 func convertDoubleToInt32(v Value) (Value, error) { 793 return v.convertToInt32(int32(v.double())), nil 794 } 795 796 func convertDoubleToInt64(v Value) (Value, error) { 797 return v.convertToInt64(int64(v.double())), nil 798 } 799 800 func convertDoubleToInt96(v Value) (Value, error) { 801 return v, invalidConversion(v, "FLOAT", "INT96") 802 } 803 804 func convertDoubleToFloat(v Value) (Value, error) { 805 return v.convertToFloat(float32(v.double())), nil 806 } 807 808 func convertDoubleToByteArray(v Value) (Value, error) { 809 b := make([]byte, 8) 810 binary.LittleEndian.PutUint64(b, v.uint64()) 811 return v.convertToByteArray(b), nil 812 } 813 814 func convertDoubleToFixedLenByteArray(v Value, size int) (Value, error) { 815 b := make([]byte, 8) 816 c := make([]byte, size) 817 binary.LittleEndian.PutUint64(b, v.uint64()) 818 copy(c, b) 819 return v.convertToFixedLenByteArray(c), nil 820 } 821 822 func convertDoubleToString(v Value) (Value, error) { 823 return v.convertToByteArray(strconv.AppendFloat(nil, v.double(), 'g', -1, 64)), nil 824 } 825 826 func convertByteArrayToBoolean(v Value) (Value, error) { 827 return 
v.convertToBoolean(!isZero(v.byteArray())), nil 828 } 829 830 func convertByteArrayToInt32(v Value) (Value, error) { 831 b := make([]byte, 4) 832 copy(b, v.byteArray()) 833 return v.convertToInt32(int32(binary.LittleEndian.Uint32(b))), nil 834 } 835 836 func convertByteArrayToInt64(v Value) (Value, error) { 837 b := make([]byte, 8) 838 copy(b, v.byteArray()) 839 return v.convertToInt64(int64(binary.LittleEndian.Uint64(b))), nil 840 } 841 842 func convertByteArrayToInt96(v Value) (Value, error) { 843 b := make([]byte, 12) 844 copy(b, v.byteArray()) 845 return v.convertToInt96(deprecated.Int96{ 846 0: binary.LittleEndian.Uint32(b[0:4]), 847 1: binary.LittleEndian.Uint32(b[4:8]), 848 2: binary.LittleEndian.Uint32(b[8:12]), 849 }), nil 850 } 851 852 func convertByteArrayToFloat(v Value) (Value, error) { 853 b := make([]byte, 4) 854 copy(b, v.byteArray()) 855 return v.convertToFloat(math.Float32frombits(binary.LittleEndian.Uint32(b))), nil 856 } 857 858 func convertByteArrayToDouble(v Value) (Value, error) { 859 b := make([]byte, 8) 860 copy(b, v.byteArray()) 861 return v.convertToDouble(math.Float64frombits(binary.LittleEndian.Uint64(b))), nil 862 } 863 864 func convertByteArrayToFixedLenByteArray(v Value, size int) (Value, error) { 865 b := v.byteArray() 866 if len(b) < size { 867 c := make([]byte, size) 868 copy(c, b) 869 b = c 870 } else { 871 b = b[:size] 872 } 873 return v.convertToFixedLenByteArray(b), nil 874 } 875 876 func convertFixedLenByteArrayToString(v Value) (Value, error) { 877 b := v.byteArray() 878 c := make([]byte, hex.EncodedLen(len(b))) 879 hex.Encode(c, b) 880 return v.convertToByteArray(c), nil 881 } 882 883 func convertStringToBoolean(v Value) (Value, error) { 884 b, err := strconv.ParseBool(v.string()) 885 if err != nil { 886 return v, conversionError(v, "STRING", "BOOLEAN", err) 887 } 888 return v.convertToBoolean(b), nil 889 } 890 891 func convertStringToInt32(v Value) (Value, error) { 892 i, err := strconv.ParseInt(v.string(), 10, 32) 893 if 
err != nil { 894 return v, conversionError(v, "STRING", "INT32", err) 895 } 896 return v.convertToInt32(int32(i)), nil 897 } 898 899 func convertStringToInt64(v Value) (Value, error) { 900 i, err := strconv.ParseInt(v.string(), 10, 64) 901 if err != nil { 902 return v, conversionError(v, "STRING", "INT64", err) 903 } 904 return v.convertToInt64(i), nil 905 } 906 907 func convertStringToInt96(v Value) (Value, error) { 908 i, ok := new(big.Int).SetString(v.string(), 10) 909 if !ok { 910 return v, conversionError(v, "STRING", "INT96", strconv.ErrSyntax) 911 } 912 b := i.Bytes() 913 c := make([]byte, 12) 914 copy(c, b) 915 i96 := deprecated.BytesToInt96(c) 916 return v.convertToInt96(i96[0]), nil 917 } 918 919 func convertStringToFloat(v Value) (Value, error) { 920 f, err := strconv.ParseFloat(v.string(), 32) 921 if err != nil { 922 return v, conversionError(v, "STRING", "FLOAT", err) 923 } 924 return v.convertToFloat(float32(f)), nil 925 } 926 927 func convertStringToDouble(v Value) (Value, error) { 928 f, err := strconv.ParseFloat(v.string(), 64) 929 if err != nil { 930 return v, conversionError(v, "STRING", "DOUBLE", err) 931 } 932 return v.convertToDouble(f), nil 933 } 934 935 func convertStringToFixedLenByteArray(v Value, size int) (Value, error) { 936 b := v.byteArray() 937 c := make([]byte, size) 938 _, err := hex.Decode(c, b) 939 if err != nil { 940 return v, conversionError(v, "STRING", "BYTE_ARRAY", err) 941 } 942 return v.convertToFixedLenByteArray(c), nil 943 } 944 945 func convertStringToDate(v Value, tz *time.Location) (Value, error) { 946 t, err := time.ParseInLocation("2006-01-02", v.string(), tz) 947 if err != nil { 948 return v, conversionError(v, "STRING", "DATE", err) 949 } 950 d := daysSinceUnixEpoch(t) 951 return v.convertToInt32(int32(d)), nil 952 } 953 954 func convertStringToTimeMillis(v Value, tz *time.Location) (Value, error) { 955 t, err := time.ParseInLocation("15:04:05.999", v.string(), tz) 956 if err != nil { 957 return v, 
conversionError(v, "STRING", "TIME", err) 958 } 959 m := nearestMidnightLessThan(t) 960 milliseconds := t.Sub(m).Milliseconds() 961 return v.convertToInt32(int32(milliseconds)), nil 962 } 963 964 func convertStringToTimeMicros(v Value, tz *time.Location) (Value, error) { 965 t, err := time.ParseInLocation("15:04:05.999999", v.string(), tz) 966 if err != nil { 967 return v, conversionError(v, "STRING", "TIME", err) 968 } 969 m := nearestMidnightLessThan(t) 970 microseconds := t.Sub(m).Microseconds() 971 return v.convertToInt64(microseconds), nil 972 } 973 974 func convertDateToTimestamp(v Value, u format.TimeUnit, tz *time.Location) (Value, error) { 975 t := unixEpoch.AddDate(0, 0, int(v.int32())) 976 d := timeUnitDuration(u) 977 return v.convertToInt64(int64(t.In(tz).Sub(unixEpoch) / d)), nil 978 } 979 980 func convertDateToString(v Value) (Value, error) { 981 t := unixEpoch.AddDate(0, 0, int(v.int32())) 982 b := t.AppendFormat(make([]byte, 0, 10), "2006-01-02") 983 return v.convertToByteArray(b), nil 984 } 985 986 func convertTimeMillisToString(v Value, tz *time.Location) (Value, error) { 987 t := time.UnixMilli(int64(v.int32())).In(tz) 988 b := t.AppendFormat(make([]byte, 0, 12), "15:04:05.999") 989 return v.convertToByteArray(b), nil 990 } 991 992 func convertTimeMicrosToString(v Value, tz *time.Location) (Value, error) { 993 t := time.UnixMicro(v.int64()).In(tz) 994 b := t.AppendFormat(make([]byte, 0, 15), "15:04:05.999999") 995 return v.convertToByteArray(b), nil 996 } 997 998 func convertTimestampToDate(v Value, u format.TimeUnit, tz *time.Location) (Value, error) { 999 t := timestamp(v, u, tz) 1000 d := daysSinceUnixEpoch(t) 1001 return v.convertToInt32(int32(d)), nil 1002 } 1003 1004 func convertTimestampToTimeMillis(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) { 1005 t := timestamp(v, u, sourceZone) 1006 m := nearestMidnightLessThan(t) 1007 milliseconds := t.In(targetZone).Sub(m).Milliseconds() 1008 return 
v.convertToInt32(int32(milliseconds)), nil 1009 } 1010 1011 func convertTimestampToTimeMicros(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) { 1012 t := timestamp(v, u, sourceZone) 1013 m := nearestMidnightLessThan(t) 1014 microseconds := t.In(targetZone).Sub(m).Microseconds() 1015 return v.convertToInt64(int64(microseconds)), nil 1016 } 1017 1018 func convertTimestampToTimestamp(v Value, sourceUnit, targetUnit format.TimeUnit) (Value, error) { 1019 sourceScale := timeUnitDuration(sourceUnit).Nanoseconds() 1020 targetScale := timeUnitDuration(targetUnit).Nanoseconds() 1021 targetValue := (v.int64() * sourceScale) / targetScale 1022 return v.convertToInt64(targetValue), nil 1023 } 1024 1025 const nanosecondsPerDay = 24 * 60 * 60 * 1e9 1026 1027 func daysSinceUnixEpoch(t time.Time) int { 1028 return int(t.Sub(unixEpoch).Hours()) / 24 1029 } 1030 1031 func nearestMidnightLessThan(t time.Time) time.Time { 1032 y, m, d := t.Date() 1033 return time.Date(y, m, d, 0, 0, 0, 0, t.Location()) 1034 } 1035 1036 func timestamp(v Value, u format.TimeUnit, tz *time.Location) time.Time { 1037 return unixEpoch.In(tz).Add(time.Duration(v.int64()) * timeUnitDuration(u)) 1038 } 1039 1040 func timeUnitDuration(unit format.TimeUnit) time.Duration { 1041 switch { 1042 case unit.Millis != nil: 1043 return time.Millisecond 1044 case unit.Micros != nil: 1045 return time.Microsecond 1046 default: 1047 return time.Nanosecond 1048 } 1049 } 1050 1051 func invalidConversion(value Value, from, to string) error { 1052 return fmt.Errorf("%s to %s: %s: %w", from, to, value, ErrInvalidConversion) 1053 } 1054 1055 func conversionError(value Value, from, to string, err error) error { 1056 return fmt.Errorf("%s to %s: %q: %s: %w", from, to, value.string(), err, ErrInvalidConversion) 1057 }