// Origin: github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/value.go

package parquet

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"reflect"
	"strconv"
	"time"
	"unsafe"

	"github.com/google/uuid"
	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/format"
	"github.com/segmentio/parquet-go/internal/unsafecast"
)

const (
	// 170 x sizeof(Value) = 4KB
	//
	// This is the length of the scratch buffer that copyValues allocates
	// when the caller does not provide one.
	defaultValueBufferSize = 170
)

// The Value type is similar to the reflect.Value abstraction of Go values, but
// for parquet values. Value instances wrap underlying Go values mapped to one
// of the parquet physical types.
//
// Value instances are small, immutable objects, and usually passed by value
// between function calls.
//
// The zero-value of Value represents the null parquet value.
type Value struct {
	// data
	//
	// ptr is the address of the first byte of the payload for values that
	// are backed by a byte sequence (BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY and
	// INT96); it is left nil by the scalar constructors.
	ptr *byte
	// u64 holds the bits of scalar values, or the payload length in bytes
	// when ptr is in use.
	u64 uint64
	// type
	//
	// The kind is stored as the bitwise complement of the Kind (^Kind), which
	// is what makes a zeroed Value decode as the null kind.
	kind int8 // XOR(Kind) so the zero-value is <null>
	// levels
	definitionLevel byte
	repetitionLevel byte
	// The column index is stored complemented as well; see the column method.
	columnIndex int16 // XOR so the zero-value is -1
}

// ValueReader is an interface implemented by types that support reading
// batches of values.
type ValueReader interface {
	// Read values into the buffer passed as argument and return the number of
	// values read. When all values have been read, the error will be io.EOF.
	ReadValues([]Value) (int, error)
}

// ValueReaderAt is an interface implemented by types that support reading
// values at offsets specified by the application.
type ValueReaderAt interface {
	// NOTE(review): the int64 argument is presumably the offset at which to
	// start reading, by analogy with io.ReaderAt; confirm with implementations.
	ReadValuesAt([]Value, int64) (int, error)
}

// ValueReaderFrom is an interface implemented by value writers to read values
// from a reader.
type ValueReaderFrom interface {
	// ReadValuesFrom is called by copyValues when the destination implements
	// this interface; its results are returned to the caller as-is.
	ReadValuesFrom(ValueReader) (int64, error)
}

// ValueWriter is an interface implemented by types that support writing
// batches of values.
type ValueWriter interface {
	// Write values from the buffer passed as argument and returns the number
	// of values written.
	WriteValues([]Value) (int, error)
}

// ValueWriterTo is an interface implemented by value readers to write values to
// a writer.
type ValueWriterTo interface {
	// WriteValuesTo is called by copyValues when the source implements this
	// interface; its results are returned to the caller as-is.
	WriteValuesTo(ValueWriter) (int64, error)
}

// ValueReaderFunc is a function type implementing the ValueReader interface.
type ValueReaderFunc func([]Value) (int, error)

// ReadValues calls f(values) and returns its results.
func (f ValueReaderFunc) ReadValues(values []Value) (int, error) { return f(values) }

// ValueWriterFunc is a function type implementing the ValueWriter interface.
type ValueWriterFunc func([]Value) (int, error)

// WriteValues calls f(values) and returns its results.
func (f ValueWriterFunc) WriteValues(values []Value) (int, error) { return f(values) }

// CopyValues copies values from src to dst, returning the number of values
// that were written.
//
// As an optimization, src may implement ValueWriterTo and dst may implement
// ValueReaderFrom to provide their own copy logic; ValueWriterTo on src takes
// precedence when both are available.
//
// The function returns any error it encounters reading or writing values,
// except for io.EOF from the reader which indicates that there were no more
// values to read.
98 func CopyValues(dst ValueWriter, src ValueReader) (int64, error) { 99 return copyValues(dst, src, nil) 100 } 101 102 func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, err error) { 103 if wt, ok := src.(ValueWriterTo); ok { 104 return wt.WriteValuesTo(dst) 105 } 106 107 if rf, ok := dst.(ValueReaderFrom); ok { 108 return rf.ReadValuesFrom(src) 109 } 110 111 if len(buf) == 0 { 112 buf = make([]Value, defaultValueBufferSize) 113 } 114 115 defer clearValues(buf) 116 117 for { 118 n, err := src.ReadValues(buf) 119 120 if n > 0 { 121 wn, werr := dst.WriteValues(buf[:n]) 122 written += int64(wn) 123 if werr != nil { 124 return written, werr 125 } 126 } 127 128 if err != nil { 129 if err == io.EOF { 130 err = nil 131 } 132 return written, err 133 } 134 135 if n == 0 { 136 return written, io.ErrNoProgress 137 } 138 } 139 } 140 141 // ValueOf constructs a parquet value from a Go value v. 142 // 143 // The physical type of the value is assumed from the Go type of v using the 144 // following conversion table: 145 // 146 // Go type | Parquet physical type 147 // ------- | --------------------- 148 // nil | NULL 149 // bool | BOOLEAN 150 // int8 | INT32 151 // int16 | INT32 152 // int32 | INT32 153 // int64 | INT64 154 // int | INT64 155 // uint8 | INT32 156 // uint16 | INT32 157 // uint32 | INT32 158 // uint64 | INT64 159 // uintptr | INT64 160 // float32 | FLOAT 161 // float64 | DOUBLE 162 // string | BYTE_ARRAY 163 // []byte | BYTE_ARRAY 164 // [*]byte | FIXED_LEN_BYTE_ARRAY 165 // 166 // When converting a []byte or [*]byte value, the underlying byte array is not 167 // copied; instead, the returned parquet value holds a reference to it. 168 // 169 // The repetition and definition levels of the returned value are both zero. 170 // 171 // The function panics if the Go value cannot be represented in parquet. 
172 func ValueOf(v interface{}) Value { 173 k := Kind(-1) 174 t := reflect.TypeOf(v) 175 176 switch value := v.(type) { 177 case nil: 178 return Value{} 179 case uuid.UUID: 180 return makeValueBytes(FixedLenByteArray, value[:]) 181 case deprecated.Int96: 182 return makeValueInt96(value) 183 case time.Time: 184 k = Int64 185 } 186 187 switch t.Kind() { 188 case reflect.Bool: 189 k = Boolean 190 case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Uint8, reflect.Uint16, reflect.Uint32: 191 k = Int32 192 case reflect.Int64, reflect.Int, reflect.Uint64, reflect.Uint, reflect.Uintptr: 193 k = Int64 194 case reflect.Float32: 195 k = Float 196 case reflect.Float64: 197 k = Double 198 case reflect.String: 199 k = ByteArray 200 case reflect.Slice: 201 if t.Elem().Kind() == reflect.Uint8 { 202 k = ByteArray 203 } 204 case reflect.Array: 205 if t.Elem().Kind() == reflect.Uint8 { 206 k = FixedLenByteArray 207 } 208 } 209 210 if k < 0 { 211 panic("cannot create parquet value from go value of type " + t.String()) 212 } 213 214 return makeValue(k, nil, reflect.ValueOf(v)) 215 } 216 217 // NulLValue constructs a null value, which is the zero-value of the Value type. 218 func NullValue() Value { return Value{} } 219 220 // ZeroValue constructs a zero value of the given kind. 221 func ZeroValue(kind Kind) Value { return makeValueKind(kind) } 222 223 // BooleanValue constructs a BOOLEAN parquet value from the bool passed as 224 // argument. 225 func BooleanValue(value bool) Value { return makeValueBoolean(value) } 226 227 // Int32Value constructs a INT32 parquet value from the int32 passed as 228 // argument. 229 func Int32Value(value int32) Value { return makeValueInt32(value) } 230 231 // Int64Value constructs a INT64 parquet value from the int64 passed as 232 // argument. 233 func Int64Value(value int64) Value { return makeValueInt64(value) } 234 235 // Int96Value constructs a INT96 parquet value from the deprecated.Int96 passed 236 // as argument. 
func Int96Value(value deprecated.Int96) Value { return makeValueInt96(value) }

// FloatValue constructs a FLOAT parquet value from the float32 passed as
// argument.
func FloatValue(value float32) Value { return makeValueFloat(value) }

// DoubleValue constructs a DOUBLE parquet value from the float64 passed as
// argument.
func DoubleValue(value float64) Value { return makeValueDouble(value) }

// ByteArrayValue constructs a BYTE_ARRAY parquet value from the byte slice
// passed as argument.
func ByteArrayValue(value []byte) Value { return makeValueBytes(ByteArray, value) }

// FixedLenByteArrayValue constructs a FIXED_LEN_BYTE_ARRAY parquet value from
// the byte slice passed as argument.
func FixedLenByteArrayValue(value []byte) Value { return makeValueBytes(FixedLenByteArray, value) }

// makeValue converts the Go value v to a parquet value of physical type k.
//
// time.Time values are handled first, regardless of k: they are converted to
// INT64 values holding the Unix time expressed in the unit of the timestamp
// logical type carried by lt (milliseconds or microseconds), falling back to
// nanoseconds when lt is nil or has no timestamp.
//
// The function panics if v cannot be represented as a value of kind k.
func makeValue(k Kind, lt *format.LogicalType, v reflect.Value) Value {
	switch v.Type() {
	case reflect.TypeOf(time.Time{}):
		unit := Nanosecond.TimeUnit()
		if lt != nil && lt.Timestamp != nil {
			unit = lt.Timestamp.Unit
		}

		t := v.Interface().(time.Time)
		var val int64
		switch {
		case unit.Millis != nil:
			val = t.UnixMilli()
		case unit.Micros != nil:
			val = t.UnixMicro()
		default:
			val = t.UnixNano()
		}
		return makeValueInt64(val)
	}

	// Each case only returns for the Go kinds it supports; unsupported
	// combinations fall out of the switch and reach the panic below.
	switch k {
	case Boolean:
		return makeValueBoolean(v.Bool())

	case Int32:
		switch v.Kind() {
		case reflect.Int8, reflect.Int16, reflect.Int32:
			return makeValueInt32(int32(v.Int()))
		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
			return makeValueInt32(int32(v.Uint()))
		}

	case Int64:
		switch v.Kind() {
		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
			return makeValueInt64(v.Int())
		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
			return makeValueUint64(v.Uint())
		}

	case Int96:
		switch v.Type() {
		case reflect.TypeOf(deprecated.Int96{}):
			return makeValueInt96(v.Interface().(deprecated.Int96))
		}

	case Float:
		switch v.Kind() {
		case reflect.Float32:
			return makeValueFloat(float32(v.Float()))
		}

	case Double:
		switch v.Kind() {
		case reflect.Float32, reflect.Float64:
			return makeValueDouble(v.Float())
		}

	case ByteArray:
		switch v.Kind() {
		case reflect.String:
			return makeValueString(k, v.String())
		case reflect.Slice:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueBytes(k, v.Bytes())
			}
		}

	case FixedLenByteArray:
		switch v.Kind() {
		case reflect.String: // uuid
			return makeValueString(k, v.String())
		case reflect.Array:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueFixedLenByteArray(v)
			}
		case reflect.Slice:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueBytes(k, v.Bytes())
			}
		}
	}

	panic("cannot create parquet value of type " + k.String() + " from go value of type " + v.Type().String())
}

// makeValueKind returns a value of the given kind with no payload.
func makeValueKind(kind Kind) Value {
	return Value{kind: ^int8(kind)}
}

// makeValueBoolean returns a BOOLEAN value; true is stored as 1, false as 0.
func makeValueBoolean(value bool) Value {
	v := Value{kind: ^int8(Boolean)}
	if value {
		v.u64 = 1
	}
	return v
}

// makeValueInt32 returns an INT32 value. The conversion to uint64 sign-extends
// negative inputs; the int32 accessor truncates back to the low 32 bits.
func makeValueInt32(value int32) Value {
	return Value{
		kind: ^int8(Int32),
		u64:  uint64(value),
	}
}

// makeValueInt64 returns an INT64 value holding the bits of value.
func makeValueInt64(value int64) Value {
	return Value{
		kind: ^int8(Int64),
		u64:  uint64(value),
	}
}

// makeValueInt96 returns an INT96 value referencing a little-endian copy of
// the three 32 bits words of value.
func makeValueInt96(value deprecated.Int96) Value {
	// TODO: this is highly inefficient because we need a heap allocation to
	// store the value; we don't expect INT96 to be used frequently since it
	// is a deprecated feature of parquet, and it helps keep the Value type
	// compact for all the other more common cases.
	bits := [12]byte{}
	binary.LittleEndian.PutUint32(bits[0:4], value[0])
	binary.LittleEndian.PutUint32(bits[4:8], value[1])
	binary.LittleEndian.PutUint32(bits[8:12], value[2])
	return Value{
		kind: ^int8(Int96),
		ptr:  &bits[0],
		u64:  12, // set the length so we can use the ByteArray method
	}
}

// makeValueUint32 returns an INT32 value holding the zero-extended bits of
// value.
func makeValueUint32(value uint32) Value {
	return Value{
		kind: ^int8(Int32),
		u64:  uint64(value),
	}
}

// makeValueUint64 returns an INT64 value holding the bits of value.
func makeValueUint64(value uint64) Value {
	return Value{
		kind: ^int8(Int64),
		u64:  value,
	}
}

// makeValueFloat returns a FLOAT value holding the IEEE 754 bits of value.
func makeValueFloat(value float32) Value {
	return Value{
		kind: ^int8(Float),
		u64:  uint64(math.Float32bits(value)),
	}
}

// makeValueDouble returns a DOUBLE value holding the IEEE 754 bits of value.
func makeValueDouble(value float64) Value {
	return Value{
		kind: ^int8(Double),
		u64:  math.Float64bits(value),
	}
}

// makeValueBytes returns a value of the given kind referencing the content of
// the byte slice, without copying it.
func makeValueBytes(kind Kind, value []byte) Value {
	return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
}

// makeValueString returns a value of the given kind referencing the content
// of the string, without copying it.
func makeValueString(kind Kind, value string) Value {
	return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
}

// makeValueFixedLenByteArray returns a FIXED_LEN_BYTE_ARRAY value from v,
// which the caller has verified to be an array of bytes.
func makeValueFixedLenByteArray(v reflect.Value) Value {
	t := v.Type()
	// When the array is addressable, we take advantage of this
	// condition to avoid the heap allocation otherwise needed
	// to pack the reference into an interface{} value.
	if v.CanAddr() {
		v = v.Addr()
	} else {
		// Not addressable: copy the array into freshly allocated memory so
		// that there is a stable address to reference.
		u := reflect.New(t)
		u.Elem().Set(v)
		v = u
	}
	return makeValueByteArray(FixedLenByteArray, (*byte)(unsafePointer(v)), t.Len())
}

// makeValueByteArray returns a value of the given kind referencing size bytes
// starting at data.
func makeValueByteArray(kind Kind, data *byte, size int) Value {
	return Value{
		kind: ^int8(kind),
		ptr:  data,
		u64:  uint64(size),
	}
}

// These methods are internal versions of methods exported by the Value type,
// they are usually inlined by the compiler and intended to be used inside the
// parquet-go package because they tend to generate better code than their
// exported counterpart, which requires making a copy of the receiver.
func (v *Value) isNull() bool            { return v.kind == 0 }
func (v *Value) byte() byte              { return byte(v.u64) }
func (v *Value) boolean() bool           { return v.u64 != 0 }
func (v *Value) int32() int32            { return int32(v.u64) }
func (v *Value) int64() int64            { return int64(v.u64) }
func (v *Value) int96() deprecated.Int96 { return makeInt96(v.byteArray()) }
func (v *Value) float() float32          { return math.Float32frombits(uint32(v.u64)) }
func (v *Value) double() float64         { return math.Float64frombits(uint64(v.u64)) }
func (v *Value) uint32() uint32          { return uint32(v.u64) }
func (v *Value) uint64() uint64          { return v.u64 }
func (v *Value) byteArray() []byte       { return unsafecast.Bytes(v.ptr, int(v.u64)) }
func (v *Value) string() string          { return unsafecast.BytesToString(v.byteArray()) }
func (v *Value) be128() *[16]byte        { return (*[16]byte)(unsafe.Pointer(v.ptr)) }
func (v *Value) column() int             { return int(^v.columnIndex) }

// The convertTo* methods below return a copy of v holding x with the physical
// type named by the method. Only the kind and data fields are replaced; the
// repetition level, definition level, and column index of v are carried over.

func (v Value) convertToBoolean(x bool) Value {
	v.kind = ^int8(Boolean)
	v.ptr = nil
	v.u64 = 0
	if x {
		v.u64 = 1
	}
	return v
}

func (v Value) convertToInt32(x int32) Value {
	v.kind = ^int8(Int32)
	v.ptr = nil
	v.u64 = uint64(x)
	return v
}

func (v Value) convertToInt64(x int64) Value {
	v.kind = ^int8(Int64)
	v.ptr = nil
	v.u64 = uint64(x)
	return v
}

func (v Value) convertToInt96(x deprecated.Int96) Value {
	// Reuse makeValueInt96 for the encoding of x, then graft the result onto
	// v so the levels and column index are preserved.
	i96 := makeValueInt96(x)
	v.kind = i96.kind
	v.ptr = i96.ptr
	v.u64 = i96.u64
	return v
}

func (v Value) convertToFloat(x float32) Value {
	v.kind = ^int8(Float)
	v.ptr = nil
	v.u64 = uint64(math.Float32bits(x))
	return v
}

func (v Value) convertToDouble(x float64) Value {
	v.kind = ^int8(Double)
	v.ptr = nil
	v.u64 = math.Float64bits(x)
	return v
}

func (v Value) convertToByteArray(x []byte) Value {
	v.kind = ^int8(ByteArray)
	v.ptr = unsafecast.AddressOfBytes(x)
	v.u64 = uint64(len(x))
	return v
}

func (v Value) convertToFixedLenByteArray(x []byte) Value {
	v.kind = ^int8(FixedLenByteArray)
	v.ptr = unsafecast.AddressOfBytes(x)
	v.u64 = uint64(len(x))
	return v
}

// Kind returns the kind of v, which represents its parquet physical type.
func (v Value) Kind() Kind { return ^Kind(v.kind) }

// IsNull returns true if v is the null value.
func (v Value) IsNull() bool { return v.isNull() }

// Byte returns v as a byte, which may truncate the underlying value since
// only its lowest 8 bits are returned.
func (v Value) Byte() byte { return v.byte() }

// Boolean returns v as a bool, assuming the underlying type is BOOLEAN.
func (v Value) Boolean() bool { return v.boolean() }

// Int32 returns v as a int32, assuming the underlying type is INT32.
func (v Value) Int32() int32 { return v.int32() }

// Int64 returns v as a int64, assuming the underlying type is INT64.
func (v Value) Int64() int64 { return v.int64() }

// Int96 returns v as a int96, assuming the underlying type is INT96.
541 func (v Value) Int96() deprecated.Int96 { 542 var val deprecated.Int96 543 if !v.isNull() { 544 val = v.int96() 545 } 546 return val 547 } 548 549 // Float returns v as a float32, assuming the underlying type is FLOAT. 550 func (v Value) Float() float32 { return v.float() } 551 552 // Double returns v as a float64, assuming the underlying type is DOUBLE. 553 func (v Value) Double() float64 { return v.double() } 554 555 // Uint32 returns v as a uint32, assuming the underlying type is INT32. 556 func (v Value) Uint32() uint32 { return v.uint32() } 557 558 // Uint64 returns v as a uint64, assuming the underlying type is INT64. 559 func (v Value) Uint64() uint64 { return v.uint64() } 560 561 // ByteArray returns v as a []byte, assuming the underlying type is either 562 // BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY. 563 // 564 // The application must treat the returned byte slice as a read-only value, 565 // mutating the content will result in undefined behaviors. 566 func (v Value) ByteArray() []byte { return v.byteArray() } 567 568 // RepetitionLevel returns the repetition level of v. 569 func (v Value) RepetitionLevel() int { return int(v.repetitionLevel) } 570 571 // DefinitionLevel returns the definition level of v. 572 func (v Value) DefinitionLevel() int { return int(v.definitionLevel) } 573 574 // Column returns the column index within the row that v was created from. 575 // 576 // Returns -1 if the value does not carry a column index. 577 func (v Value) Column() int { return v.column() } 578 579 // Bytes returns the binary representation of v. 580 // 581 // If v is the null value, an nil byte slice is returned. 
582 func (v Value) Bytes() []byte { 583 switch v.Kind() { 584 case Boolean: 585 buf := [8]byte{} 586 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 587 return buf[0:1] 588 case Int32, Float: 589 buf := [8]byte{} 590 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 591 return buf[:4] 592 case Int64, Double: 593 buf := [8]byte{} 594 binary.LittleEndian.PutUint64(buf[:8], v.uint64()) 595 return buf[:8] 596 case ByteArray, FixedLenByteArray, Int96: 597 return v.byteArray() 598 default: 599 return nil 600 } 601 } 602 603 // AppendBytes appends the binary representation of v to b. 604 // 605 // If v is the null value, b is returned unchanged. 606 func (v Value) AppendBytes(b []byte) []byte { 607 buf := [8]byte{} 608 switch v.Kind() { 609 case Boolean: 610 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 611 return append(b, buf[0]) 612 case Int32, Float: 613 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 614 return append(b, buf[:4]...) 615 case Int64, Double: 616 binary.LittleEndian.PutUint64(buf[:8], v.uint64()) 617 return append(b, buf[:8]...) 618 case ByteArray, FixedLenByteArray, Int96: 619 return append(b, v.byteArray()...) 620 default: 621 return b 622 } 623 } 624 625 // Format outputs a human-readable representation of v to w, using r as the 626 // formatting verb to describe how the value should be printed. 
627 // 628 // The following formatting options are supported: 629 // 630 // %c prints the column index 631 // %+c prints the column index, prefixed with "C:" 632 // %d prints the definition level 633 // %+d prints the definition level, prefixed with "D:" 634 // %r prints the repetition level 635 // %+r prints the repetition level, prefixed with "R:" 636 // %q prints the quoted representation of v 637 // %+q prints the quoted representation of v, prefixed with "V:" 638 // %s prints the string representation of v 639 // %+s prints the string representation of v, prefixed with "V:" 640 // %v same as %s 641 // %+v prints a verbose representation of v 642 // %#v prints a Go value representation of v 643 // 644 // Format satisfies the fmt.Formatter interface. 645 func (v Value) Format(w fmt.State, r rune) { 646 switch r { 647 case 'c': 648 if w.Flag('+') { 649 io.WriteString(w, "C:") 650 } 651 fmt.Fprint(w, v.column()) 652 653 case 'd': 654 if w.Flag('+') { 655 io.WriteString(w, "D:") 656 } 657 fmt.Fprint(w, v.definitionLevel) 658 659 case 'r': 660 if w.Flag('+') { 661 io.WriteString(w, "R:") 662 } 663 fmt.Fprint(w, v.repetitionLevel) 664 665 case 'q': 666 if w.Flag('+') { 667 io.WriteString(w, "V:") 668 } 669 switch v.Kind() { 670 case ByteArray, FixedLenByteArray: 671 fmt.Fprintf(w, "%q", v.byteArray()) 672 default: 673 fmt.Fprintf(w, `"%s"`, v) 674 } 675 676 case 's': 677 if w.Flag('+') { 678 io.WriteString(w, "V:") 679 } 680 switch v.Kind() { 681 case Boolean: 682 fmt.Fprint(w, v.boolean()) 683 case Int32: 684 fmt.Fprint(w, v.int32()) 685 case Int64: 686 fmt.Fprint(w, v.int64()) 687 case Int96: 688 fmt.Fprint(w, v.int96()) 689 case Float: 690 fmt.Fprint(w, v.float()) 691 case Double: 692 fmt.Fprint(w, v.double()) 693 case ByteArray, FixedLenByteArray: 694 w.Write(v.byteArray()) 695 default: 696 io.WriteString(w, "<null>") 697 } 698 699 case 'v': 700 switch { 701 case w.Flag('+'): 702 fmt.Fprintf(w, "%+[1]c %+[1]d %+[1]r %+[1]s", v) 703 case w.Flag('#'): 704 
v.formatGoString(w) 705 default: 706 v.Format(w, 's') 707 } 708 } 709 } 710 711 func (v Value) formatGoString(w fmt.State) { 712 io.WriteString(w, "parquet.") 713 switch v.Kind() { 714 case Boolean: 715 fmt.Fprintf(w, "BooleanValue(%t)", v.boolean()) 716 case Int32: 717 fmt.Fprintf(w, "Int32Value(%d)", v.int32()) 718 case Int64: 719 fmt.Fprintf(w, "Int64Value(%d)", v.int64()) 720 case Int96: 721 fmt.Fprintf(w, "Int96Value(%#v)", v.int96()) 722 case Float: 723 fmt.Fprintf(w, "FloatValue(%g)", v.float()) 724 case Double: 725 fmt.Fprintf(w, "DoubleValue(%g)", v.double()) 726 case ByteArray: 727 fmt.Fprintf(w, "ByteArrayValue(%q)", v.byteArray()) 728 case FixedLenByteArray: 729 fmt.Fprintf(w, "FixedLenByteArrayValue(%#v)", v.byteArray()) 730 default: 731 io.WriteString(w, "Value{}") 732 return 733 } 734 fmt.Fprintf(w, ".Level(%d,%d,%d)", 735 v.RepetitionLevel(), 736 v.DefinitionLevel(), 737 v.Column(), 738 ) 739 } 740 741 // String returns a string representation of v. 742 func (v Value) String() string { 743 switch v.Kind() { 744 case Boolean: 745 return strconv.FormatBool(v.boolean()) 746 case Int32: 747 return strconv.FormatInt(int64(v.int32()), 10) 748 case Int64: 749 return strconv.FormatInt(v.int64(), 10) 750 case Int96: 751 return v.Int96().String() 752 case Float: 753 return strconv.FormatFloat(float64(v.float()), 'g', -1, 32) 754 case Double: 755 return strconv.FormatFloat(v.double(), 'g', -1, 32) 756 case ByteArray, FixedLenByteArray: 757 return string(v.byteArray()) 758 default: 759 return "<null>" 760 } 761 } 762 763 // GoString returns a Go value string representation of v. 764 func (v Value) GoString() string { return fmt.Sprintf("%#v", v) } 765 766 // Level returns v with the repetition level, definition level, and column index 767 // set to the values passed as arguments. 768 // 769 // The method panics if either argument is negative. 
770 func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value { 771 v.repetitionLevel = makeRepetitionLevel(repetitionLevel) 772 v.definitionLevel = makeDefinitionLevel(definitionLevel) 773 v.columnIndex = ^makeColumnIndex(columnIndex) 774 return v 775 } 776 777 // Clone returns a copy of v which does not share any pointers with it. 778 func (v Value) Clone() Value { 779 switch k := v.Kind(); k { 780 case ByteArray, FixedLenByteArray: 781 v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray())) 782 } 783 return v 784 } 785 786 func makeInt96(bits []byte) (i96 deprecated.Int96) { 787 return deprecated.Int96{ 788 2: binary.LittleEndian.Uint32(bits[8:12]), 789 1: binary.LittleEndian.Uint32(bits[4:8]), 790 0: binary.LittleEndian.Uint32(bits[0:4]), 791 } 792 } 793 794 func parseValue(kind Kind, data []byte) (val Value, err error) { 795 switch kind { 796 case Boolean: 797 if len(data) == 1 { 798 val = makeValueBoolean(data[0] != 0) 799 } 800 case Int32: 801 if len(data) == 4 { 802 val = makeValueInt32(int32(binary.LittleEndian.Uint32(data))) 803 } 804 case Int64: 805 if len(data) == 8 { 806 val = makeValueInt64(int64(binary.LittleEndian.Uint64(data))) 807 } 808 case Int96: 809 if len(data) == 12 { 810 val = makeValueInt96(makeInt96(data)) 811 } 812 case Float: 813 if len(data) == 4 { 814 val = makeValueFloat(float32(math.Float32frombits(binary.LittleEndian.Uint32(data)))) 815 } 816 case Double: 817 if len(data) == 8 { 818 val = makeValueDouble(float64(math.Float64frombits(binary.LittleEndian.Uint64(data)))) 819 } 820 case ByteArray, FixedLenByteArray: 821 val = makeValueBytes(kind, data) 822 } 823 if val.isNull() { 824 err = fmt.Errorf("cannot decode %s value from input of length %d", kind, len(data)) 825 } 826 return val, err 827 } 828 829 func copyBytes(b []byte) []byte { 830 c := make([]byte, len(b)) 831 copy(c, b) 832 return c 833 } 834 835 // Equal returns true if v1 and v2 are equal. 
836 // 837 // Values are considered equal if they are of the same physical type and hold 838 // the same Go values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY, the content of 839 // the underlying byte arrays are tested for equality. 840 // 841 // Note that the repetition levels, definition levels, and column indexes are 842 // not compared by this function, use DeepEqual instead. 843 func Equal(v1, v2 Value) bool { 844 if v1.kind != v2.kind { 845 return false 846 } 847 switch ^Kind(v1.kind) { 848 case Boolean: 849 return v1.boolean() == v2.boolean() 850 case Int32: 851 return v1.int32() == v2.int32() 852 case Int64: 853 return v1.int64() == v2.int64() 854 case Int96: 855 return v1.int96() == v2.int96() 856 case Float: 857 return v1.float() == v2.float() 858 case Double: 859 return v1.double() == v2.double() 860 case ByteArray, FixedLenByteArray: 861 return bytes.Equal(v1.byteArray(), v2.byteArray()) 862 case -1: // null 863 return true 864 default: 865 return false 866 } 867 } 868 869 // DeepEqual returns true if v1 and v2 are equal, including their repetition 870 // levels, definition levels, and column indexes. 871 // 872 // See Equal for details about how value equality is determined. 873 func DeepEqual(v1, v2 Value) bool { 874 return Equal(v1, v2) && 875 v1.repetitionLevel == v2.repetitionLevel && 876 v1.definitionLevel == v2.definitionLevel && 877 v1.columnIndex == v2.columnIndex 878 } 879 880 var ( 881 _ fmt.Formatter = Value{} 882 _ fmt.Stringer = Value{} 883 ) 884 885 func clearValues(values []Value) { 886 for i := range values { 887 values[i] = Value{} 888 } 889 } 890 891 // BooleanReader is an interface implemented by ValueReader instances which 892 // expose the content of a column of boolean values. 893 type BooleanReader interface { 894 // Read boolean values into the buffer passed as argument. 895 // 896 // The method returns io.EOF when all values have been read. 
897 ReadBooleans(values []bool) (int, error) 898 } 899 900 // BooleanWriter is an interface implemented by ValueWriter instances which 901 // support writing columns of boolean values. 902 type BooleanWriter interface { 903 // Write boolean values. 904 // 905 // The method returns the number of values written, and any error that 906 // occurred while writing the values. 907 WriteBooleans(values []bool) (int, error) 908 } 909 910 // Int32Reader is an interface implemented by ValueReader instances which expose 911 // the content of a column of int32 values. 912 type Int32Reader interface { 913 // Read 32 bits integer values into the buffer passed as argument. 914 // 915 // The method returns io.EOF when all values have been read. 916 ReadInt32s(values []int32) (int, error) 917 } 918 919 // Int32Writer is an interface implemented by ValueWriter instances which 920 // support writing columns of 32 bits signed integer values. 921 type Int32Writer interface { 922 // Write 32 bits signed integer values. 923 // 924 // The method returns the number of values written, and any error that 925 // occurred while writing the values. 926 WriteInt32s(values []int32) (int, error) 927 } 928 929 // Int64Reader is an interface implemented by ValueReader instances which expose 930 // the content of a column of int64 values. 931 type Int64Reader interface { 932 // Read 64 bits integer values into the buffer passed as argument. 933 // 934 // The method returns io.EOF when all values have been read. 935 ReadInt64s(values []int64) (int, error) 936 } 937 938 // Int64Writer is an interface implemented by ValueWriter instances which 939 // support writing columns of 64 bits signed integer values. 940 type Int64Writer interface { 941 // Write 64 bits signed integer values. 942 // 943 // The method returns the number of values written, and any error that 944 // occurred while writing the values. 
945 WriteInt64s(values []int64) (int, error) 946 } 947 948 // Int96Reader is an interface implemented by ValueReader instances which expose 949 // the content of a column of int96 values. 950 type Int96Reader interface { 951 // Read 96 bits integer values into the buffer passed as argument. 952 // 953 // The method returns io.EOF when all values have been read. 954 ReadInt96s(values []deprecated.Int96) (int, error) 955 } 956 957 // Int96Writer is an interface implemented by ValueWriter instances which 958 // support writing columns of 96 bits signed integer values. 959 type Int96Writer interface { 960 // Write 96 bits signed integer values. 961 // 962 // The method returns the number of values written, and any error that 963 // occurred while writing the values. 964 WriteInt96s(values []deprecated.Int96) (int, error) 965 } 966 967 // FloatReader is an interface implemented by ValueReader instances which expose 968 // the content of a column of single-precision floating point values. 969 type FloatReader interface { 970 // Read single-precision floating point values into the buffer passed as 971 // argument. 972 // 973 // The method returns io.EOF when all values have been read. 974 ReadFloats(values []float32) (int, error) 975 } 976 977 // FloatWriter is an interface implemented by ValueWriter instances which 978 // support writing columns of single-precision floating point values. 979 type FloatWriter interface { 980 // Write single-precision floating point values. 981 // 982 // The method returns the number of values written, and any error that 983 // occurred while writing the values. 984 WriteFloats(values []float32) (int, error) 985 } 986 987 // DoubleReader is an interface implemented by ValueReader instances which 988 // expose the content of a column of double-precision float point values. 989 type DoubleReader interface { 990 // Read double-precision floating point values into the buffer passed as 991 // argument. 
992 // 993 // The method returns io.EOF when all values have been read. 994 ReadDoubles(values []float64) (int, error) 995 } 996 997 // DoubleWriter is an interface implemented by ValueWriter instances which 998 // support writing columns of double-precision floating point values. 999 type DoubleWriter interface { 1000 // Write double-precision floating point values. 1001 // 1002 // The method returns the number of values written, and any error that 1003 // occurred while writing the values. 1004 WriteDoubles(values []float64) (int, error) 1005 } 1006 1007 // ByteArrayReader is an interface implemented by ValueReader instances which 1008 // expose the content of a column of variable length byte array values. 1009 type ByteArrayReader interface { 1010 // Read values into the byte buffer passed as argument, returning the number 1011 // of values written to the buffer (not the number of bytes). Values are 1012 // written using the PLAIN encoding, each byte array prefixed with its 1013 // length encoded as a 4 bytes little endian unsigned integer. 1014 // 1015 // The method returns io.EOF when all values have been read. 1016 // 1017 // If the buffer was not empty, but too small to hold at least one value, 1018 // io.ErrShortBuffer is returned. 1019 ReadByteArrays(values []byte) (int, error) 1020 } 1021 1022 // ByteArrayWriter is an interface implemented by ValueWriter instances which 1023 // support writing columns of variable length byte array values. 1024 type ByteArrayWriter interface { 1025 // Write variable length byte array values. 1026 // 1027 // The values passed as input must be laid out using the PLAIN encoding, 1028 // with each byte array prefixed with the four bytes little endian unsigned 1029 // integer length. 1030 // 1031 // The method returns the number of values written to the underlying column 1032 // (not the number of bytes), or any error that occurred while attempting to 1033 // write the values. 
1034 WriteByteArrays(values []byte) (int, error) 1035 } 1036 1037 // FixedLenByteArrayReader is an interface implemented by ValueReader instances 1038 // which expose the content of a column of fixed length byte array values. 1039 type FixedLenByteArrayReader interface { 1040 // Read values into the byte buffer passed as argument, returning the number 1041 // of values written to the buffer (not the number of bytes). 1042 // 1043 // The method returns io.EOF when all values have been read. 1044 // 1045 // If the buffer was not empty, but too small to hold at least one value, 1046 // io.ErrShortBuffer is returned. 1047 ReadFixedLenByteArrays(values []byte) (int, error) 1048 } 1049 1050 // FixedLenByteArrayWriter is an interface implemented by ValueWriter instances 1051 // which support writing columns of fixed length byte array values. 1052 type FixedLenByteArrayWriter interface { 1053 // Writes the fixed length byte array values. 1054 // 1055 // The size of the values is assumed to be the same as the expected size of 1056 // items in the column. The method errors if the length of the input values 1057 // is not a multiple of the expected item size. 1058 WriteFixedLenByteArrays(values []byte) (int, error) 1059 }