// Source: github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/value.go

package parquet

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"reflect"
	"strconv"
	"time"
	"unsafe"

	"github.com/google/uuid"
	"github.com/parquet-go/parquet-go/deprecated"
	"github.com/parquet-go/parquet-go/format"
	"github.com/parquet-go/parquet-go/internal/unsafecast"
)

const (
	// 170 x sizeof(Value) = 4KB
	defaultValueBufferSize = 170
)

// The Value type is similar to the reflect.Value abstraction of Go values, but
// for parquet values. Value instances wrap underlying Go values mapped to one
// of the parquet physical types.
//
// Value instances are small, immutable objects, and usually passed by value
// between function calls.
//
// The zero-value of Value represents the null parquet value.
type Value struct {
	// data
	//
	// Scalar kinds keep their bits in u64 and leave ptr nil; byte-array-like
	// kinds keep the address of their first byte in ptr and their length in
	// u64.
	ptr *byte
	u64 uint64
	// type
	//
	// The kind is stored as the bitwise complement (^) of the Kind, which is
	// what the "XOR" below refers to.
	kind int8 // XOR(Kind) so the zero-value is <null>
	// levels
	definitionLevel byte
	repetitionLevel byte
	// The column index is stored as its bitwise complement as well.
	columnIndex int16 // XOR so the zero-value is -1
}

// ValueReader is an interface implemented by types that support reading
// batches of values.
type ValueReader interface {
	// Read values into the buffer passed as argument and return the number of
	// values read. When all values have been read, the error will be io.EOF.
	ReadValues([]Value) (int, error)
}

// ValueReaderAt is an interface implemented by types that support reading
// values at offsets specified by the application.
type ValueReaderAt interface {
	ReadValuesAt([]Value, int64) (int, error)
}

// ValueReaderFrom is an interface implemented by value writers to read values
// from a reader.
type ValueReaderFrom interface {
	ReadValuesFrom(ValueReader) (int64, error)
}

// ValueWriter is an interface implemented by types that support writing
// batches of values.
type ValueWriter interface {
	// Write values from the buffer passed as argument and returns the number
	// of values written.
	WriteValues([]Value) (int, error)
}

// ValueWriterTo is an interface implemented by value readers to write values to
// a writer.
type ValueWriterTo interface {
	WriteValuesTo(ValueWriter) (int64, error)
}

// ValueReaderFunc is a function type implementing the ValueReader interface.
type ValueReaderFunc func([]Value) (int, error)

// ReadValues calls f(values) and returns its results.
func (f ValueReaderFunc) ReadValues(values []Value) (int, error) { return f(values) }

// ValueWriterFunc is a function type implementing the ValueWriter interface.
type ValueWriterFunc func([]Value) (int, error)

// WriteValues calls f(values) and returns its results.
func (f ValueWriterFunc) WriteValues(values []Value) (int, error) { return f(values) }

// CopyValues copies values from src to dst, returning the number of values
// that were written.
//
// As an optimization, the reader and writer may choose to implement
// ValueReaderFrom and ValueWriterTo to provide their own copy logic.
//
// The function returns any error it encounters reading or writing values,
// except for io.EOF from the reader which indicates that there were no more
// values to read.
98 func CopyValues(dst ValueWriter, src ValueReader) (int64, error) { 99 return copyValues(dst, src, nil) 100 } 101 102 func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, err error) { 103 if wt, ok := src.(ValueWriterTo); ok { 104 return wt.WriteValuesTo(dst) 105 } 106 107 if rf, ok := dst.(ValueReaderFrom); ok { 108 return rf.ReadValuesFrom(src) 109 } 110 111 if len(buf) == 0 { 112 buf = make([]Value, defaultValueBufferSize) 113 } 114 115 defer clearValues(buf) 116 117 for { 118 n, err := src.ReadValues(buf) 119 120 if n > 0 { 121 wn, werr := dst.WriteValues(buf[:n]) 122 written += int64(wn) 123 if werr != nil { 124 return written, werr 125 } 126 } 127 128 if err != nil { 129 if err == io.EOF { 130 err = nil 131 } 132 return written, err 133 } 134 135 if n == 0 { 136 return written, io.ErrNoProgress 137 } 138 } 139 } 140 141 // ValueOf constructs a parquet value from a Go value v. 142 // 143 // The physical type of the value is assumed from the Go type of v using the 144 // following conversion table: 145 // 146 // Go type | Parquet physical type 147 // ------- | --------------------- 148 // nil | NULL 149 // bool | BOOLEAN 150 // int8 | INT32 151 // int16 | INT32 152 // int32 | INT32 153 // int64 | INT64 154 // int | INT64 155 // uint8 | INT32 156 // uint16 | INT32 157 // uint32 | INT32 158 // uint64 | INT64 159 // uintptr | INT64 160 // float32 | FLOAT 161 // float64 | DOUBLE 162 // string | BYTE_ARRAY 163 // []byte | BYTE_ARRAY 164 // [*]byte | FIXED_LEN_BYTE_ARRAY 165 // 166 // When converting a []byte or [*]byte value, the underlying byte array is not 167 // copied; instead, the returned parquet value holds a reference to it. 168 // 169 // The repetition and definition levels of the returned value are both zero. 170 // 171 // The function panics if the Go value cannot be represented in parquet. 
172 func ValueOf(v interface{}) Value { 173 k := Kind(-1) 174 t := reflect.TypeOf(v) 175 176 switch value := v.(type) { 177 case nil: 178 return Value{} 179 case uuid.UUID: 180 return makeValueBytes(FixedLenByteArray, value[:]) 181 case deprecated.Int96: 182 return makeValueInt96(value) 183 case time.Time: 184 k = Int64 185 } 186 187 switch t.Kind() { 188 case reflect.Bool: 189 k = Boolean 190 case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Uint8, reflect.Uint16, reflect.Uint32: 191 k = Int32 192 case reflect.Int64, reflect.Int, reflect.Uint64, reflect.Uint, reflect.Uintptr: 193 k = Int64 194 case reflect.Float32: 195 k = Float 196 case reflect.Float64: 197 k = Double 198 case reflect.String: 199 k = ByteArray 200 case reflect.Slice: 201 if t.Elem().Kind() == reflect.Uint8 { 202 k = ByteArray 203 } 204 case reflect.Array: 205 if t.Elem().Kind() == reflect.Uint8 { 206 k = FixedLenByteArray 207 } 208 } 209 210 if k < 0 { 211 panic("cannot create parquet value from go value of type " + t.String()) 212 } 213 214 return makeValue(k, nil, reflect.ValueOf(v)) 215 } 216 217 // NulLValue constructs a null value, which is the zero-value of the Value type. 218 func NullValue() Value { return Value{} } 219 220 // ZeroValue constructs a zero value of the given kind. 221 func ZeroValue(kind Kind) Value { return makeValueKind(kind) } 222 223 // BooleanValue constructs a BOOLEAN parquet value from the bool passed as 224 // argument. 225 func BooleanValue(value bool) Value { return makeValueBoolean(value) } 226 227 // Int32Value constructs a INT32 parquet value from the int32 passed as 228 // argument. 229 func Int32Value(value int32) Value { return makeValueInt32(value) } 230 231 // Int64Value constructs a INT64 parquet value from the int64 passed as 232 // argument. 233 func Int64Value(value int64) Value { return makeValueInt64(value) } 234 235 // Int96Value constructs a INT96 parquet value from the deprecated.Int96 passed 236 // as argument. 
func Int96Value(value deprecated.Int96) Value { return makeValueInt96(value) }

// FloatValue constructs a FLOAT parquet value from the float32 passed as
// argument.
func FloatValue(value float32) Value { return makeValueFloat(value) }

// DoubleValue constructs a DOUBLE parquet value from the float64 passed as
// argument.
func DoubleValue(value float64) Value { return makeValueDouble(value) }

// ByteArrayValue constructs a BYTE_ARRAY parquet value from the byte slice
// passed as argument.
func ByteArrayValue(value []byte) Value { return makeValueBytes(ByteArray, value) }

// FixedLenByteArrayValue constructs a FIXED_LEN_BYTE_ARRAY parquet value from
// the byte slice passed as argument.
func FixedLenByteArrayValue(value []byte) Value { return makeValueBytes(FixedLenByteArray, value) }

// makeValue converts the Go value v into a parquet value of physical type k.
//
// A nil interface, or an interface holding a nil pointer, yields the null
// value. A time.Time is always converted to an INT64 holding its Unix time,
// in milliseconds or microseconds when lt carries a timestamp unit saying so,
// and in nanoseconds otherwise.
//
// The function panics when v cannot be converted to the kind k.
func makeValue(k Kind, lt *format.LogicalType, v reflect.Value) Value {
	if v.Kind() == reflect.Interface {
		if v.IsNil() {
			return Value{}
		}
		if v = v.Elem(); v.Kind() == reflect.Pointer && v.IsNil() {
			return Value{}
		}
	}

	switch v.Type() {
	case reflect.TypeOf(time.Time{}):
		unit := Nanosecond.TimeUnit()
		if lt != nil && lt.Timestamp != nil {
			unit = lt.Timestamp.Unit
		}

		t := v.Interface().(time.Time)
		var val int64
		switch {
		case unit.Millis != nil:
			val = t.UnixMilli()
		case unit.Micros != nil:
			val = t.UnixMicro()
		default:
			val = t.UnixNano()
		}
		return makeValueInt64(val)
	}

	// Each case returns on a supported Go kind; unsupported combinations fall
	// out of the switch and reach the panic at the end of the function.
	switch k {
	case Boolean:
		return makeValueBoolean(v.Bool())

	case Int32:
		switch v.Kind() {
		case reflect.Int8, reflect.Int16, reflect.Int32:
			return makeValueInt32(int32(v.Int()))
		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
			return makeValueInt32(int32(v.Uint()))
		}

	case Int64:
		switch v.Kind() {
		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
			return makeValueInt64(v.Int())
		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
			return makeValueUint64(v.Uint())
		}

	case Int96:
		switch v.Type() {
		case reflect.TypeOf(deprecated.Int96{}):
			return makeValueInt96(v.Interface().(deprecated.Int96))
		}

	case Float:
		switch v.Kind() {
		case reflect.Float32:
			return makeValueFloat(float32(v.Float()))
		}

	case Double:
		switch v.Kind() {
		case reflect.Float32, reflect.Float64:
			return makeValueDouble(v.Float())
		}

	case ByteArray:
		switch v.Kind() {
		case reflect.String:
			return makeValueString(k, v.String())
		case reflect.Slice:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueBytes(k, v.Bytes())
			}
		}

	case FixedLenByteArray:
		switch v.Kind() {
		case reflect.String: // uuid
			return makeValueString(k, v.String())
		case reflect.Array:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueFixedLenByteArray(v)
			}
		case reflect.Slice:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueBytes(k, v.Bytes())
			}
		}
	}

	panic("cannot create parquet value of type " + k.String() + " from go value of type " + v.Type().String())
}

// makeValueKind returns a value of the given kind with no data set.
func makeValueKind(kind Kind) Value {
	return Value{kind: ^int8(kind)}
}

// makeValueBoolean returns a BOOLEAN value, stored as 1 for true and 0 for
// false.
func makeValueBoolean(value bool) Value {
	v := Value{kind: ^int8(Boolean)}
	if value {
		v.u64 = 1
	}
	return v
}

// makeValueInt32 returns an INT32 value; the integer is sign-extended into the
// 64 bits data field.
func makeValueInt32(value int32) Value {
	return Value{
		kind: ^int8(Int32),
		u64:  uint64(value),
	}
}

// makeValueInt64 returns an INT64 value.
func makeValueInt64(value int64) Value {
	return Value{
		kind: ^int8(Int64),
		u64:  uint64(value),
	}
}

// makeValueInt96 returns an INT96 value backed by a newly allocated 12 bytes
// array holding the three 32 bits words of value in little endian order.
func makeValueInt96(value deprecated.Int96) Value {
	// TODO: this is highly inefficient because we need a heap allocation to
	// store the value; we don't expect INT96 to be used frequently since it
	// is a deprecated feature of parquet, and it helps keep the Value type
	// compact for all the other more common cases.
	bits := [12]byte{}
	binary.LittleEndian.PutUint32(bits[0:4], value[0])
	binary.LittleEndian.PutUint32(bits[4:8], value[1])
	binary.LittleEndian.PutUint32(bits[8:12], value[2])
	return Value{
		kind: ^int8(Int96),
		ptr:  &bits[0],
		u64:  12, // set the length so we can use the ByteArray method
	}
}

// makeValueUint32 returns an INT32 value holding the bits of an unsigned 32
// bits integer.
func makeValueUint32(value uint32) Value {
	return Value{
		kind: ^int8(Int32),
		u64:  uint64(value),
	}
}

// makeValueUint64 returns an INT64 value holding the bits of an unsigned 64
// bits integer.
func makeValueUint64(value uint64) Value {
	return Value{
		kind: ^int8(Int64),
		u64:  value,
	}
}

// makeValueFloat returns a FLOAT value holding the IEEE 754 bits of value.
func makeValueFloat(value float32) Value {
	return Value{
		kind: ^int8(Float),
		u64:  uint64(math.Float32bits(value)),
	}
}

// makeValueDouble returns a DOUBLE value holding the IEEE 754 bits of value.
func makeValueDouble(value float64) Value {
	return Value{
		kind: ^int8(Double),
		u64:  math.Float64bits(value),
	}
}

// makeValueBytes returns a value of the given kind which references the bytes
// of value without copying them.
func makeValueBytes(kind Kind, value []byte) Value {
	return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
}

// makeValueString returns a value of the given kind which references the bytes
// of the string without copying them.
func makeValueString(kind Kind, value string) Value {
	return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
}

// makeValueFixedLenByteArray returns a FIXED_LEN_BYTE_ARRAY value from the
// array held by v, which is copied first when it is not addressable.
func makeValueFixedLenByteArray(v reflect.Value) Value {
	t := v.Type()
	// When the array is addressable, we take advantage of this
	// condition to avoid the heap allocation otherwise needed
	// to pack the reference into an interface{} value.
	if v.CanAddr() {
		v = v.Addr()
	} else {
		u := reflect.New(t)
		u.Elem().Set(v)
		v = u
	}
	return makeValueByteArray(FixedLenByteArray, (*byte)(v.UnsafePointer()), t.Len())
}

// makeValueByteArray returns a value of the given kind referencing size bytes
// starting at data.
func makeValueByteArray(kind Kind, data *byte, size int) Value {
	return Value{
		kind: ^int8(kind),
		ptr:  data,
		u64:  uint64(size),
	}
}

// These methods are internal versions of methods exported by the Value type,
// they are usually inlined by the compiler and intended to be used inside the
// parquet-go package because they tend to generate better code than their
// exported counter part, which requires making a copy of the receiver.
func (v *Value) isNull() bool            { return v.kind == 0 }
func (v *Value) byte() byte              { return byte(v.u64) }
func (v *Value) boolean() bool           { return v.u64 != 0 }
func (v *Value) int32() int32            { return int32(v.u64) }
func (v *Value) int64() int64            { return int64(v.u64) }
func (v *Value) int96() deprecated.Int96 { return makeInt96(v.byteArray()) }
func (v *Value) float() float32          { return math.Float32frombits(uint32(v.u64)) }
func (v *Value) double() float64         { return math.Float64frombits(uint64(v.u64)) }
func (v *Value) uint32() uint32          { return uint32(v.u64) }
func (v *Value) uint64() uint64          { return v.u64 }
func (v *Value) byteArray() []byte       { return unsafecast.Bytes(v.ptr, int(v.u64)) }
func (v *Value) string() string          { return unsafecast.BytesToString(v.byteArray()) }
func (v *Value) be128() *[16]byte        { return (*[16]byte)(unsafe.Pointer(v.ptr)) }
func (v *Value) column() int             { return int(^v.columnIndex) }

// The convertTo* methods return a copy of v holding x as a value of the target
// kind; the levels and column index of v are carried over unchanged.

func (v Value) convertToBoolean(x bool) Value {
	v.kind = ^int8(Boolean)
	v.ptr = nil
	v.u64 = 0
	if x {
		v.u64 = 1
	}
	return v
}

func (v Value) convertToInt32(x int32) Value {
	v.kind = ^int8(Int32)
	v.ptr = nil
	v.u64 = uint64(x)
	return v
}

func (v Value) convertToInt64(x int64) Value {
	v.kind = ^int8(Int64)
	v.ptr = nil
	v.u64 = uint64(x)
	return v
}

func (v Value) convertToInt96(x deprecated.Int96) Value {
	i96 := makeValueInt96(x)
	v.kind = i96.kind
	v.ptr = i96.ptr
	v.u64 = i96.u64
	return v
}

func (v Value) convertToFloat(x float32) Value {
	v.kind = ^int8(Float)
	v.ptr = nil
	v.u64 = uint64(math.Float32bits(x))
	return v
}

func (v Value) convertToDouble(x float64) Value {
	v.kind = ^int8(Double)
	v.ptr = nil
	v.u64 = math.Float64bits(x)
	return v
}

func (v Value) convertToByteArray(x []byte) Value {
	v.kind = ^int8(ByteArray)
	v.ptr = unsafecast.AddressOfBytes(x)
	v.u64 = uint64(len(x))
	return v
}

func (v Value) convertToFixedLenByteArray(x []byte) Value {
	v.kind = ^int8(FixedLenByteArray)
	v.ptr = unsafecast.AddressOfBytes(x)
	v.u64 = uint64(len(x))
	return v
}

// Kind returns the kind of v, which represents its parquet physical type.
func (v Value) Kind() Kind { return ^Kind(v.kind) }

// IsNull returns true if v is the null value.
func (v Value) IsNull() bool { return v.isNull() }

// Byte returns v as a byte, which may truncate the underlying value.
func (v Value) Byte() byte { return v.byte() }

// Boolean returns v as a bool, assuming the underlying type is BOOLEAN.
func (v Value) Boolean() bool { return v.boolean() }

// Int32 returns v as a int32, assuming the underlying type is INT32.
func (v Value) Int32() int32 { return v.int32() }

// Int64 returns v as a int64, assuming the underlying type is INT64.
func (v Value) Int64() int64 { return v.int64() }

// Int96 returns v as a int96, assuming the underlying type is INT96.
550 func (v Value) Int96() deprecated.Int96 { 551 var val deprecated.Int96 552 if !v.isNull() { 553 val = v.int96() 554 } 555 return val 556 } 557 558 // Float returns v as a float32, assuming the underlying type is FLOAT. 559 func (v Value) Float() float32 { return v.float() } 560 561 // Double returns v as a float64, assuming the underlying type is DOUBLE. 562 func (v Value) Double() float64 { return v.double() } 563 564 // Uint32 returns v as a uint32, assuming the underlying type is INT32. 565 func (v Value) Uint32() uint32 { return v.uint32() } 566 567 // Uint64 returns v as a uint64, assuming the underlying type is INT64. 568 func (v Value) Uint64() uint64 { return v.uint64() } 569 570 // ByteArray returns v as a []byte, assuming the underlying type is either 571 // BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY. 572 // 573 // The application must treat the returned byte slice as a read-only value, 574 // mutating the content will result in undefined behaviors. 575 func (v Value) ByteArray() []byte { return v.byteArray() } 576 577 // RepetitionLevel returns the repetition level of v. 578 func (v Value) RepetitionLevel() int { return int(v.repetitionLevel) } 579 580 // DefinitionLevel returns the definition level of v. 581 func (v Value) DefinitionLevel() int { return int(v.definitionLevel) } 582 583 // Column returns the column index within the row that v was created from. 584 // 585 // Returns -1 if the value does not carry a column index. 586 func (v Value) Column() int { return v.column() } 587 588 // Bytes returns the binary representation of v. 589 // 590 // If v is the null value, an nil byte slice is returned. 
591 func (v Value) Bytes() []byte { 592 switch v.Kind() { 593 case Boolean: 594 buf := [8]byte{} 595 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 596 return buf[0:1] 597 case Int32, Float: 598 buf := [8]byte{} 599 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 600 return buf[:4] 601 case Int64, Double: 602 buf := [8]byte{} 603 binary.LittleEndian.PutUint64(buf[:8], v.uint64()) 604 return buf[:8] 605 case ByteArray, FixedLenByteArray, Int96: 606 return v.byteArray() 607 default: 608 return nil 609 } 610 } 611 612 // AppendBytes appends the binary representation of v to b. 613 // 614 // If v is the null value, b is returned unchanged. 615 func (v Value) AppendBytes(b []byte) []byte { 616 buf := [8]byte{} 617 switch v.Kind() { 618 case Boolean: 619 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 620 return append(b, buf[0]) 621 case Int32, Float: 622 binary.LittleEndian.PutUint32(buf[:4], v.uint32()) 623 return append(b, buf[:4]...) 624 case Int64, Double: 625 binary.LittleEndian.PutUint64(buf[:8], v.uint64()) 626 return append(b, buf[:8]...) 627 case ByteArray, FixedLenByteArray, Int96: 628 return append(b, v.byteArray()...) 629 default: 630 return b 631 } 632 } 633 634 // Format outputs a human-readable representation of v to w, using r as the 635 // formatting verb to describe how the value should be printed. 
636 // 637 // The following formatting options are supported: 638 // 639 // %c prints the column index 640 // %+c prints the column index, prefixed with "C:" 641 // %d prints the definition level 642 // %+d prints the definition level, prefixed with "D:" 643 // %r prints the repetition level 644 // %+r prints the repetition level, prefixed with "R:" 645 // %q prints the quoted representation of v 646 // %+q prints the quoted representation of v, prefixed with "V:" 647 // %s prints the string representation of v 648 // %+s prints the string representation of v, prefixed with "V:" 649 // %v same as %s 650 // %+v prints a verbose representation of v 651 // %#v prints a Go value representation of v 652 // 653 // Format satisfies the fmt.Formatter interface. 654 func (v Value) Format(w fmt.State, r rune) { 655 switch r { 656 case 'c': 657 if w.Flag('+') { 658 io.WriteString(w, "C:") 659 } 660 fmt.Fprint(w, v.column()) 661 662 case 'd': 663 if w.Flag('+') { 664 io.WriteString(w, "D:") 665 } 666 fmt.Fprint(w, v.definitionLevel) 667 668 case 'r': 669 if w.Flag('+') { 670 io.WriteString(w, "R:") 671 } 672 fmt.Fprint(w, v.repetitionLevel) 673 674 case 'q': 675 if w.Flag('+') { 676 io.WriteString(w, "V:") 677 } 678 switch v.Kind() { 679 case ByteArray, FixedLenByteArray: 680 fmt.Fprintf(w, "%q", v.byteArray()) 681 default: 682 fmt.Fprintf(w, `"%s"`, v) 683 } 684 685 case 's': 686 if w.Flag('+') { 687 io.WriteString(w, "V:") 688 } 689 switch v.Kind() { 690 case Boolean: 691 fmt.Fprint(w, v.boolean()) 692 case Int32: 693 fmt.Fprint(w, v.int32()) 694 case Int64: 695 fmt.Fprint(w, v.int64()) 696 case Int96: 697 fmt.Fprint(w, v.int96()) 698 case Float: 699 fmt.Fprint(w, v.float()) 700 case Double: 701 fmt.Fprint(w, v.double()) 702 case ByteArray, FixedLenByteArray: 703 w.Write(v.byteArray()) 704 default: 705 io.WriteString(w, "<null>") 706 } 707 708 case 'v': 709 switch { 710 case w.Flag('+'): 711 fmt.Fprintf(w, "%+[1]c %+[1]d %+[1]r %+[1]s", v) 712 case w.Flag('#'): 713 
v.formatGoString(w) 714 default: 715 v.Format(w, 's') 716 } 717 } 718 } 719 720 func (v Value) formatGoString(w fmt.State) { 721 io.WriteString(w, "parquet.") 722 switch v.Kind() { 723 case Boolean: 724 fmt.Fprintf(w, "BooleanValue(%t)", v.boolean()) 725 case Int32: 726 fmt.Fprintf(w, "Int32Value(%d)", v.int32()) 727 case Int64: 728 fmt.Fprintf(w, "Int64Value(%d)", v.int64()) 729 case Int96: 730 fmt.Fprintf(w, "Int96Value(%#v)", v.int96()) 731 case Float: 732 fmt.Fprintf(w, "FloatValue(%g)", v.float()) 733 case Double: 734 fmt.Fprintf(w, "DoubleValue(%g)", v.double()) 735 case ByteArray: 736 fmt.Fprintf(w, "ByteArrayValue(%q)", v.byteArray()) 737 case FixedLenByteArray: 738 fmt.Fprintf(w, "FixedLenByteArrayValue(%#v)", v.byteArray()) 739 default: 740 io.WriteString(w, "Value{}") 741 return 742 } 743 fmt.Fprintf(w, ".Level(%d,%d,%d)", 744 v.RepetitionLevel(), 745 v.DefinitionLevel(), 746 v.Column(), 747 ) 748 } 749 750 // String returns a string representation of v. 751 func (v Value) String() string { 752 switch v.Kind() { 753 case Boolean: 754 return strconv.FormatBool(v.boolean()) 755 case Int32: 756 return strconv.FormatInt(int64(v.int32()), 10) 757 case Int64: 758 return strconv.FormatInt(v.int64(), 10) 759 case Int96: 760 return v.Int96().String() 761 case Float: 762 return strconv.FormatFloat(float64(v.float()), 'g', -1, 32) 763 case Double: 764 return strconv.FormatFloat(v.double(), 'g', -1, 32) 765 case ByteArray, FixedLenByteArray: 766 return string(v.byteArray()) 767 default: 768 return "<null>" 769 } 770 } 771 772 // GoString returns a Go value string representation of v. 773 func (v Value) GoString() string { return fmt.Sprintf("%#v", v) } 774 775 // Level returns v with the repetition level, definition level, and column index 776 // set to the values passed as arguments. 777 // 778 // The method panics if either argument is negative. 
779 func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value { 780 v.repetitionLevel = makeRepetitionLevel(repetitionLevel) 781 v.definitionLevel = makeDefinitionLevel(definitionLevel) 782 v.columnIndex = ^makeColumnIndex(columnIndex) 783 return v 784 } 785 786 // Clone returns a copy of v which does not share any pointers with it. 787 func (v Value) Clone() Value { 788 switch k := v.Kind(); k { 789 case ByteArray, FixedLenByteArray: 790 v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray())) 791 } 792 return v 793 } 794 795 func makeInt96(bits []byte) (i96 deprecated.Int96) { 796 return deprecated.Int96{ 797 2: binary.LittleEndian.Uint32(bits[8:12]), 798 1: binary.LittleEndian.Uint32(bits[4:8]), 799 0: binary.LittleEndian.Uint32(bits[0:4]), 800 } 801 } 802 803 func parseValue(kind Kind, data []byte) (val Value, err error) { 804 switch kind { 805 case Boolean: 806 if len(data) == 1 { 807 val = makeValueBoolean(data[0] != 0) 808 } 809 case Int32: 810 if len(data) == 4 { 811 val = makeValueInt32(int32(binary.LittleEndian.Uint32(data))) 812 } 813 case Int64: 814 if len(data) == 8 { 815 val = makeValueInt64(int64(binary.LittleEndian.Uint64(data))) 816 } 817 case Int96: 818 if len(data) == 12 { 819 val = makeValueInt96(makeInt96(data)) 820 } 821 case Float: 822 if len(data) == 4 { 823 val = makeValueFloat(float32(math.Float32frombits(binary.LittleEndian.Uint32(data)))) 824 } 825 case Double: 826 if len(data) == 8 { 827 val = makeValueDouble(float64(math.Float64frombits(binary.LittleEndian.Uint64(data)))) 828 } 829 case ByteArray, FixedLenByteArray: 830 val = makeValueBytes(kind, data) 831 } 832 if val.isNull() { 833 err = fmt.Errorf("cannot decode %s value from input of length %d", kind, len(data)) 834 } 835 return val, err 836 } 837 838 func copyBytes(b []byte) []byte { 839 c := make([]byte, len(b)) 840 copy(c, b) 841 return c 842 } 843 844 // Equal returns true if v1 and v2 are equal. 
//
// Values are considered equal if they are of the same physical type and hold
// the same Go values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY, the content of
// the underlying byte arrays are tested for equality.
//
// Note that the repetition levels, definition levels, and column indexes are
// not compared by this function, use DeepEqual instead.
func Equal(v1, v2 Value) bool {
	if v1.kind != v2.kind {
		return false
	}
	switch ^Kind(v1.kind) {
	case Boolean:
		return v1.boolean() == v2.boolean()
	case Int32:
		return v1.int32() == v2.int32()
	case Int64:
		return v1.int64() == v2.int64()
	case Int96:
		return v1.int96() == v2.int96()
	case Float:
		// Compared with ==, so a NaN is not equal to itself.
		return v1.float() == v2.float()
	case Double:
		// Compared with ==, so a NaN is not equal to itself.
		return v1.double() == v2.double()
	case ByteArray, FixedLenByteArray:
		return bytes.Equal(v1.byteArray(), v2.byteArray())
	case -1: // null
		return true
	default:
		return false
	}
}

// DeepEqual returns true if v1 and v2 are equal, including their repetition
// levels, definition levels, and column indexes.
//
// See Equal for details about how value equality is determined.
func DeepEqual(v1, v2 Value) bool {
	return Equal(v1, v2) &&
		v1.repetitionLevel == v2.repetitionLevel &&
		v1.definitionLevel == v2.definitionLevel &&
		v1.columnIndex == v2.columnIndex
}

// Compile-time checks that Value implements the fmt interfaces.
var (
	_ fmt.Formatter = Value{}
	_ fmt.Stringer  = Value{}
)

// clearValues resets every element of values to the null value.
func clearValues(values []Value) {
	for i := range values {
		values[i] = Value{}
	}
}

// BooleanReader is an interface implemented by ValueReader instances which
// expose the content of a column of boolean values.
type BooleanReader interface {
	// Read boolean values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadBooleans(values []bool) (int, error)
}

// BooleanWriter is an interface implemented by ValueWriter instances which
// support writing columns of boolean values.
type BooleanWriter interface {
	// Write boolean values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteBooleans(values []bool) (int, error)
}

// Int32Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int32 values.
type Int32Reader interface {
	// Read 32 bits integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt32s(values []int32) (int, error)
}

// Int32Writer is an interface implemented by ValueWriter instances which
// support writing columns of 32 bits signed integer values.
type Int32Writer interface {
	// Write 32 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt32s(values []int32) (int, error)
}

// Int64Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int64 values.
type Int64Reader interface {
	// Read 64 bits integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt64s(values []int64) (int, error)
}

// Int64Writer is an interface implemented by ValueWriter instances which
// support writing columns of 64 bits signed integer values.
type Int64Writer interface {
	// Write 64 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt64s(values []int64) (int, error)
}

// Int96Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int96 values.
type Int96Reader interface {
	// Read 96 bits integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt96s(values []deprecated.Int96) (int, error)
}

// Int96Writer is an interface implemented by ValueWriter instances which
// support writing columns of 96 bits signed integer values.
type Int96Writer interface {
	// Write 96 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt96s(values []deprecated.Int96) (int, error)
}

// FloatReader is an interface implemented by ValueReader instances which expose
// the content of a column of single-precision floating point values.
type FloatReader interface {
	// Read single-precision floating point values into the buffer passed as
	// argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadFloats(values []float32) (int, error)
}

// FloatWriter is an interface implemented by ValueWriter instances which
// support writing columns of single-precision floating point values.
type FloatWriter interface {
	// Write single-precision floating point values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteFloats(values []float32) (int, error)
}

// DoubleReader is an interface implemented by ValueReader instances which
// expose the content of a column of double-precision floating point values.
type DoubleReader interface {
	// Read double-precision floating point values into the buffer passed as
	// argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadDoubles(values []float64) (int, error)
}

// DoubleWriter is an interface implemented by ValueWriter instances which
// support writing columns of double-precision floating point values.
type DoubleWriter interface {
	// Write double-precision floating point values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteDoubles(values []float64) (int, error)
}

// ByteArrayReader is an interface implemented by ValueReader instances which
// expose the content of a column of variable length byte array values.
type ByteArrayReader interface {
	// Read values into the byte buffer passed as argument, returning the number
	// of values written to the buffer (not the number of bytes). Values are
	// written using the PLAIN encoding, each byte array prefixed with its
	// length encoded as a 4 bytes little endian unsigned integer.
	//
	// The method returns io.EOF when all values have been read.
	//
	// If the buffer was not empty, but too small to hold at least one value,
	// io.ErrShortBuffer is returned.
	ReadByteArrays(values []byte) (int, error)
}

// ByteArrayWriter is an interface implemented by ValueWriter instances which
// support writing columns of variable length byte array values.
type ByteArrayWriter interface {
	// Write variable length byte array values.
	//
	// The values passed as input must be laid out using the PLAIN encoding,
	// with each byte array prefixed with the four bytes little endian unsigned
	// integer length.
	//
	// The method returns the number of values written to the underlying column
	// (not the number of bytes), or any error that occurred while attempting to
	// write the values.
	WriteByteArrays(values []byte) (int, error)
}

// FixedLenByteArrayReader is an interface implemented by ValueReader instances
// which expose the content of a column of fixed length byte array values.
type FixedLenByteArrayReader interface {
	// Read values into the byte buffer passed as argument, returning the number
	// of values written to the buffer (not the number of bytes).
	//
	// The method returns io.EOF when all values have been read.
	//
	// If the buffer was not empty, but too small to hold at least one value,
	// io.ErrShortBuffer is returned.
	ReadFixedLenByteArrays(values []byte) (int, error)
}

// FixedLenByteArrayWriter is an interface implemented by ValueWriter instances
// which support writing columns of fixed length byte array values.
type FixedLenByteArrayWriter interface {
	// Writes the fixed length byte array values.
	//
	// The size of the values is assumed to be the same as the expected size of
	// items in the column. The method errors if the length of the input values
	// is not a multiple of the expected item size.
	WriteFixedLenByteArrays(values []byte) (int, error)
}