github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/val/tuple_descriptor.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package val 16 17 import ( 18 "encoding/hex" 19 "fmt" 20 "os" 21 "strconv" 22 "strings" 23 "time" 24 25 "github.com/shopspring/decimal" 26 27 "github.com/dolthub/dolt/go/libraries/doltcore/dconfig" 28 "github.com/dolthub/dolt/go/store/hash" 29 ) 30 31 func init() { 32 if v := os.Getenv(dconfig.EnvDisableFixedAccess); v != "" { 33 disableFixedAccess = true 34 } 35 } 36 37 // disableFixedAccess disables fast-access optimizations for 38 // not-null, fixed-width tuple values. See |makeFixedAccess|. 39 var disableFixedAccess = false 40 41 // TupleDesc describes a Tuple set. 42 // Data structures that contain Tuples and algorithms that process Tuples 43 // use a TupleDesc's types to interpret the fields of a Tuple. 44 type TupleDesc struct { 45 Types []Type 46 Handlers []TupleTypeHandler 47 cmp TupleComparator 48 fast FixedAccess 49 } 50 51 // TupleTypeHandler is used to specifically handle types that use extended encoding. Such types are declared by GMS, and 52 // this is a forward reference for the interface functions that are necessary here. 53 type TupleTypeHandler interface { 54 // SerializedCompare compares two byte slices that each represent a serialized value, without first deserializing 55 // the value. 56 SerializedCompare(v1 []byte, v2 []byte) (int, error) 57 // SerializeValue converts the given value into a binary representation. 58 SerializeValue(val any) ([]byte, error) 59 // DeserializeValue converts a binary representation of a value into its canonical type. 60 DeserializeValue(val []byte) (any, error) 61 // FormatValue returns a string version of the value. Primarily intended for display. 62 FormatValue(val any) (string, error) 63 } 64 65 // TupleDescriptorArgs are a set of optional arguments for TupleDesc creation. 66 type TupleDescriptorArgs struct { 67 Comparator TupleComparator 68 Handlers []TupleTypeHandler 69 } 70 71 // NewTupleDescriptor makes a TupleDescriptor from |types|. 72 func NewTupleDescriptor(types ...Type) TupleDesc { 73 return NewTupleDescriptorWithArgs(TupleDescriptorArgs{}, types...) 74 } 75 76 // NewTupleDescriptorWithArgs returns a TupleDesc based on the given arguments. 77 func NewTupleDescriptorWithArgs(args TupleDescriptorArgs, types ...Type) (td TupleDesc) { 78 if len(types) > MaxTupleFields { 79 panic("tuple field maxIdx exceeds maximum") 80 } 81 for _, typ := range types { 82 if typ.Enc == NullEnc { 83 panic("invalid encoding") 84 } 85 } 86 if args.Comparator == nil { 87 args.Comparator = DefaultTupleComparator{} 88 } 89 args.Comparator = ExtendedTupleComparator{args.Comparator, args.Handlers}.Validated(types) 90 91 td = TupleDesc{ 92 Types: types, 93 Handlers: args.Handlers, 94 cmp: args.Comparator, 95 fast: makeFixedAccess(types), 96 } 97 return 98 } 99 100 func IterAddressFields(td TupleDesc, cb func(int, Type)) { 101 for i, typ := range td.Types { 102 switch typ.Enc { 103 case BytesAddrEnc, StringAddrEnc, 104 JSONAddrEnc, CommitAddrEnc, GeomAddrEnc: 105 cb(i, typ) 106 } 107 } 108 } 109 110 type FixedAccess [][2]ByteSize 111 112 func makeFixedAccess(types []Type) (acc FixedAccess) { 113 if disableFixedAccess { 114 return nil 115 } 116 117 acc = make(FixedAccess, 0, len(types)) 118 119 off := ByteSize(0) 120 for _, typ := range types { 121 if typ.Nullable { 122 break 123 } 124 sz, ok := sizeFromType(typ) 125 if !ok { 126 break 127 } 128 acc = append(acc, [2]ByteSize{off, off + sz}) 129 off += sz 130 } 131 return 132 } 133 134 func (td TupleDesc) AddressFieldCount() (n int) { 135 IterAddressFields(td, func(int, Type) { 136 n++ 137 }) 138 return 139 } 140 141 // PrefixDesc returns a descriptor for the first n types. 142 func (td TupleDesc) PrefixDesc(n int) TupleDesc { 143 if len(td.Handlers) == 0 { 144 return NewTupleDescriptorWithArgs(TupleDescriptorArgs{Comparator: td.cmp.Prefix(n)}, td.Types[:n]...) 145 } 146 return NewTupleDescriptorWithArgs(TupleDescriptorArgs{Comparator: td.cmp.Prefix(n), Handlers: td.Handlers[:n]}, td.Types[:n]...) 147 } 148 149 // GetField returns the ith field of |tup|. 150 func (td TupleDesc) GetField(i int, tup Tuple) []byte { 151 if i < len(td.fast) { 152 start, stop := td.fast[i][0], td.fast[i][1] 153 return tup[start:stop] 154 } 155 return tup.GetField(i) 156 } 157 158 // Compare compares |left| and |right|. 159 func (td TupleDesc) Compare(left, right Tuple) (cmp int) { 160 return td.cmp.Compare(left, right, td) 161 } 162 163 // CompareField compares |value| with the ith field of |tup|. 164 func (td TupleDesc) CompareField(value []byte, i int, tup Tuple) (cmp int) { 165 var v []byte 166 if i < len(td.fast) { 167 start, stop := td.fast[i][0], td.fast[i][1] 168 v = tup[start:stop] 169 } else { 170 v = tup.GetField(i) 171 } 172 return td.cmp.CompareValues(i, value, v, td.Types[i]) 173 } 174 175 // Comparator returns the TupleDescriptor's TupleComparator. 176 func (td TupleDesc) Comparator() TupleComparator { 177 return td.cmp 178 } 179 180 // Count returns the number of fields in the TupleDesc. 181 func (td TupleDesc) Count() int { 182 return len(td.Types) 183 } 184 185 // IsNull returns true if the ith field of the Tuple is NULL. 186 func (td TupleDesc) IsNull(i int, tup Tuple) bool { 187 b := td.GetField(i, tup) 188 return b == nil 189 } 190 191 func (td TupleDesc) HasNulls(tup Tuple) bool { 192 if tup.Count() < td.Count() { 193 return true 194 } 195 for i := range td.Types { 196 if tup.FieldIsNull(i) { 197 return true 198 } 199 } 200 return false 201 } 202 203 // GetFixedAccess returns the FixedAccess for this tuple descriptor. 204 func (td TupleDesc) GetFixedAccess() FixedAccess { 205 return td.fast 206 } 207 208 // WithoutFixedAccess returns a copy of |td| without fixed access metadata. 209 func (td TupleDesc) WithoutFixedAccess() TupleDesc { 210 return TupleDesc{Types: td.Types, Handlers: td.Handlers, cmp: td.cmp} 211 } 212 213 // GetBool reads a bool from the ith field of the Tuple. 214 // If the ith field is NULL, |ok| is set to false. 215 func (td TupleDesc) GetBool(i int, tup Tuple) (v bool, ok bool) { 216 td.expectEncoding(i, Int8Enc) 217 b := td.GetField(i, tup) 218 if b != nil { 219 v, ok = readBool(b), true 220 } 221 return 222 } 223 224 // GetInt8 reads an int8 from the ith field of the Tuple. 225 // If the ith field is NULL, |ok| is set to false. 226 func (td TupleDesc) GetInt8(i int, tup Tuple) (v int8, ok bool) { 227 td.expectEncoding(i, Int8Enc) 228 b := td.GetField(i, tup) 229 if b != nil { 230 v, ok = readInt8(b), true 231 } 232 return 233 } 234 235 // GetUint8 reads a uint8 from the ith field of the Tuple. 236 // If the ith field is NULL, |ok| is set to false. 237 func (td TupleDesc) GetUint8(i int, tup Tuple) (v uint8, ok bool) { 238 td.expectEncoding(i, Uint8Enc) 239 b := td.GetField(i, tup) 240 if b != nil { 241 v, ok = readUint8(b), true 242 } 243 return 244 } 245 246 // GetInt16 reads an int16 from the ith field of the Tuple. 247 // If the ith field is NULL, |ok| is set to false. 248 func (td TupleDesc) GetInt16(i int, tup Tuple) (v int16, ok bool) { 249 td.expectEncoding(i, Int16Enc) 250 b := td.GetField(i, tup) 251 if b != nil { 252 v, ok = readInt16(b), true 253 } 254 return 255 } 256 257 // GetUint16 reads a uint16 from the ith field of the Tuple. 258 // If the ith field is NULL, |ok| is set to false. 259 func (td TupleDesc) GetUint16(i int, tup Tuple) (v uint16, ok bool) { 260 td.expectEncoding(i, Uint16Enc) 261 b := td.GetField(i, tup) 262 if b != nil { 263 v, ok = ReadUint16(b), true 264 } 265 return 266 } 267 268 // GetInt32 reads an int32 from the ith field of the Tuple. 269 // If the ith field is NULL, |ok| is set to false. 270 func (td TupleDesc) GetInt32(i int, tup Tuple) (v int32, ok bool) { 271 td.expectEncoding(i, Int32Enc) 272 b := td.GetField(i, tup) 273 if b != nil { 274 v, ok = readInt32(b), true 275 } 276 return 277 } 278 279 // GetUint32 reads a uint32 from the ith field of the Tuple. 280 // If the ith field is NULL, |ok| is set to false. 281 func (td TupleDesc) GetUint32(i int, tup Tuple) (v uint32, ok bool) { 282 td.expectEncoding(i, Uint32Enc) 283 b := td.GetField(i, tup) 284 if b != nil { 285 v, ok = readUint32(b), true 286 } 287 return 288 } 289 290 // GetInt64 reads an int64 from the ith field of the Tuple. 291 // If the ith field is NULL, |ok| is set to false. 292 func (td TupleDesc) GetInt64(i int, tup Tuple) (v int64, ok bool) { 293 td.expectEncoding(i, Int64Enc) 294 b := td.GetField(i, tup) 295 if b != nil { 296 v, ok = readInt64(b), true 297 } 298 return 299 } 300 301 // GetUint64 reads a uint64 from the ith field of the Tuple. 302 // If the ith field is NULL, |ok| is set to false. 303 func (td TupleDesc) GetUint64(i int, tup Tuple) (v uint64, ok bool) { 304 td.expectEncoding(i, Uint64Enc) 305 b := td.GetField(i, tup) 306 if b != nil { 307 v, ok = readUint64(b), true 308 } 309 return 310 } 311 312 // GetFloat32 reads a float32 from the ith field of the Tuple. 313 // If the ith field is NULL, |ok| is set to false. 314 func (td TupleDesc) GetFloat32(i int, tup Tuple) (v float32, ok bool) { 315 td.expectEncoding(i, Float32Enc) 316 b := td.GetField(i, tup) 317 if b != nil { 318 v, ok = readFloat32(b), true 319 } 320 return 321 } 322 323 // GetFloat64 reads a float64 from the ith field of the Tuple. 324 // If the ith field is NULL, |ok| is set to false. 325 func (td TupleDesc) GetFloat64(i int, tup Tuple) (v float64, ok bool) { 326 td.expectEncoding(i, Float64Enc) 327 b := td.GetField(i, tup) 328 if b != nil { 329 v, ok = readFloat64(b), true 330 } 331 return 332 } 333 334 // GetBit reads a uint64 from the ith field of the Tuple. 335 // If the ith field is NULL, |ok| is set to false. 336 func (td TupleDesc) GetBit(i int, tup Tuple) (v uint64, ok bool) { 337 td.expectEncoding(i, Bit64Enc) 338 b := td.GetField(i, tup) 339 if b != nil { 340 v, ok = readBit64(b), true 341 } 342 return 343 } 344 345 // GetDecimal reads a float64 from the ith field of the Tuple. 346 // If the ith field is NULL, |ok| is set to false. 347 func (td TupleDesc) GetDecimal(i int, tup Tuple) (v decimal.Decimal, ok bool) { 348 td.expectEncoding(i, DecimalEnc) 349 b := td.GetField(i, tup) 350 if b != nil { 351 v, ok = readDecimal(b), true 352 } 353 return 354 } 355 356 // GetYear reads an int16 from the ith field of the Tuple. 357 // If the ith field is NULL, |ok| is set to false. 358 func (td TupleDesc) GetYear(i int, tup Tuple) (v int16, ok bool) { 359 td.expectEncoding(i, YearEnc) 360 b := td.GetField(i, tup) 361 if b != nil { 362 v, ok = readYear(b), true 363 } 364 return 365 } 366 367 // GetDate reads a time.Time from the ith field of the Tuple. 368 // If the ith field is NULL, |ok| is set to false. 369 func (td TupleDesc) GetDate(i int, tup Tuple) (v time.Time, ok bool) { 370 td.expectEncoding(i, DateEnc) 371 b := td.GetField(i, tup) 372 if b != nil { 373 v, ok = readDate(b), true 374 } 375 return 376 } 377 378 // GetSqlTime reads a string encoded Time value from the ith field of the Tuple. 379 // If the ith field is NULL, |ok| is set to false. 380 func (td TupleDesc) GetSqlTime(i int, tup Tuple) (v int64, ok bool) { 381 td.expectEncoding(i, TimeEnc) 382 b := td.GetField(i, tup) 383 if b != nil { 384 v, ok = readInt64(b), true 385 } 386 return 387 } 388 389 // GetDatetime reads a time.Time from the ith field of the Tuple. 390 // If the ith field is NULL, |ok| is set to false. 391 func (td TupleDesc) GetDatetime(i int, tup Tuple) (v time.Time, ok bool) { 392 td.expectEncoding(i, DatetimeEnc) 393 b := td.GetField(i, tup) 394 if b != nil { 395 v, ok = readDatetime(b), true 396 } 397 return 398 } 399 400 // GetEnum reads a uin16 from the ith field of the Tuple. 401 // If the ith field is NULL, |ok| is set to false. 402 func (td TupleDesc) GetEnum(i int, tup Tuple) (v uint16, ok bool) { 403 td.expectEncoding(i, EnumEnc) 404 b := td.GetField(i, tup) 405 if b != nil { 406 v, ok = readEnum(b), true 407 } 408 return 409 } 410 411 // GetSet reads a uint64 from the ith field of the Tuple. 412 // If the ith field is NULL, |ok| is set to false. 413 func (td TupleDesc) GetSet(i int, tup Tuple) (v uint64, ok bool) { 414 td.expectEncoding(i, SetEnc) 415 b := td.GetField(i, tup) 416 if b != nil { 417 v, ok = readSet(b), true 418 } 419 return 420 } 421 422 // GetString reads a string from the ith field of the Tuple. 423 // If the ith field is NULL, |ok| is set to false. 424 func (td TupleDesc) GetString(i int, tup Tuple) (v string, ok bool) { 425 td.expectEncoding(i, StringEnc) 426 b := td.GetField(i, tup) 427 if b != nil { 428 v = readString(b) 429 ok = true 430 } 431 return 432 } 433 434 // GetBytes reads a []byte from the ith field of the Tuple. 435 // If the ith field is NULL, |ok| is set to false. 436 func (td TupleDesc) GetBytes(i int, tup Tuple) (v []byte, ok bool) { 437 td.expectEncoding(i, ByteStringEnc) 438 b := td.GetField(i, tup) 439 if b != nil { 440 v = readByteString(b) 441 ok = true 442 } 443 return 444 } 445 446 // GetJSON reads a []byte from the ith field of the Tuple. 447 // If the ith field is NULL, |ok| is set to false. 448 func (td TupleDesc) GetJSON(i int, tup Tuple) (v []byte, ok bool) { 449 td.expectEncoding(i, JSONEnc) 450 b := td.GetField(i, tup) 451 if b != nil { 452 v = readByteString(b) 453 ok = true 454 } 455 return 456 } 457 458 // GetGeometry reads a []byte from the ith field of the Tuple. 459 // If the ith field is NULL, |ok| is set to false. 460 func (td TupleDesc) GetGeometry(i int, tup Tuple) (v []byte, ok bool) { 461 // TODO: we are support both Geometry and GeometryAddr for now, so we can't expect just one 462 // td.expectEncoding(i, GeometryEnc) 463 b := td.GetField(i, tup) 464 if b != nil { 465 v = readByteString(b) 466 ok = true 467 } 468 return 469 } 470 471 func (td TupleDesc) GetGeometryAddr(i int, tup Tuple) (hash.Hash, bool) { 472 // TODO: we are support both Geometry and GeometryAddr for now, so we can't expect just one 473 // td.expectEncoding(i, GeomAddrEnc) 474 return td.getAddr(i, tup) 475 } 476 477 func (td TupleDesc) GetHash128(i int, tup Tuple) (v []byte, ok bool) { 478 td.expectEncoding(i, Hash128Enc) 479 b := td.GetField(i, tup) 480 if b != nil { 481 v = b 482 ok = true 483 } 484 return 485 } 486 487 // GetExtended reads a byte slice from the ith field of the Tuple. 488 func (td TupleDesc) GetExtended(i int, tup Tuple) ([]byte, bool) { 489 td.expectEncoding(i, ExtendedEnc) 490 v := td.GetField(i, tup) 491 return v, v != nil 492 } 493 494 // GetExtendedAddr reads a hash from the ith field of the Tuple. 495 func (td TupleDesc) GetExtendedAddr(i int, tup Tuple) (hash.Hash, bool) { 496 td.expectEncoding(i, ExtendedAddrEnc) 497 return td.getAddr(i, tup) 498 } 499 500 func (td TupleDesc) GetJSONAddr(i int, tup Tuple) (hash.Hash, bool) { 501 td.expectEncoding(i, JSONAddrEnc) 502 return td.getAddr(i, tup) 503 } 504 505 func (td TupleDesc) GetStringAddr(i int, tup Tuple) (hash.Hash, bool) { 506 td.expectEncoding(i, StringAddrEnc) 507 return td.getAddr(i, tup) 508 } 509 510 func (td TupleDesc) GetBytesAddr(i int, tup Tuple) (hash.Hash, bool) { 511 td.expectEncoding(i, BytesAddrEnc) 512 return td.getAddr(i, tup) 513 } 514 515 func (td TupleDesc) GetCommitAddr(i int, tup Tuple) (v hash.Hash, ok bool) { 516 td.expectEncoding(i, CommitAddrEnc) 517 return td.getAddr(i, tup) 518 } 519 520 func (td TupleDesc) getAddr(i int, tup Tuple) (hash.Hash, bool) { 521 b := td.GetField(i, tup) 522 if b == nil { 523 return hash.Hash{}, false 524 } 525 return hash.New(b), true 526 } 527 528 func (td TupleDesc) expectEncoding(i int, encodings ...Encoding) { 529 for _, enc := range encodings { 530 if enc == td.Types[i].Enc { 531 return 532 } 533 } 534 panic("incorrect value encoding") 535 } 536 537 func (td TupleDesc) GetCell(i int, tup Tuple) (v Cell, ok bool) { 538 td.expectEncoding(i, CellEnc) 539 b := td.GetField(i, tup) 540 if b != nil { 541 v = readCell(b) 542 ok = true 543 } 544 return 545 } 546 547 // Format prints a Tuple as a string. 548 func (td TupleDesc) Format(tup Tuple) string { 549 if tup == nil || tup.Count() == 0 { 550 return "( )" 551 } 552 553 var sb strings.Builder 554 sb.WriteString("( ") 555 556 seenOne := false 557 for i := range td.Types { 558 if seenOne { 559 sb.WriteString(", ") 560 } 561 seenOne = true 562 sb.WriteString(td.FormatValue(i, tup.GetField(i))) 563 } 564 sb.WriteString(" )") 565 return sb.String() 566 } 567 568 func (td TupleDesc) FormatValue(i int, value []byte) string { 569 if value == nil { 570 return "NULL" 571 } 572 return td.formatValue(td.Types[i].Enc, i, value) 573 } 574 575 func (td TupleDesc) formatValue(enc Encoding, i int, value []byte) string { 576 switch enc { 577 case Int8Enc: 578 v := readInt8(value) 579 return strconv.Itoa(int(v)) 580 case Uint8Enc: 581 v := readUint8(value) 582 return strconv.Itoa(int(v)) 583 case Int16Enc: 584 v := readInt16(value) 585 return strconv.Itoa(int(v)) 586 case Uint16Enc: 587 v := ReadUint16(value) 588 return strconv.Itoa(int(v)) 589 case Int32Enc: 590 v := readInt32(value) 591 return strconv.Itoa(int(v)) 592 case Uint32Enc: 593 v := readUint32(value) 594 return strconv.Itoa(int(v)) 595 case Int64Enc: 596 v := readInt64(value) 597 return strconv.FormatInt(v, 10) 598 case Uint64Enc: 599 v := readUint64(value) 600 return strconv.FormatUint(v, 10) 601 case Float32Enc: 602 v := readFloat32(value) 603 return fmt.Sprintf("%f", v) 604 case Float64Enc: 605 v := readFloat64(value) 606 return fmt.Sprintf("%f", v) 607 case Bit64Enc: 608 v := readUint64(value) 609 return strconv.FormatUint(v, 10) 610 case DecimalEnc: 611 v := readDecimal(value) 612 return v.String() 613 case YearEnc: 614 v := readYear(value) 615 return strconv.Itoa(int(v)) 616 case DateEnc: 617 v := readDate(value) 618 return v.Format("2006-01-02") 619 case TimeEnc: 620 v := readTime(value) 621 return strconv.FormatInt(v, 10) 622 case DatetimeEnc: 623 v := readDatetime(value) 624 return v.Format(time.RFC3339) 625 case EnumEnc: 626 v := readEnum(value) 627 return strconv.Itoa(int(v)) 628 case SetEnc: 629 v := readSet(value) 630 return strconv.FormatUint(v, 10) 631 case StringEnc: 632 return readString(value) 633 case ByteStringEnc: 634 return hex.EncodeToString(value) 635 case Hash128Enc: 636 return hex.EncodeToString(value) 637 case BytesAddrEnc: 638 return hex.EncodeToString(value) 639 case CommitAddrEnc: 640 return hex.EncodeToString(value) 641 case CellEnc: 642 return hex.EncodeToString(value) 643 case ExtendedEnc: 644 handler := td.Handlers[i] 645 v := readExtended(handler, value) 646 str, err := handler.FormatValue(v) 647 if err != nil { 648 panic(err) 649 } 650 return str 651 case ExtendedAddrEnc: 652 return hex.EncodeToString(value) 653 default: 654 return string(value) 655 } 656 } 657 658 // Equals returns true if |td| and |other| have equal type slices. 659 func (td TupleDesc) Equals(other TupleDesc) bool { 660 if len(td.Types) != len(other.Types) { 661 return false 662 } 663 for i, typ := range td.Types { 664 if typ != other.Types[i] { 665 return false 666 } 667 } 668 return true 669 }