github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/val/codec.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package val 16 17 import ( 18 "bytes" 19 "encoding/binary" 20 "math" 21 "math/big" 22 "math/bits" 23 "time" 24 "unsafe" 25 26 "github.com/dolthub/dolt/go/gen/fb/serial" 27 "github.com/dolthub/dolt/go/store/hash" 28 29 "github.com/shopspring/decimal" 30 ) 31 32 type Type struct { 33 Enc Encoding 34 Nullable bool 35 } 36 37 const ( 38 strTerm = byte(0) 39 ) 40 41 type ByteSize uint16 42 43 const ( 44 int8Size ByteSize = 1 45 uint8Size ByteSize = 1 46 int16Size ByteSize = 2 47 uint16Size ByteSize = 2 48 int32Size ByteSize = 4 49 uint32Size ByteSize = 4 50 int64Size ByteSize = 8 51 uint64Size ByteSize = 8 52 float32Size ByteSize = 4 53 float64Size ByteSize = 8 54 bit64Size ByteSize = 8 55 hash128Size ByteSize = 16 56 yearSize ByteSize = 1 57 dateSize ByteSize = 4 58 timeSize ByteSize = 8 59 datetimeSize ByteSize = 8 60 enumSize ByteSize = 2 61 setSize ByteSize = 8 62 bytesAddrEnc ByteSize = hash.ByteLen 63 commitAddrEnc ByteSize = hash.ByteLen 64 stringAddrEnc ByteSize = hash.ByteLen 65 jsonAddrEnc ByteSize = hash.ByteLen 66 cellSize ByteSize = 17 67 geomAddrEnc ByteSize = hash.ByteLen 68 extendedAddrSize ByteSize = hash.ByteLen 69 ) 70 71 type Encoding byte 72 73 // Fixed Width Encodings 74 const ( 75 NullEnc = Encoding(serial.EncodingNull) 76 Int8Enc = Encoding(serial.EncodingInt8) 77 Uint8Enc = Encoding(serial.EncodingUint8) 78 Int16Enc = Encoding(serial.EncodingInt16) 79 Uint16Enc = Encoding(serial.EncodingUint16) 80 Int32Enc = Encoding(serial.EncodingInt32) 81 Uint32Enc = Encoding(serial.EncodingUint32) 82 Int64Enc = Encoding(serial.EncodingInt64) 83 Uint64Enc = Encoding(serial.EncodingUint64) 84 Float32Enc = Encoding(serial.EncodingFloat32) 85 Float64Enc = Encoding(serial.EncodingFloat64) 86 Bit64Enc = Encoding(serial.EncodingBit64) 87 Hash128Enc = Encoding(serial.EncodingHash128) 88 YearEnc = Encoding(serial.EncodingYear) 89 DateEnc = Encoding(serial.EncodingDate) 90 TimeEnc = Encoding(serial.EncodingTime) 91 DatetimeEnc = Encoding(serial.EncodingDatetime) 92 EnumEnc = Encoding(serial.EncodingEnum) 93 SetEnc = Encoding(serial.EncodingSet) 94 BytesAddrEnc = Encoding(serial.EncodingBytesAddr) 95 CommitAddrEnc = Encoding(serial.EncodingCommitAddr) 96 StringAddrEnc = Encoding(serial.EncodingStringAddr) 97 JSONAddrEnc = Encoding(serial.EncodingJSONAddr) 98 CellEnc = Encoding(serial.EncodingCell) 99 GeomAddrEnc = Encoding(serial.EncodingGeomAddr) 100 ExtendedAddrEnc = Encoding(serial.EncodingExtendedAddr) 101 102 sentinel Encoding = 127 103 ) 104 105 func IsAddrEncoding(enc Encoding) bool { 106 switch enc { 107 case BytesAddrEnc, 108 CommitAddrEnc, 109 StringAddrEnc, 110 JSONAddrEnc, 111 GeomAddrEnc, 112 ExtendedAddrEnc: 113 return true 114 default: 115 return false 116 } 117 } 118 119 // Variable Width Encodings 120 const ( 121 StringEnc = Encoding(serial.EncodingString) 122 ByteStringEnc = Encoding(serial.EncodingBytes) 123 DecimalEnc = Encoding(serial.EncodingDecimal) 124 JSONEnc = Encoding(serial.EncodingJSON) 125 GeometryEnc = Encoding(serial.EncodingGeometry) 126 ExtendedEnc = Encoding(serial.EncodingExtended) 127 ) 128 129 func sizeFromType(t Type) (ByteSize, bool) { 130 switch t.Enc { 131 case Int8Enc: 132 return int8Size, true 133 case Uint8Enc: 134 return uint8Size, true 135 case Int16Enc: 136 return int16Size, true 137 case Uint16Enc: 138 return uint16Size, true 139 case Int32Enc: 140 return int32Size, true 141 case Uint32Enc: 142 return uint32Size, true 143 case Int64Enc: 144 return int64Size, true 145 case Uint64Enc: 146 return uint64Size, true 147 case Float32Enc: 148 return float32Size, true 149 case Float64Enc: 150 return float64Size, true 151 case Bit64Enc: 152 return bit64Size, true 153 case Hash128Enc: 154 return hash128Size, true 155 case YearEnc: 156 return yearSize, true 157 case DateEnc: 158 return dateSize, true 159 case TimeEnc: 160 return timeSize, true 161 case DatetimeEnc: 162 return datetimeSize, true 163 case EnumEnc: 164 return enumSize, true 165 case SetEnc: 166 return setSize, true 167 case BytesAddrEnc: 168 return bytesAddrEnc, true 169 case CommitAddrEnc: 170 return commitAddrEnc, true 171 case StringAddrEnc: 172 return stringAddrEnc, true 173 case JSONAddrEnc: 174 return jsonAddrEnc, true 175 case GeomAddrEnc: 176 return geomAddrEnc, true 177 case ExtendedAddrEnc: 178 return extendedAddrSize, true 179 default: 180 return 0, false 181 } 182 } 183 184 func readBool(val []byte) bool { 185 expectSize(val, int8Size) 186 return val[0] == 1 187 } 188 189 func writeBool(buf []byte, val bool) { 190 expectSize(buf, 1) 191 if val { 192 buf[0] = byte(1) 193 } else { 194 buf[0] = byte(0) 195 } 196 } 197 198 // false is less that true 199 func compareBool(l, r bool) int { 200 if l == r { 201 return 0 202 } 203 if !l && r { 204 return -1 205 } 206 return 1 207 } 208 209 func readInt8(val []byte) int8 { 210 expectSize(val, int8Size) 211 return int8(val[0]) 212 } 213 214 func writeInt8(buf []byte, val int8) { 215 expectSize(buf, int8Size) 216 buf[0] = byte(val) 217 } 218 219 func compareInt8(l, r int8) int { 220 if l == r { 221 return 0 222 } else if l < r { 223 return -1 224 } else { 225 return 1 226 } 227 } 228 229 func readUint8(val []byte) uint8 { 230 expectSize(val, uint8Size) 231 return val[0] 232 } 233 234 func writeUint8(buf []byte, val uint8) { 235 expectSize(buf, uint8Size) 236 buf[0] = byte(val) 237 } 238 239 func compareUint8(l, r uint8) int { 240 if l == r { 241 return 0 242 } else if l < r { 243 return -1 244 } else { 245 return 1 246 } 247 } 248 249 func readInt16(val []byte) int16 { 250 expectSize(val, int16Size) 251 return int16(binary.LittleEndian.Uint16(val)) 252 } 253 254 func writeInt16(buf []byte, val int16) { 255 expectSize(buf, int16Size) 256 binary.LittleEndian.PutUint16(buf, uint16(val)) 257 } 258 259 func compareInt16(l, r int16) int { 260 if l == r { 261 return 0 262 } else if l < r { 263 return -1 264 } else { 265 return 1 266 } 267 } 268 269 func ReadUint16(val []byte) uint16 { 270 expectSize(val, uint16Size) 271 return binary.LittleEndian.Uint16(val) 272 } 273 274 func WriteUint16(buf []byte, val uint16) { 275 expectSize(buf, uint16Size) 276 binary.LittleEndian.PutUint16(buf, val) 277 } 278 279 func compareUint16(l, r uint16) int { 280 if l == r { 281 return 0 282 } else if l < r { 283 return -1 284 } else { 285 return 1 286 } 287 } 288 289 func readInt32(val []byte) int32 { 290 expectSize(val, int32Size) 291 return int32(binary.LittleEndian.Uint32(val)) 292 } 293 294 func writeInt32(buf []byte, val int32) { 295 expectSize(buf, int32Size) 296 binary.LittleEndian.PutUint32(buf, uint32(val)) 297 } 298 299 func compareInt32(l, r int32) int { 300 if l == r { 301 return 0 302 } else if l < r { 303 return -1 304 } else { 305 return 1 306 } 307 } 308 309 func readUint32(val []byte) uint32 { 310 expectSize(val, uint32Size) 311 return binary.LittleEndian.Uint32(val) 312 } 313 314 func writeUint32(buf []byte, val uint32) { 315 expectSize(buf, uint32Size) 316 binary.LittleEndian.PutUint32(buf, val) 317 } 318 319 func compareUint32(l, r uint32) int { 320 if l == r { 321 return 0 322 } else if l < r { 323 return -1 324 } else { 325 return 1 326 } 327 } 328 329 func readInt64(val []byte) int64 { 330 expectSize(val, int64Size) 331 return int64(binary.LittleEndian.Uint64(val)) 332 } 333 334 func writeInt64(buf []byte, val int64) { 335 expectSize(buf, int64Size) 336 binary.LittleEndian.PutUint64(buf, uint64(val)) 337 } 338 339 func compareInt64(l, r int64) int { 340 if l == r { 341 return 0 342 } else if l < r { 343 return -1 344 } else { 345 return 1 346 } 347 } 348 349 func readUint64(val []byte) uint64 { 350 expectSize(val, uint64Size) 351 return binary.LittleEndian.Uint64(val) 352 } 353 354 func writeUint64(buf []byte, val uint64) { 355 expectSize(buf, uint64Size) 356 binary.LittleEndian.PutUint64(buf, val) 357 } 358 359 func compareUint64(l, r uint64) int { 360 if l == r { 361 return 0 362 } else if l < r { 363 return -1 364 } else { 365 return 1 366 } 367 } 368 369 func readFloat32(val []byte) float32 { 370 expectSize(val, float32Size) 371 return math.Float32frombits(readUint32(val)) 372 } 373 374 func writeFloat32(buf []byte, val float32) { 375 expectSize(buf, float32Size) 376 binary.LittleEndian.PutUint32(buf, math.Float32bits(val)) 377 } 378 379 func compareFloat32(l, r float32) int { 380 if l == r { 381 return 0 382 } else if l < r { 383 return -1 384 } else { 385 return 1 386 } 387 } 388 389 func readFloat64(val []byte) float64 { 390 expectSize(val, float64Size) 391 return math.Float64frombits(readUint64(val)) 392 } 393 394 func writeFloat64(buf []byte, val float64) { 395 expectSize(buf, float64Size) 396 binary.LittleEndian.PutUint64(buf, math.Float64bits(val)) 397 } 398 399 func compareFloat64(l, r float64) int { 400 if l == r { 401 return 0 402 } else if l < r { 403 return -1 404 } else { 405 return 1 406 } 407 } 408 409 func readBit64(val []byte) uint64 { 410 return readUint64(val) 411 } 412 413 func writeBit64(buf []byte, val uint64) { 414 writeUint64(buf, val) 415 } 416 417 func compareBit64(l, r uint64) int { 418 return compareUint64(l, r) 419 } 420 421 func readDecimal(val []byte) decimal.Decimal { 422 e := readInt32(val[:int32Size]) 423 s := readInt8(val[int32Size : int32Size+int8Size]) 424 b := big.NewInt(0).SetBytes(val[int32Size+int8Size:]) 425 if s < 0 { 426 b = b.Neg(b) 427 } 428 return decimal.NewFromBigInt(b, e) 429 } 430 431 func writeDecimal(buf []byte, val decimal.Decimal) { 432 expectSize(buf, sizeOfDecimal(val)) 433 writeInt32(buf[:int32Size], val.Exponent()) 434 b := val.Coefficient() 435 writeInt8(buf[int32Size:int32Size+int8Size], int8(b.Sign())) 436 b.FillBytes(buf[int32Size+int8Size:]) 437 } 438 439 func sizeOfDecimal(val decimal.Decimal) ByteSize { 440 bsz := len(val.Coefficient().Bits()) * (bits.UintSize / 8) 441 return int32Size + int8Size + ByteSize(bsz) 442 } 443 444 func compareDecimal(l, r decimal.Decimal) int { 445 return l.Cmp(r) 446 } 447 448 const minYear int16 = 1901 449 const maxYear int16 = 2155 450 const zeroToken uint8 = 255 451 452 func readYear(val []byte) int16 { 453 expectSize(val, yearSize) 454 v := readUint8(val) 455 if v == zeroToken { 456 return int16(0) 457 } 458 offset := int16(v) 459 return offset + minYear 460 } 461 462 // writeYear encodes the year |val| as an offset from the minimum year 1901. 463 // |val| must be within 1901 - 2155. If val == 0, 255 is written as a special 464 // token value. 465 func writeYear(buf []byte, val int16) { 466 expectSize(buf, yearSize) 467 if val == 0 { 468 writeUint8(buf, zeroToken) 469 return 470 } 471 if val < minYear || val > maxYear { 472 panic("year is outside of allowed range [1901, 2155]") 473 } 474 offset := uint8(val - minYear) 475 writeUint8(buf, offset) 476 } 477 478 func compareYear(l, r int16) int { 479 return compareInt16(l, r) 480 } 481 482 // adapted from: 483 // https://dev.mysql.com/doc/internals/en/date-and-time-data-type-representation.html 484 const ( 485 yearShift uint32 = 16 486 monthShift uint32 = 8 487 monthMask uint32 = 255 << monthShift 488 dayMask uint32 = 255 489 ) 490 491 func readDate(val []byte) (date time.Time) { 492 expectSize(val, dateSize) 493 t := readUint32(val) 494 y := t >> yearShift 495 m := (t & monthMask) >> monthShift 496 d := (t & dayMask) 497 return time.Date(int(y), time.Month(m), int(d), 0, 0, 0, 0, time.UTC) 498 } 499 500 func writeDate(buf []byte, val time.Time) { 501 expectSize(buf, dateSize) 502 t := uint32(val.Year() << yearShift) 503 t += uint32(val.Month() << monthShift) 504 t += uint32(val.Day()) 505 writeUint32(buf, t) 506 } 507 508 func compareDate(l, r time.Time) int { 509 return compareDatetime(l, r) 510 } 511 512 func readTime(val []byte) int64 { 513 expectSize(val, timeSize) 514 return readInt64(val) 515 } 516 517 func writeTime(buf []byte, val int64) { 518 expectSize(buf, timeSize) 519 writeInt64(buf, val) 520 } 521 522 func compareTime(l, r int64) int { 523 return compareInt64(l, r) 524 } 525 526 func readDatetime(buf []byte) (t time.Time) { 527 expectSize(buf, datetimeSize) 528 t = time.UnixMicro(readInt64(buf)).UTC() 529 return 530 } 531 532 func writeDatetime(buf []byte, val time.Time) { 533 expectSize(buf, datetimeSize) 534 writeInt64(buf, val.UnixMicro()) 535 } 536 537 func compareDatetime(l, r time.Time) int { 538 if l.Equal(r) { 539 return 0 540 } else if l.Before(r) { 541 return -1 542 } else { 543 return 1 544 } 545 } 546 547 func readEnum(val []byte) uint16 { 548 return ReadUint16(val) 549 } 550 551 func writeEnum(buf []byte, val uint16) { 552 WriteUint16(buf, val) 553 } 554 555 func compareEnum(l, r uint16) int { 556 return compareUint16(l, r) 557 } 558 559 func readSet(val []byte) uint64 { 560 return readUint64(val) 561 } 562 563 func writeSet(buf []byte, val uint64) { 564 writeUint64(buf, val) 565 } 566 567 func compareSet(l, r uint64) int { 568 return compareUint64(l, r) 569 } 570 571 func readString(val []byte) string { 572 return stringFromBytes(readByteString(val)) 573 } 574 575 func writeString(buf []byte, val string) { 576 writeByteString(buf, []byte(val)) 577 } 578 579 func compareString(l, r string) int { 580 return bytes.Compare([]byte(l), []byte(r)) 581 } 582 583 func readByteString(val []byte) []byte { 584 length := len(val) - 1 585 return val[:length] 586 } 587 588 func writeByteString(buf, val []byte) { 589 expectSize(buf, ByteSize(len(val))+1) 590 copy(buf, val) 591 buf[len(val)] = strTerm 592 } 593 594 func compareByteString(l, r []byte) int { 595 return bytes.Compare(l, r) 596 } 597 598 func readExtended(handler TupleTypeHandler, val []byte) any { 599 v, err := handler.DeserializeValue(val) 600 if err != nil { 601 panic(err) 602 } 603 return v 604 } 605 606 func writeExtended(handler TupleTypeHandler, buf []byte, val []byte) { 607 expectSize(buf, ByteSize(len(val))) 608 copy(buf, val) 609 } 610 611 func readHash128(val []byte) []byte { 612 expectSize(val, hash128Size) 613 return val 614 } 615 616 func writeHash128(buf, val []byte) { 617 expectSize(buf, hash128Size) 618 copy(buf, val) 619 } 620 621 func compareHash128(l, r []byte) int { 622 return bytes.Compare(l, r) 623 } 624 625 func compareAddr(l, r hash.Hash) int { 626 return l.Compare(r) 627 } 628 629 func writeRaw(buf, val []byte) { 630 expectSize(buf, ByteSize(len(val))) 631 copy(buf, val) 632 } 633 634 func writeAddr(buf []byte, v []byte) { 635 expectSize(buf, hash.ByteLen) 636 copy(buf, v) 637 } 638 639 func readAddr(val []byte) hash.Hash { 640 expectSize(val, hash.ByteLen) 641 return hash.New(val) 642 } 643 644 func expectSize(buf []byte, sz ByteSize) { 645 if ByteSize(len(buf)) != sz { 646 panic("byte slice is not of expected size") 647 } 648 } 649 650 // stringFromBytes converts a []byte to string without a heap allocation. 651 func stringFromBytes(b []byte) string { 652 return *(*string)(unsafe.Pointer(&b)) 653 } 654 655 // Cell is a representation of a subregion for Spatial Indexes 656 // The first byte encodes the level, which is the size of the region 657 // The highest level (the square covering all values floats) is 64 658 // The lowest level (a point) is 0 659 // The next 16 bytes is the z-value encoding of the minimum point of that subregion 660 type Cell [cellSize]byte 661 662 func compareCell(l, r Cell) int { 663 return bytes.Compare(l[:], r[:]) 664 } 665 666 func readCell(val []byte) (res Cell) { 667 expectSize(val, cellSize) 668 copy(res[:], val[:]) 669 return 670 } 671 672 func writeCell(buf []byte, v Cell) { 673 expectSize(buf, cellSize) 674 copy(buf[:], v[:]) 675 }