github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/val/codec.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package val
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/binary"
    20  	"math"
    21  	"math/big"
    22  	"math/bits"
    23  	"time"
    24  	"unsafe"
    25  
    26  	"github.com/dolthub/dolt/go/gen/fb/serial"
    27  	"github.com/dolthub/dolt/go/store/hash"
    28  
    29  	"github.com/shopspring/decimal"
    30  )
    31  
    32  type Type struct {
    33  	Enc      Encoding
    34  	Nullable bool
    35  }
    36  
    37  const (
    38  	strTerm = byte(0)
    39  )
    40  
    41  type ByteSize uint16
    42  
    43  const (
    44  	int8Size         ByteSize = 1
    45  	uint8Size        ByteSize = 1
    46  	int16Size        ByteSize = 2
    47  	uint16Size       ByteSize = 2
    48  	int32Size        ByteSize = 4
    49  	uint32Size       ByteSize = 4
    50  	int64Size        ByteSize = 8
    51  	uint64Size       ByteSize = 8
    52  	float32Size      ByteSize = 4
    53  	float64Size      ByteSize = 8
    54  	bit64Size        ByteSize = 8
    55  	hash128Size      ByteSize = 16
    56  	yearSize         ByteSize = 1
    57  	dateSize         ByteSize = 4
    58  	timeSize         ByteSize = 8
    59  	datetimeSize     ByteSize = 8
    60  	enumSize         ByteSize = 2
    61  	setSize          ByteSize = 8
    62  	bytesAddrEnc     ByteSize = hash.ByteLen
    63  	commitAddrEnc    ByteSize = hash.ByteLen
    64  	stringAddrEnc    ByteSize = hash.ByteLen
    65  	jsonAddrEnc      ByteSize = hash.ByteLen
    66  	cellSize         ByteSize = 17
    67  	geomAddrEnc      ByteSize = hash.ByteLen
    68  	extendedAddrSize ByteSize = hash.ByteLen
    69  )
    70  
    71  type Encoding byte
    72  
    73  // Fixed Width Encodings
    74  const (
    75  	NullEnc         = Encoding(serial.EncodingNull)
    76  	Int8Enc         = Encoding(serial.EncodingInt8)
    77  	Uint8Enc        = Encoding(serial.EncodingUint8)
    78  	Int16Enc        = Encoding(serial.EncodingInt16)
    79  	Uint16Enc       = Encoding(serial.EncodingUint16)
    80  	Int32Enc        = Encoding(serial.EncodingInt32)
    81  	Uint32Enc       = Encoding(serial.EncodingUint32)
    82  	Int64Enc        = Encoding(serial.EncodingInt64)
    83  	Uint64Enc       = Encoding(serial.EncodingUint64)
    84  	Float32Enc      = Encoding(serial.EncodingFloat32)
    85  	Float64Enc      = Encoding(serial.EncodingFloat64)
    86  	Bit64Enc        = Encoding(serial.EncodingBit64)
    87  	Hash128Enc      = Encoding(serial.EncodingHash128)
    88  	YearEnc         = Encoding(serial.EncodingYear)
    89  	DateEnc         = Encoding(serial.EncodingDate)
    90  	TimeEnc         = Encoding(serial.EncodingTime)
    91  	DatetimeEnc     = Encoding(serial.EncodingDatetime)
    92  	EnumEnc         = Encoding(serial.EncodingEnum)
    93  	SetEnc          = Encoding(serial.EncodingSet)
    94  	BytesAddrEnc    = Encoding(serial.EncodingBytesAddr)
    95  	CommitAddrEnc   = Encoding(serial.EncodingCommitAddr)
    96  	StringAddrEnc   = Encoding(serial.EncodingStringAddr)
    97  	JSONAddrEnc     = Encoding(serial.EncodingJSONAddr)
    98  	CellEnc         = Encoding(serial.EncodingCell)
    99  	GeomAddrEnc     = Encoding(serial.EncodingGeomAddr)
   100  	ExtendedAddrEnc = Encoding(serial.EncodingExtendedAddr)
   101  
   102  	sentinel Encoding = 127
   103  )
   104  
   105  func IsAddrEncoding(enc Encoding) bool {
   106  	switch enc {
   107  	case BytesAddrEnc,
   108  		CommitAddrEnc,
   109  		StringAddrEnc,
   110  		JSONAddrEnc,
   111  		GeomAddrEnc,
   112  		ExtendedAddrEnc:
   113  		return true
   114  	default:
   115  		return false
   116  	}
   117  }
   118  
   119  // Variable Width Encodings
   120  const (
   121  	StringEnc     = Encoding(serial.EncodingString)
   122  	ByteStringEnc = Encoding(serial.EncodingBytes)
   123  	DecimalEnc    = Encoding(serial.EncodingDecimal)
   124  	JSONEnc       = Encoding(serial.EncodingJSON)
   125  	GeometryEnc   = Encoding(serial.EncodingGeometry)
   126  	ExtendedEnc   = Encoding(serial.EncodingExtended)
   127  )
   128  
   129  func sizeFromType(t Type) (ByteSize, bool) {
   130  	switch t.Enc {
   131  	case Int8Enc:
   132  		return int8Size, true
   133  	case Uint8Enc:
   134  		return uint8Size, true
   135  	case Int16Enc:
   136  		return int16Size, true
   137  	case Uint16Enc:
   138  		return uint16Size, true
   139  	case Int32Enc:
   140  		return int32Size, true
   141  	case Uint32Enc:
   142  		return uint32Size, true
   143  	case Int64Enc:
   144  		return int64Size, true
   145  	case Uint64Enc:
   146  		return uint64Size, true
   147  	case Float32Enc:
   148  		return float32Size, true
   149  	case Float64Enc:
   150  		return float64Size, true
   151  	case Bit64Enc:
   152  		return bit64Size, true
   153  	case Hash128Enc:
   154  		return hash128Size, true
   155  	case YearEnc:
   156  		return yearSize, true
   157  	case DateEnc:
   158  		return dateSize, true
   159  	case TimeEnc:
   160  		return timeSize, true
   161  	case DatetimeEnc:
   162  		return datetimeSize, true
   163  	case EnumEnc:
   164  		return enumSize, true
   165  	case SetEnc:
   166  		return setSize, true
   167  	case BytesAddrEnc:
   168  		return bytesAddrEnc, true
   169  	case CommitAddrEnc:
   170  		return commitAddrEnc, true
   171  	case StringAddrEnc:
   172  		return stringAddrEnc, true
   173  	case JSONAddrEnc:
   174  		return jsonAddrEnc, true
   175  	case GeomAddrEnc:
   176  		return geomAddrEnc, true
   177  	case ExtendedAddrEnc:
   178  		return extendedAddrSize, true
   179  	default:
   180  		return 0, false
   181  	}
   182  }
   183  
   184  func readBool(val []byte) bool {
   185  	expectSize(val, int8Size)
   186  	return val[0] == 1
   187  }
   188  
   189  func writeBool(buf []byte, val bool) {
   190  	expectSize(buf, 1)
   191  	if val {
   192  		buf[0] = byte(1)
   193  	} else {
   194  		buf[0] = byte(0)
   195  	}
   196  }
   197  
   198  // false is less that true
   199  func compareBool(l, r bool) int {
   200  	if l == r {
   201  		return 0
   202  	}
   203  	if !l && r {
   204  		return -1
   205  	}
   206  	return 1
   207  }
   208  
   209  func readInt8(val []byte) int8 {
   210  	expectSize(val, int8Size)
   211  	return int8(val[0])
   212  }
   213  
   214  func writeInt8(buf []byte, val int8) {
   215  	expectSize(buf, int8Size)
   216  	buf[0] = byte(val)
   217  }
   218  
   219  func compareInt8(l, r int8) int {
   220  	if l == r {
   221  		return 0
   222  	} else if l < r {
   223  		return -1
   224  	} else {
   225  		return 1
   226  	}
   227  }
   228  
   229  func readUint8(val []byte) uint8 {
   230  	expectSize(val, uint8Size)
   231  	return val[0]
   232  }
   233  
   234  func writeUint8(buf []byte, val uint8) {
   235  	expectSize(buf, uint8Size)
   236  	buf[0] = byte(val)
   237  }
   238  
   239  func compareUint8(l, r uint8) int {
   240  	if l == r {
   241  		return 0
   242  	} else if l < r {
   243  		return -1
   244  	} else {
   245  		return 1
   246  	}
   247  }
   248  
   249  func readInt16(val []byte) int16 {
   250  	expectSize(val, int16Size)
   251  	return int16(binary.LittleEndian.Uint16(val))
   252  }
   253  
   254  func writeInt16(buf []byte, val int16) {
   255  	expectSize(buf, int16Size)
   256  	binary.LittleEndian.PutUint16(buf, uint16(val))
   257  }
   258  
   259  func compareInt16(l, r int16) int {
   260  	if l == r {
   261  		return 0
   262  	} else if l < r {
   263  		return -1
   264  	} else {
   265  		return 1
   266  	}
   267  }
   268  
   269  func ReadUint16(val []byte) uint16 {
   270  	expectSize(val, uint16Size)
   271  	return binary.LittleEndian.Uint16(val)
   272  }
   273  
   274  func WriteUint16(buf []byte, val uint16) {
   275  	expectSize(buf, uint16Size)
   276  	binary.LittleEndian.PutUint16(buf, val)
   277  }
   278  
   279  func compareUint16(l, r uint16) int {
   280  	if l == r {
   281  		return 0
   282  	} else if l < r {
   283  		return -1
   284  	} else {
   285  		return 1
   286  	}
   287  }
   288  
   289  func readInt32(val []byte) int32 {
   290  	expectSize(val, int32Size)
   291  	return int32(binary.LittleEndian.Uint32(val))
   292  }
   293  
   294  func writeInt32(buf []byte, val int32) {
   295  	expectSize(buf, int32Size)
   296  	binary.LittleEndian.PutUint32(buf, uint32(val))
   297  }
   298  
   299  func compareInt32(l, r int32) int {
   300  	if l == r {
   301  		return 0
   302  	} else if l < r {
   303  		return -1
   304  	} else {
   305  		return 1
   306  	}
   307  }
   308  
   309  func readUint32(val []byte) uint32 {
   310  	expectSize(val, uint32Size)
   311  	return binary.LittleEndian.Uint32(val)
   312  }
   313  
   314  func writeUint32(buf []byte, val uint32) {
   315  	expectSize(buf, uint32Size)
   316  	binary.LittleEndian.PutUint32(buf, val)
   317  }
   318  
   319  func compareUint32(l, r uint32) int {
   320  	if l == r {
   321  		return 0
   322  	} else if l < r {
   323  		return -1
   324  	} else {
   325  		return 1
   326  	}
   327  }
   328  
   329  func readInt64(val []byte) int64 {
   330  	expectSize(val, int64Size)
   331  	return int64(binary.LittleEndian.Uint64(val))
   332  }
   333  
   334  func writeInt64(buf []byte, val int64) {
   335  	expectSize(buf, int64Size)
   336  	binary.LittleEndian.PutUint64(buf, uint64(val))
   337  }
   338  
   339  func compareInt64(l, r int64) int {
   340  	if l == r {
   341  		return 0
   342  	} else if l < r {
   343  		return -1
   344  	} else {
   345  		return 1
   346  	}
   347  }
   348  
   349  func readUint64(val []byte) uint64 {
   350  	expectSize(val, uint64Size)
   351  	return binary.LittleEndian.Uint64(val)
   352  }
   353  
   354  func writeUint64(buf []byte, val uint64) {
   355  	expectSize(buf, uint64Size)
   356  	binary.LittleEndian.PutUint64(buf, val)
   357  }
   358  
   359  func compareUint64(l, r uint64) int {
   360  	if l == r {
   361  		return 0
   362  	} else if l < r {
   363  		return -1
   364  	} else {
   365  		return 1
   366  	}
   367  }
   368  
   369  func readFloat32(val []byte) float32 {
   370  	expectSize(val, float32Size)
   371  	return math.Float32frombits(readUint32(val))
   372  }
   373  
   374  func writeFloat32(buf []byte, val float32) {
   375  	expectSize(buf, float32Size)
   376  	binary.LittleEndian.PutUint32(buf, math.Float32bits(val))
   377  }
   378  
   379  func compareFloat32(l, r float32) int {
   380  	if l == r {
   381  		return 0
   382  	} else if l < r {
   383  		return -1
   384  	} else {
   385  		return 1
   386  	}
   387  }
   388  
   389  func readFloat64(val []byte) float64 {
   390  	expectSize(val, float64Size)
   391  	return math.Float64frombits(readUint64(val))
   392  }
   393  
   394  func writeFloat64(buf []byte, val float64) {
   395  	expectSize(buf, float64Size)
   396  	binary.LittleEndian.PutUint64(buf, math.Float64bits(val))
   397  }
   398  
   399  func compareFloat64(l, r float64) int {
   400  	if l == r {
   401  		return 0
   402  	} else if l < r {
   403  		return -1
   404  	} else {
   405  		return 1
   406  	}
   407  }
   408  
   409  func readBit64(val []byte) uint64 {
   410  	return readUint64(val)
   411  }
   412  
   413  func writeBit64(buf []byte, val uint64) {
   414  	writeUint64(buf, val)
   415  }
   416  
   417  func compareBit64(l, r uint64) int {
   418  	return compareUint64(l, r)
   419  }
   420  
   421  func readDecimal(val []byte) decimal.Decimal {
   422  	e := readInt32(val[:int32Size])
   423  	s := readInt8(val[int32Size : int32Size+int8Size])
   424  	b := big.NewInt(0).SetBytes(val[int32Size+int8Size:])
   425  	if s < 0 {
   426  		b = b.Neg(b)
   427  	}
   428  	return decimal.NewFromBigInt(b, e)
   429  }
   430  
   431  func writeDecimal(buf []byte, val decimal.Decimal) {
   432  	expectSize(buf, sizeOfDecimal(val))
   433  	writeInt32(buf[:int32Size], val.Exponent())
   434  	b := val.Coefficient()
   435  	writeInt8(buf[int32Size:int32Size+int8Size], int8(b.Sign()))
   436  	b.FillBytes(buf[int32Size+int8Size:])
   437  }
   438  
   439  func sizeOfDecimal(val decimal.Decimal) ByteSize {
   440  	bsz := len(val.Coefficient().Bits()) * (bits.UintSize / 8)
   441  	return int32Size + int8Size + ByteSize(bsz)
   442  }
   443  
   444  func compareDecimal(l, r decimal.Decimal) int {
   445  	return l.Cmp(r)
   446  }
   447  
   448  const minYear int16 = 1901
   449  const maxYear int16 = 2155
   450  const zeroToken uint8 = 255
   451  
   452  func readYear(val []byte) int16 {
   453  	expectSize(val, yearSize)
   454  	v := readUint8(val)
   455  	if v == zeroToken {
   456  		return int16(0)
   457  	}
   458  	offset := int16(v)
   459  	return offset + minYear
   460  }
   461  
   462  // writeYear encodes the year |val| as an offset from the minimum year 1901.
   463  // |val| must be within 1901 - 2155. If val == 0, 255 is written as a special
   464  // token value.
   465  func writeYear(buf []byte, val int16) {
   466  	expectSize(buf, yearSize)
   467  	if val == 0 {
   468  		writeUint8(buf, zeroToken)
   469  		return
   470  	}
   471  	if val < minYear || val > maxYear {
   472  		panic("year is outside of allowed range [1901, 2155]")
   473  	}
   474  	offset := uint8(val - minYear)
   475  	writeUint8(buf, offset)
   476  }
   477  
   478  func compareYear(l, r int16) int {
   479  	return compareInt16(l, r)
   480  }
   481  
   482  // adapted from:
   483  // https://dev.mysql.com/doc/internals/en/date-and-time-data-type-representation.html
   484  const (
   485  	yearShift  uint32 = 16
   486  	monthShift uint32 = 8
   487  	monthMask  uint32 = 255 << monthShift
   488  	dayMask    uint32 = 255
   489  )
   490  
   491  func readDate(val []byte) (date time.Time) {
   492  	expectSize(val, dateSize)
   493  	t := readUint32(val)
   494  	y := t >> yearShift
   495  	m := (t & monthMask) >> monthShift
   496  	d := (t & dayMask)
   497  	return time.Date(int(y), time.Month(m), int(d), 0, 0, 0, 0, time.UTC)
   498  }
   499  
   500  func writeDate(buf []byte, val time.Time) {
   501  	expectSize(buf, dateSize)
   502  	t := uint32(val.Year() << yearShift)
   503  	t += uint32(val.Month() << monthShift)
   504  	t += uint32(val.Day())
   505  	writeUint32(buf, t)
   506  }
   507  
   508  func compareDate(l, r time.Time) int {
   509  	return compareDatetime(l, r)
   510  }
   511  
   512  func readTime(val []byte) int64 {
   513  	expectSize(val, timeSize)
   514  	return readInt64(val)
   515  }
   516  
   517  func writeTime(buf []byte, val int64) {
   518  	expectSize(buf, timeSize)
   519  	writeInt64(buf, val)
   520  }
   521  
   522  func compareTime(l, r int64) int {
   523  	return compareInt64(l, r)
   524  }
   525  
   526  func readDatetime(buf []byte) (t time.Time) {
   527  	expectSize(buf, datetimeSize)
   528  	t = time.UnixMicro(readInt64(buf)).UTC()
   529  	return
   530  }
   531  
   532  func writeDatetime(buf []byte, val time.Time) {
   533  	expectSize(buf, datetimeSize)
   534  	writeInt64(buf, val.UnixMicro())
   535  }
   536  
   537  func compareDatetime(l, r time.Time) int {
   538  	if l.Equal(r) {
   539  		return 0
   540  	} else if l.Before(r) {
   541  		return -1
   542  	} else {
   543  		return 1
   544  	}
   545  }
   546  
   547  func readEnum(val []byte) uint16 {
   548  	return ReadUint16(val)
   549  }
   550  
   551  func writeEnum(buf []byte, val uint16) {
   552  	WriteUint16(buf, val)
   553  }
   554  
   555  func compareEnum(l, r uint16) int {
   556  	return compareUint16(l, r)
   557  }
   558  
   559  func readSet(val []byte) uint64 {
   560  	return readUint64(val)
   561  }
   562  
   563  func writeSet(buf []byte, val uint64) {
   564  	writeUint64(buf, val)
   565  }
   566  
   567  func compareSet(l, r uint64) int {
   568  	return compareUint64(l, r)
   569  }
   570  
   571  func readString(val []byte) string {
   572  	return stringFromBytes(readByteString(val))
   573  }
   574  
   575  func writeString(buf []byte, val string) {
   576  	writeByteString(buf, []byte(val))
   577  }
   578  
   579  func compareString(l, r string) int {
   580  	return bytes.Compare([]byte(l), []byte(r))
   581  }
   582  
   583  func readByteString(val []byte) []byte {
   584  	length := len(val) - 1
   585  	return val[:length]
   586  }
   587  
   588  func writeByteString(buf, val []byte) {
   589  	expectSize(buf, ByteSize(len(val))+1)
   590  	copy(buf, val)
   591  	buf[len(val)] = strTerm
   592  }
   593  
   594  func compareByteString(l, r []byte) int {
   595  	return bytes.Compare(l, r)
   596  }
   597  
   598  func readExtended(handler TupleTypeHandler, val []byte) any {
   599  	v, err := handler.DeserializeValue(val)
   600  	if err != nil {
   601  		panic(err)
   602  	}
   603  	return v
   604  }
   605  
   606  func writeExtended(handler TupleTypeHandler, buf []byte, val []byte) {
   607  	expectSize(buf, ByteSize(len(val)))
   608  	copy(buf, val)
   609  }
   610  
   611  func readHash128(val []byte) []byte {
   612  	expectSize(val, hash128Size)
   613  	return val
   614  }
   615  
   616  func writeHash128(buf, val []byte) {
   617  	expectSize(buf, hash128Size)
   618  	copy(buf, val)
   619  }
   620  
   621  func compareHash128(l, r []byte) int {
   622  	return bytes.Compare(l, r)
   623  }
   624  
   625  func compareAddr(l, r hash.Hash) int {
   626  	return l.Compare(r)
   627  }
   628  
   629  func writeRaw(buf, val []byte) {
   630  	expectSize(buf, ByteSize(len(val)))
   631  	copy(buf, val)
   632  }
   633  
   634  func writeAddr(buf []byte, v []byte) {
   635  	expectSize(buf, hash.ByteLen)
   636  	copy(buf, v)
   637  }
   638  
   639  func readAddr(val []byte) hash.Hash {
   640  	expectSize(val, hash.ByteLen)
   641  	return hash.New(val)
   642  }
   643  
   644  func expectSize(buf []byte, sz ByteSize) {
   645  	if ByteSize(len(buf)) != sz {
   646  		panic("byte slice is not of expected size")
   647  	}
   648  }
   649  
   650  // stringFromBytes converts a []byte to string without a heap allocation.
   651  func stringFromBytes(b []byte) string {
   652  	return *(*string)(unsafe.Pointer(&b))
   653  }
   654  
   655  // Cell is a representation of a subregion for Spatial Indexes
   656  // The first byte encodes the level, which is the size of the region
   657  // The highest level (the square covering all values floats) is 64
   658  // The lowest level (a point) is 0
   659  // The next 16 bytes is the z-value encoding of the minimum point of that subregion
   660  type Cell [cellSize]byte
   661  
   662  func compareCell(l, r Cell) int {
   663  	return bytes.Compare(l[:], r[:])
   664  }
   665  
   666  func readCell(val []byte) (res Cell) {
   667  	expectSize(val, cellSize)
   668  	copy(res[:], val[:])
   669  	return
   670  }
   671  
   672  func writeCell(buf []byte, v Cell) {
   673  	expectSize(buf, cellSize)
   674  	copy(buf[:], v[:])
   675  }