github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/prolly_fields.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"math"
    25  	"time"
    26  
    27  	"github.com/dolthub/go-mysql-server/sql"
    28  	"github.com/dolthub/go-mysql-server/sql/types"
    29  	"github.com/shopspring/decimal"
    30  
    31  	"github.com/dolthub/dolt/go/store/hash"
    32  	"github.com/dolthub/dolt/go/store/pool"
    33  	"github.com/dolthub/dolt/go/store/val"
    34  )
    35  
    36  var ErrValueExceededMaxFieldSize = errors.New("value exceeded max field size of 65kb")
    37  
    38  // GetField reads the value from the ith field of the Tuple as an interface{}.
    39  func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns NodeStore) (v interface{}, err error) {
    40  	var ok bool
    41  	switch td.Types[i].Enc {
    42  	case val.Int8Enc:
    43  		v, ok = td.GetInt8(i, tup)
    44  	case val.Uint8Enc:
    45  		v, ok = td.GetUint8(i, tup)
    46  	case val.Int16Enc:
    47  		v, ok = td.GetInt16(i, tup)
    48  	case val.Uint16Enc:
    49  		v, ok = td.GetUint16(i, tup)
    50  	case val.Int32Enc:
    51  		v, ok = td.GetInt32(i, tup)
    52  	case val.Uint32Enc:
    53  		v, ok = td.GetUint32(i, tup)
    54  	case val.Int64Enc:
    55  		v, ok = td.GetInt64(i, tup)
    56  	case val.Uint64Enc:
    57  		v, ok = td.GetUint64(i, tup)
    58  	case val.Float32Enc:
    59  		v, ok = td.GetFloat32(i, tup)
    60  	case val.Float64Enc:
    61  		v, ok = td.GetFloat64(i, tup)
    62  	case val.Bit64Enc:
    63  		v, ok = td.GetBit(i, tup)
    64  	case val.DecimalEnc:
    65  		v, ok = td.GetDecimal(i, tup)
    66  	case val.YearEnc:
    67  		v, ok = td.GetYear(i, tup)
    68  	case val.DateEnc:
    69  		v, ok = td.GetDate(i, tup)
    70  	case val.TimeEnc:
    71  		var t int64
    72  		t, ok = td.GetSqlTime(i, tup)
    73  		if ok {
    74  			v = types.Timespan(t)
    75  		}
    76  	case val.DatetimeEnc:
    77  		v, ok = td.GetDatetime(i, tup)
    78  	case val.EnumEnc:
    79  		v, ok = td.GetEnum(i, tup)
    80  	case val.SetEnc:
    81  		v, ok = td.GetSet(i, tup)
    82  	case val.StringEnc:
    83  		v, ok = td.GetString(i, tup)
    84  	case val.ByteStringEnc:
    85  		v, ok = td.GetBytes(i, tup)
    86  	case val.JSONEnc:
    87  		var buf []byte
    88  		buf, ok = td.GetJSON(i, tup)
    89  		if ok {
    90  			var doc types.JSONDocument
    91  			err = json.Unmarshal(buf, &doc.Val)
    92  			v = doc
    93  		}
    94  	// TODO: eventually remove this, and only read GeomAddrEnc
    95  	case val.GeometryEnc:
    96  		var buf []byte
    97  		buf, ok = td.GetGeometry(i, tup)
    98  		if ok {
    99  			v, err = deserializeGeometry(buf)
   100  		}
   101  	case val.GeomAddrEnc:
   102  		// TODO: until GeometryEnc is removed, we must check if GeomAddrEnc is a GeometryEnc
   103  		var buf []byte
   104  		buf, ok = td.GetGeometry(i, tup)
   105  		if ok {
   106  			v, err = deserializeGeometry(buf)
   107  		}
   108  		if !ok || err != nil {
   109  			var h hash.Hash
   110  			h, ok = td.GetGeometryAddr(i, tup)
   111  			if ok {
   112  				buf, err = NewByteArray(h, ns).ToBytes(ctx)
   113  				if err != nil {
   114  					return nil, err
   115  				}
   116  				v, err = deserializeGeometry(buf)
   117  			}
   118  		}
   119  	case val.Hash128Enc:
   120  		v, ok = td.GetHash128(i, tup)
   121  	case val.BytesAddrEnc:
   122  		var h hash.Hash
   123  		h, ok = td.GetBytesAddr(i, tup)
   124  		if ok {
   125  			v, err = NewByteArray(h, ns).ToBytes(ctx)
   126  		}
   127  	case val.JSONAddrEnc:
   128  		var h hash.Hash
   129  		h, ok = td.GetJSONAddr(i, tup)
   130  		if ok {
   131  			v, err = NewJSONDoc(h, ns).ToLazyJSONDocument(ctx)
   132  		}
   133  	case val.StringAddrEnc:
   134  		var h hash.Hash
   135  		h, ok = td.GetStringAddr(i, tup)
   136  		if ok {
   137  			v, err = NewTextStorage(h, ns).ToString(ctx)
   138  		}
   139  	case val.CommitAddrEnc:
   140  		v, ok = td.GetCommitAddr(i, tup)
   141  	case val.CellEnc:
   142  		v, ok = td.GetCell(i, tup)
   143  	case val.ExtendedEnc:
   144  		var b []byte
   145  		b, ok = td.GetExtended(i, tup)
   146  		if ok {
   147  			v, err = td.Handlers[i].DeserializeValue(b)
   148  		}
   149  	case val.ExtendedAddrEnc:
   150  		var h hash.Hash
   151  		h, ok = td.GetExtendedAddr(i, tup)
   152  		if ok {
   153  			var b []byte
   154  			b, err = NewByteArray(h, ns).ToBytes(ctx)
   155  			if err == nil {
   156  				v, err = td.Handlers[i].DeserializeValue(b)
   157  			}
   158  		}
   159  	default:
   160  		panic("unknown val.encoding")
   161  	}
   162  	if !ok || err != nil {
   163  		return nil, err
   164  	}
   165  	return v, err
   166  }
   167  
   168  // Serialize writes an interface{} into the byte string representation used in val.Tuple, and returns the byte string,
   169  // and a boolean indicating success.
   170  func Serialize(ctx context.Context, ns NodeStore, t val.Type, v interface{}) (result []byte, err error) {
   171  	newTupleDesc := val.NewTupleDescriptor(t)
   172  	tb := val.NewTupleBuilder(newTupleDesc)
   173  	err = PutField(ctx, ns, tb, 0, v)
   174  	if err != nil {
   175  		return nil, err
   176  	}
   177  	return newTupleDesc.GetField(0, tb.Build(pool.NewBuffPool())), nil
   178  }
   179  
   180  // PutField writes an interface{} to the ith field of the Tuple being built.
   181  func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v interface{}) error {
   182  	if v == nil {
   183  		return nil // NULL
   184  	}
   185  
   186  	enc := tb.Desc.Types[i].Enc
   187  	switch enc {
   188  	case val.Int8Enc:
   189  		tb.PutInt8(i, int8(convInt(v)))
   190  	case val.Uint8Enc:
   191  		tb.PutUint8(i, uint8(convUint(v)))
   192  	case val.Int16Enc:
   193  		tb.PutInt16(i, int16(convInt(v)))
   194  	case val.Uint16Enc:
   195  		tb.PutUint16(i, uint16(convUint(v)))
   196  	case val.Int32Enc:
   197  		tb.PutInt32(i, int32(convInt(v)))
   198  	case val.Uint32Enc:
   199  		tb.PutUint32(i, uint32(convUint(v)))
   200  	case val.Int64Enc:
   201  		tb.PutInt64(i, int64(convInt(v)))
   202  	case val.Uint64Enc:
   203  		tb.PutUint64(i, uint64(convUint(v)))
   204  	case val.Float32Enc:
   205  		tb.PutFloat32(i, v.(float32))
   206  	case val.Float64Enc:
   207  		tb.PutFloat64(i, v.(float64))
   208  	case val.Bit64Enc:
   209  		tb.PutBit(i, uint64(convUint(v)))
   210  	case val.DecimalEnc:
   211  		tb.PutDecimal(i, v.(decimal.Decimal))
   212  	case val.YearEnc:
   213  		tb.PutYear(i, v.(int16))
   214  	case val.DateEnc:
   215  		tb.PutDate(i, v.(time.Time))
   216  	case val.TimeEnc:
   217  		tb.PutSqlTime(i, int64(v.(types.Timespan)))
   218  	case val.DatetimeEnc:
   219  		tb.PutDatetime(i, v.(time.Time))
   220  	case val.EnumEnc:
   221  		tb.PutEnum(i, v.(uint16))
   222  	case val.SetEnc:
   223  		tb.PutSet(i, v.(uint64))
   224  	case val.StringEnc:
   225  		return tb.PutString(i, v.(string))
   226  	case val.ByteStringEnc:
   227  		if s, ok := v.(string); ok {
   228  			if len(s) > math.MaxUint16 {
   229  				return ErrValueExceededMaxFieldSize
   230  			}
   231  			v = []byte(s)
   232  		}
   233  		tb.PutByteString(i, v.([]byte))
   234  	case val.Hash128Enc:
   235  		tb.PutHash128(i, v.([]byte))
   236  	// TODO: eventually remove GeometryEnc, but in the meantime write them as GeomAddrEnc
   237  	case val.GeometryEnc:
   238  		geo := serializeGeometry(v)
   239  		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
   240  		if err != nil {
   241  			return err
   242  		}
   243  		tb.PutGeometryAddr(i, h)
   244  	case val.GeomAddrEnc:
   245  		geo := serializeGeometry(v)
   246  		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
   247  		if err != nil {
   248  			return err
   249  		}
   250  		tb.PutGeometryAddr(i, h)
   251  	case val.JSONAddrEnc:
   252  		buf, err := convJson(v)
   253  		if err != nil {
   254  			return err
   255  		}
   256  		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(buf), len(buf))
   257  		if err != nil {
   258  			return err
   259  		}
   260  		tb.PutJSONAddr(i, h)
   261  	case val.BytesAddrEnc:
   262  		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(v.([]byte)), len(v.([]byte)))
   263  		if err != nil {
   264  			return err
   265  		}
   266  		tb.PutBytesAddr(i, h)
   267  	case val.StringAddrEnc:
   268  		//todo: v will be []byte after daylon's changes
   269  		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader([]byte(v.(string))), len(v.(string)))
   270  		if err != nil {
   271  			return err
   272  		}
   273  		tb.PutStringAddr(i, h)
   274  	case val.CommitAddrEnc:
   275  		tb.PutCommitAddr(i, v.(hash.Hash))
   276  	case val.CellEnc:
   277  		if _, ok := v.([]byte); ok {
   278  			var err error
   279  			v, err = deserializeGeometry(v.([]byte))
   280  			if err != nil {
   281  				return err
   282  			}
   283  		}
   284  		tb.PutCell(i, ZCell(v.(types.GeometryValue)))
   285  	case val.ExtendedEnc:
   286  		b, err := tb.Desc.Handlers[i].SerializeValue(v)
   287  		if err != nil {
   288  			return err
   289  		}
   290  		if len(b) > math.MaxUint16 {
   291  			return ErrValueExceededMaxFieldSize
   292  		}
   293  		tb.PutExtended(i, b)
   294  	case val.ExtendedAddrEnc:
   295  		b, err := tb.Desc.Handlers[i].SerializeValue(v)
   296  		if err != nil {
   297  			return err
   298  		}
   299  		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(b), len(b))
   300  		if err != nil {
   301  			return err
   302  		}
   303  		tb.PutExtendedAddr(i, h)
   304  	default:
   305  		panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
   306  	}
   307  	return nil
   308  }
   309  
   310  func convInt(v interface{}) int {
   311  	switch i := v.(type) {
   312  	case int:
   313  		return i
   314  	case int8:
   315  		return int(i)
   316  	case uint8:
   317  		return int(i)
   318  	case int16:
   319  		return int(i)
   320  	case uint16:
   321  		return int(i)
   322  	case int32:
   323  		return int(i)
   324  	case uint32:
   325  		return int(i)
   326  	case int64:
   327  		return int(i)
   328  	case uint64:
   329  		return int(i)
   330  	default:
   331  		panic("impossible conversion")
   332  	}
   333  }
   334  
   335  func convUint(v interface{}) uint {
   336  	switch i := v.(type) {
   337  	case uint:
   338  		return i
   339  	case int:
   340  		return uint(i)
   341  	case int8:
   342  		return uint(i)
   343  	case uint8:
   344  		return uint(i)
   345  	case int16:
   346  		return uint(i)
   347  	case uint16:
   348  		return uint(i)
   349  	case int32:
   350  		return uint(i)
   351  	case uint32:
   352  		return uint(i)
   353  	case int64:
   354  		return uint(i)
   355  	case uint64:
   356  		return uint(i)
   357  	default:
   358  		panic("impossible conversion")
   359  	}
   360  }
   361  
   362  func deserializeGeometry(buf []byte) (v interface{}, err error) {
   363  	srid, _, typ, err := types.DeserializeEWKBHeader(buf)
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  	buf = buf[types.EWKBHeaderSize:]
   368  	switch typ {
   369  	case types.WKBPointID:
   370  		v, _, err = types.DeserializePoint(buf, false, srid)
   371  	case types.WKBLineID:
   372  		v, _, err = types.DeserializeLine(buf, false, srid)
   373  	case types.WKBPolyID:
   374  		v, _, err = types.DeserializePoly(buf, false, srid)
   375  	case types.WKBMultiPointID:
   376  		v, _, err = types.DeserializeMPoint(buf, false, srid)
   377  	case types.WKBMultiLineID:
   378  		v, _, err = types.DeserializeMLine(buf, false, srid)
   379  	case types.WKBMultiPolyID:
   380  		v, _, err = types.DeserializeMPoly(buf, false, srid)
   381  	case types.WKBGeomCollID:
   382  		v, _, err = types.DeserializeGeomColl(buf, false, srid)
   383  	default:
   384  		return nil, fmt.Errorf("unknown geometry type %d", typ)
   385  	}
   386  	return
   387  }
   388  
   389  func serializeGeometry(v interface{}) []byte {
   390  	switch t := v.(type) {
   391  	case types.GeometryValue:
   392  		return t.Serialize()
   393  	default:
   394  		panic(fmt.Sprintf("unknown geometry %v", v))
   395  	}
   396  }
   397  
   398  func SerializeBytesToAddr(ctx context.Context, ns NodeStore, r io.Reader, dataSize int) (hash.Hash, error) {
   399  	bb := ns.BlobBuilder()
   400  	bb.Init(dataSize)
   401  	_, addr, err := bb.Chunk(ctx, r)
   402  	if err != nil {
   403  		return hash.Hash{}, err
   404  	}
   405  	return addr, nil
   406  }
   407  
   408  func convJson(v interface{}) (buf []byte, err error) {
   409  	v, _, err = types.JSON.Convert(v)
   410  	if err != nil {
   411  		return nil, err
   412  	}
   413  	return types.MarshallJson(v.(sql.JSONWrapper))
   414  }