github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/prolly_fields.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "bytes" 19 "context" 20 "encoding/json" 21 "errors" 22 "fmt" 23 "io" 24 "math" 25 "time" 26 27 "github.com/dolthub/go-mysql-server/sql" 28 "github.com/dolthub/go-mysql-server/sql/types" 29 "github.com/shopspring/decimal" 30 31 "github.com/dolthub/dolt/go/store/hash" 32 "github.com/dolthub/dolt/go/store/pool" 33 "github.com/dolthub/dolt/go/store/val" 34 ) 35 36 var ErrValueExceededMaxFieldSize = errors.New("value exceeded max field size of 65kb") 37 38 // GetField reads the value from the ith field of the Tuple as an interface{}. 39 func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns NodeStore) (v interface{}, err error) { 40 var ok bool 41 switch td.Types[i].Enc { 42 case val.Int8Enc: 43 v, ok = td.GetInt8(i, tup) 44 case val.Uint8Enc: 45 v, ok = td.GetUint8(i, tup) 46 case val.Int16Enc: 47 v, ok = td.GetInt16(i, tup) 48 case val.Uint16Enc: 49 v, ok = td.GetUint16(i, tup) 50 case val.Int32Enc: 51 v, ok = td.GetInt32(i, tup) 52 case val.Uint32Enc: 53 v, ok = td.GetUint32(i, tup) 54 case val.Int64Enc: 55 v, ok = td.GetInt64(i, tup) 56 case val.Uint64Enc: 57 v, ok = td.GetUint64(i, tup) 58 case val.Float32Enc: 59 v, ok = td.GetFloat32(i, tup) 60 case val.Float64Enc: 61 v, ok = td.GetFloat64(i, tup) 62 case val.Bit64Enc: 63 v, ok = td.GetBit(i, tup) 64 case val.DecimalEnc: 65 v, ok = td.GetDecimal(i, tup) 66 case val.YearEnc: 67 v, ok = td.GetYear(i, tup) 68 case val.DateEnc: 69 v, ok = td.GetDate(i, tup) 70 case val.TimeEnc: 71 var t int64 72 t, ok = td.GetSqlTime(i, tup) 73 if ok { 74 v = types.Timespan(t) 75 } 76 case val.DatetimeEnc: 77 v, ok = td.GetDatetime(i, tup) 78 case val.EnumEnc: 79 v, ok = td.GetEnum(i, tup) 80 case val.SetEnc: 81 v, ok = td.GetSet(i, tup) 82 case val.StringEnc: 83 v, ok = td.GetString(i, tup) 84 case val.ByteStringEnc: 85 v, ok = td.GetBytes(i, tup) 86 case val.JSONEnc: 87 var buf []byte 88 buf, ok = td.GetJSON(i, tup) 89 if ok { 90 var doc types.JSONDocument 91 err = json.Unmarshal(buf, &doc.Val) 92 v = doc 93 } 94 // TODO: eventually remove this, and only read GeomAddrEnc 95 case val.GeometryEnc: 96 var buf []byte 97 buf, ok = td.GetGeometry(i, tup) 98 if ok { 99 v, err = deserializeGeometry(buf) 100 } 101 case val.GeomAddrEnc: 102 // TODO: until GeometryEnc is removed, we must check if GeomAddrEnc is a GeometryEnc 103 var buf []byte 104 buf, ok = td.GetGeometry(i, tup) 105 if ok { 106 v, err = deserializeGeometry(buf) 107 } 108 if !ok || err != nil { 109 var h hash.Hash 110 h, ok = td.GetGeometryAddr(i, tup) 111 if ok { 112 buf, err = NewByteArray(h, ns).ToBytes(ctx) 113 if err != nil { 114 return nil, err 115 } 116 v, err = deserializeGeometry(buf) 117 } 118 } 119 case val.Hash128Enc: 120 v, ok = td.GetHash128(i, tup) 121 case val.BytesAddrEnc: 122 var h hash.Hash 123 h, ok = td.GetBytesAddr(i, tup) 124 if ok { 125 v, err = NewByteArray(h, ns).ToBytes(ctx) 126 } 127 case val.JSONAddrEnc: 128 var h hash.Hash 129 h, ok = td.GetJSONAddr(i, tup) 130 if ok { 131 v, err = NewJSONDoc(h, ns).ToLazyJSONDocument(ctx) 132 } 133 case val.StringAddrEnc: 134 var h hash.Hash 135 h, ok = td.GetStringAddr(i, tup) 136 if ok { 137 v, err = NewTextStorage(h, ns).ToString(ctx) 138 } 139 case val.CommitAddrEnc: 140 v, ok = td.GetCommitAddr(i, tup) 141 case val.CellEnc: 142 v, ok = td.GetCell(i, tup) 143 case val.ExtendedEnc: 144 var b []byte 145 b, ok = td.GetExtended(i, tup) 146 if ok { 147 v, err = td.Handlers[i].DeserializeValue(b) 148 } 149 case val.ExtendedAddrEnc: 150 var h hash.Hash 151 h, ok = td.GetExtendedAddr(i, tup) 152 if ok { 153 var b []byte 154 b, err = NewByteArray(h, ns).ToBytes(ctx) 155 if err == nil { 156 v, err = td.Handlers[i].DeserializeValue(b) 157 } 158 } 159 default: 160 panic("unknown val.encoding") 161 } 162 if !ok || err != nil { 163 return nil, err 164 } 165 return v, err 166 } 167 168 // Serialize writes an interface{} into the byte string representation used in val.Tuple, and returns the byte string, 169 // and a boolean indicating success. 170 func Serialize(ctx context.Context, ns NodeStore, t val.Type, v interface{}) (result []byte, err error) { 171 newTupleDesc := val.NewTupleDescriptor(t) 172 tb := val.NewTupleBuilder(newTupleDesc) 173 err = PutField(ctx, ns, tb, 0, v) 174 if err != nil { 175 return nil, err 176 } 177 return newTupleDesc.GetField(0, tb.Build(pool.NewBuffPool())), nil 178 } 179 180 // PutField writes an interface{} to the ith field of the Tuple being built. 181 func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v interface{}) error { 182 if v == nil { 183 return nil // NULL 184 } 185 186 enc := tb.Desc.Types[i].Enc 187 switch enc { 188 case val.Int8Enc: 189 tb.PutInt8(i, int8(convInt(v))) 190 case val.Uint8Enc: 191 tb.PutUint8(i, uint8(convUint(v))) 192 case val.Int16Enc: 193 tb.PutInt16(i, int16(convInt(v))) 194 case val.Uint16Enc: 195 tb.PutUint16(i, uint16(convUint(v))) 196 case val.Int32Enc: 197 tb.PutInt32(i, int32(convInt(v))) 198 case val.Uint32Enc: 199 tb.PutUint32(i, uint32(convUint(v))) 200 case val.Int64Enc: 201 tb.PutInt64(i, int64(convInt(v))) 202 case val.Uint64Enc: 203 tb.PutUint64(i, uint64(convUint(v))) 204 case val.Float32Enc: 205 tb.PutFloat32(i, v.(float32)) 206 case val.Float64Enc: 207 tb.PutFloat64(i, v.(float64)) 208 case val.Bit64Enc: 209 tb.PutBit(i, uint64(convUint(v))) 210 case val.DecimalEnc: 211 tb.PutDecimal(i, v.(decimal.Decimal)) 212 case val.YearEnc: 213 tb.PutYear(i, v.(int16)) 214 case val.DateEnc: 215 tb.PutDate(i, v.(time.Time)) 216 case val.TimeEnc: 217 tb.PutSqlTime(i, int64(v.(types.Timespan))) 218 case val.DatetimeEnc: 219 tb.PutDatetime(i, v.(time.Time)) 220 case val.EnumEnc: 221 tb.PutEnum(i, v.(uint16)) 222 case val.SetEnc: 223 tb.PutSet(i, v.(uint64)) 224 case val.StringEnc: 225 return tb.PutString(i, v.(string)) 226 case val.ByteStringEnc: 227 if s, ok := v.(string); ok { 228 if len(s) > math.MaxUint16 { 229 return ErrValueExceededMaxFieldSize 230 } 231 v = []byte(s) 232 } 233 tb.PutByteString(i, v.([]byte)) 234 case val.Hash128Enc: 235 tb.PutHash128(i, v.([]byte)) 236 // TODO: eventually remove GeometryEnc, but in the meantime write them as GeomAddrEnc 237 case val.GeometryEnc: 238 geo := serializeGeometry(v) 239 h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo)) 240 if err != nil { 241 return err 242 } 243 tb.PutGeometryAddr(i, h) 244 case val.GeomAddrEnc: 245 geo := serializeGeometry(v) 246 h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo)) 247 if err != nil { 248 return err 249 } 250 tb.PutGeometryAddr(i, h) 251 case val.JSONAddrEnc: 252 buf, err := convJson(v) 253 if err != nil { 254 return err 255 } 256 h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(buf), len(buf)) 257 if err != nil { 258 return err 259 } 260 tb.PutJSONAddr(i, h) 261 case val.BytesAddrEnc: 262 h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(v.([]byte)), len(v.([]byte))) 263 if err != nil { 264 return err 265 } 266 tb.PutBytesAddr(i, h) 267 case val.StringAddrEnc: 268 //todo: v will be []byte after daylon's changes 269 h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader([]byte(v.(string))), len(v.(string))) 270 if err != nil { 271 return err 272 } 273 tb.PutStringAddr(i, h) 274 case val.CommitAddrEnc: 275 tb.PutCommitAddr(i, v.(hash.Hash)) 276 case val.CellEnc: 277 if _, ok := v.([]byte); ok { 278 var err error 279 v, err = deserializeGeometry(v.([]byte)) 280 if err != nil { 281 return err 282 } 283 } 284 tb.PutCell(i, ZCell(v.(types.GeometryValue))) 285 case val.ExtendedEnc: 286 b, err := tb.Desc.Handlers[i].SerializeValue(v) 287 if err != nil { 288 return err 289 } 290 if len(b) > math.MaxUint16 { 291 return ErrValueExceededMaxFieldSize 292 } 293 tb.PutExtended(i, b) 294 case val.ExtendedAddrEnc: 295 b, err := tb.Desc.Handlers[i].SerializeValue(v) 296 if err != nil { 297 return err 298 } 299 h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(b), len(b)) 300 if err != nil { 301 return err 302 } 303 tb.PutExtendedAddr(i, h) 304 default: 305 panic(fmt.Sprintf("unknown encoding %v %v", enc, v)) 306 } 307 return nil 308 } 309 310 func convInt(v interface{}) int { 311 switch i := v.(type) { 312 case int: 313 return i 314 case int8: 315 return int(i) 316 case uint8: 317 return int(i) 318 case int16: 319 return int(i) 320 case uint16: 321 return int(i) 322 case int32: 323 return int(i) 324 case uint32: 325 return int(i) 326 case int64: 327 return int(i) 328 case uint64: 329 return int(i) 330 default: 331 panic("impossible conversion") 332 } 333 } 334 335 func convUint(v interface{}) uint { 336 switch i := v.(type) { 337 case uint: 338 return i 339 case int: 340 return uint(i) 341 case int8: 342 return uint(i) 343 case uint8: 344 return uint(i) 345 case int16: 346 return uint(i) 347 case uint16: 348 return uint(i) 349 case int32: 350 return uint(i) 351 case uint32: 352 return uint(i) 353 case int64: 354 return uint(i) 355 case uint64: 356 return uint(i) 357 default: 358 panic("impossible conversion") 359 } 360 } 361 362 func deserializeGeometry(buf []byte) (v interface{}, err error) { 363 srid, _, typ, err := types.DeserializeEWKBHeader(buf) 364 if err != nil { 365 return nil, err 366 } 367 buf = buf[types.EWKBHeaderSize:] 368 switch typ { 369 case types.WKBPointID: 370 v, _, err = types.DeserializePoint(buf, false, srid) 371 case types.WKBLineID: 372 v, _, err = types.DeserializeLine(buf, false, srid) 373 case types.WKBPolyID: 374 v, _, err = types.DeserializePoly(buf, false, srid) 375 case types.WKBMultiPointID: 376 v, _, err = types.DeserializeMPoint(buf, false, srid) 377 case types.WKBMultiLineID: 378 v, _, err = types.DeserializeMLine(buf, false, srid) 379 case types.WKBMultiPolyID: 380 v, _, err = types.DeserializeMPoly(buf, false, srid) 381 case types.WKBGeomCollID: 382 v, _, err = types.DeserializeGeomColl(buf, false, srid) 383 default: 384 return nil, fmt.Errorf("unknown geometry type %d", typ) 385 } 386 return 387 } 388 389 func serializeGeometry(v interface{}) []byte { 390 switch t := v.(type) { 391 case types.GeometryValue: 392 return t.Serialize() 393 default: 394 panic(fmt.Sprintf("unknown geometry %v", v)) 395 } 396 } 397 398 func SerializeBytesToAddr(ctx context.Context, ns NodeStore, r io.Reader, dataSize int) (hash.Hash, error) { 399 bb := ns.BlobBuilder() 400 bb.Init(dataSize) 401 _, addr, err := bb.Chunk(ctx, r) 402 if err != nil { 403 return hash.Hash{}, err 404 } 405 return addr, nil 406 } 407 408 func convJson(v interface{}) (buf []byte, err error) { 409 v, _, err = types.JSON.Convert(v) 410 if err != nil { 411 return nil, err 412 } 413 return types.MarshallJson(v.(sql.JSONWrapper)) 414 }