github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/migrate/tuples.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package migrate 16 17 import ( 18 "bytes" 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "strings" 24 "time" 25 26 "github.com/shopspring/decimal" 27 28 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 29 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/json" 30 "github.com/dolthub/dolt/go/store/pool" 31 "github.com/dolthub/dolt/go/store/prolly/tree" 32 "github.com/dolthub/dolt/go/store/types" 33 "github.com/dolthub/dolt/go/store/val" 34 ) 35 36 const ( 37 maxInlineValue = 16383 38 ) 39 40 var ErrCannotMigrateText = errors.New("could not migrate TEXT value to VARCHAR, TEXT value exceeds 16383 size limit") 41 var ErrCannotMigrateBlob = errors.New("could not migrate BLOB value to VARBINARY, BLOB value exceeds 16383 size limit") 42 43 type translator struct { 44 builder *val.TupleBuilder 45 46 // maps columns tags to ordinal position 47 mapping map[uint64]int 48 49 ns tree.NodeStore 50 pool pool.BuffPool 51 } 52 53 func tupleTranslatorsFromSchema(sch schema.Schema, ns tree.NodeStore) (kt, vt translator) { 54 kd := sch.GetKeyDescriptor() 55 vd := sch.GetValueDescriptor() 56 57 keyMap := sch.GetPKCols().TagToIdx 58 valMap := sch.GetNonPKCols().TagToIdx 59 60 if !schema.IsKeyless(sch) { 61 kt = newTupleTranslator(ns, keyMap, kd) 62 vt = newTupleTranslator(ns, valMap, vd) 63 return 64 } 65 66 // for keyless tables, we must account for the id and cardinality columns 67 keyMap2 := map[uint64]int{schema.KeylessRowIdTag: 0} 68 valMap2 := map[uint64]int{schema.KeylessRowCardinalityTag: 0} 69 70 // shift positions for other columns 71 for tag, pos := range valMap { 72 valMap2[tag] = pos + 1 73 } 74 // assert previous keyMap was empty 75 assertTrue(len(keyMap) == 0) 76 77 kt = newTupleTranslator(ns, keyMap2, kd) 78 vt = newTupleTranslator(ns, valMap2, vd) 79 return 80 } 81 82 func newTupleTranslator(ns tree.NodeStore, mapping map[uint64]int, desc val.TupleDesc) translator { 83 return translator{ 84 builder: val.NewTupleBuilder(desc), 85 mapping: mapping, 86 ns: ns, 87 pool: pool.NewBuffPool(), 88 } 89 } 90 91 // TranslateTuple translates a types.Tuple into a val.Tuple. 92 func (t translator) TranslateTuple(ctx context.Context, tup types.Tuple) (val.Tuple, error) { 93 if !isEven(tup.Len()) { 94 return nil, fmt.Errorf("expected even-legnth tuple (len %d)", tup.Len()) 95 } 96 97 var tag uint64 98 err := tup.IterFields(func(i uint64, value types.Value) (stop bool, err error) { 99 // even fields are column tags, odd fields are column values 100 if isEven(i) { 101 tag = uint64(value.(types.Uint)) 102 } else { 103 // |tag| set in previous iteration 104 pos, ok := t.mapping[tag] 105 if ok { 106 err = translateNomsField(ctx, t.ns, value, pos, t.builder) 107 stop = err != nil 108 } // else tombstone column 109 } 110 return 111 }) 112 if err != nil { 113 return nil, err 114 } 115 116 defer func() { 117 if r := recover(); r != nil { 118 panic(tup.String()) 119 } 120 }() 121 122 return t.builder.Build(t.pool), nil 123 } 124 125 func translateNomsField(ctx context.Context, ns tree.NodeStore, value types.Value, idx int, b *val.TupleBuilder) error { 126 nk := value.Kind() 127 switch nk { 128 case types.NullKind: 129 return nil // todo(andy): log warning? 130 131 case types.UintKind: 132 translateUintField(value.(types.Uint), idx, b) 133 134 case types.IntKind: 135 translateIntField(value.(types.Int), idx, b) 136 137 case types.FloatKind: 138 translateFloatField(value.(types.Float), idx, b) 139 140 case types.TimestampKind: 141 translateTimestampField(value.(types.Timestamp), idx, b) 142 143 case types.BoolKind: 144 b.PutBool(idx, bool(value.(types.Bool))) 145 146 case types.StringKind: 147 return translateStringField(ctx, ns, value.(types.String), idx, b) 148 149 case types.UUIDKind: 150 uuid := value.(types.UUID) 151 b.PutHash128(idx, uuid[:]) 152 153 case types.InlineBlobKind: 154 b.PutByteString(idx, value.(types.InlineBlob)) 155 156 case types.DecimalKind: 157 b.PutDecimal(idx, decimal.Decimal(value.(types.Decimal))) 158 159 case types.GeometryKind: 160 v := value.(types.Geometry).Inner 161 translateGeometryField(v, idx, b) 162 163 case types.PointKind, 164 types.LineStringKind, 165 types.PolygonKind, 166 types.MultiPointKind, 167 types.MultiLineStringKind, 168 types.MultiPolygonKind, 169 types.GeometryCollectionKind: 170 translateGeometryField(value, idx, b) 171 172 case types.JSONKind: 173 return translateJSONField(ctx, ns, value.(types.JSON), idx, b) 174 175 case types.BlobKind: 176 return translateBlobField(ctx, ns, value.(types.Blob), idx, b) 177 178 case types.ExtendedKind: 179 return fmt.Errorf("extended types are invalid during migration") 180 181 default: 182 return fmt.Errorf("encountered unexpected NomsKind %s", 183 types.KindToString[nk]) 184 } 185 return nil 186 } 187 188 func translateUintField(value types.Uint, idx int, b *val.TupleBuilder) { 189 typ := b.Desc.Types[idx] 190 switch typ.Enc { 191 case val.Uint8Enc: 192 b.PutUint8(idx, uint8(value)) 193 case val.Uint16Enc: 194 b.PutUint16(idx, uint16(value)) 195 case val.Uint32Enc: 196 b.PutUint32(idx, uint32(value)) 197 case val.Uint64Enc: 198 b.PutUint64(idx, uint64(value)) 199 case val.EnumEnc: 200 b.PutEnum(idx, uint16(value)) 201 case val.SetEnc: 202 b.PutSet(idx, uint64(value)) 203 default: 204 panic(fmt.Sprintf("unexpected encoding for uint (%d)", typ.Enc)) 205 } 206 } 207 208 func translateIntField(value types.Int, idx int, b *val.TupleBuilder) { 209 typ := b.Desc.Types[idx] 210 switch typ.Enc { 211 case val.Int8Enc: 212 b.PutInt8(idx, int8(value)) 213 case val.Int16Enc: 214 b.PutInt16(idx, int16(value)) 215 case val.Int32Enc: 216 b.PutInt32(idx, int32(value)) 217 case val.Int64Enc: 218 b.PutInt64(idx, int64(value)) 219 case val.YearEnc: 220 b.PutYear(idx, int16(value)) 221 case val.TimeEnc: 222 b.PutSqlTime(idx, int64(value)) 223 default: 224 panic(fmt.Sprintf("unexpected encoding for int (%d)", typ.Enc)) 225 } 226 } 227 228 func translateFloatField(value types.Float, idx int, b *val.TupleBuilder) { 229 typ := b.Desc.Types[idx] 230 switch typ.Enc { 231 case val.Float32Enc: 232 b.PutFloat32(idx, float32(value)) 233 case val.Float64Enc: 234 b.PutFloat64(idx, float64(value)) 235 default: 236 panic(fmt.Sprintf("unexpected encoding for float (%d)", typ.Enc)) 237 } 238 } 239 240 func translateStringField(ctx context.Context, ns tree.NodeStore, value types.String, idx int, b *val.TupleBuilder) error { 241 typ := b.Desc.Types[idx] 242 switch typ.Enc { 243 case val.StringEnc: 244 b.PutString(idx, string(value)) 245 246 case val.StringAddrEnc: 247 // note: previously, TEXT fields were serialized as types.String 248 rd := strings.NewReader(string(value)) 249 bb := ns.BlobBuilder() 250 bb.Init(len(value)) 251 _, addr, err := bb.Chunk(ctx, rd) 252 if err != nil { 253 return err 254 } 255 b.PutStringAddr(idx, addr) 256 257 default: 258 panic(fmt.Sprintf("unexpected encoding for string (%d)", typ.Enc)) 259 } 260 return nil 261 } 262 263 func translateTimestampField(value types.Timestamp, idx int, b *val.TupleBuilder) { 264 typ := b.Desc.Types[idx] 265 switch typ.Enc { 266 case val.DateEnc: 267 b.PutDate(idx, time.Time(value)) 268 case val.DatetimeEnc: 269 b.PutDatetime(idx, time.Time(value)) 270 default: 271 panic(fmt.Sprintf("unexpected encoding for timestamp (%d)", typ.Enc)) 272 } 273 } 274 275 func translateGeometryField(value types.Value, idx int, b *val.TupleBuilder) { 276 nk := value.Kind() 277 switch nk { 278 case types.PointKind: 279 p := types.ConvertTypesPointToSQLPoint(value.(types.Point)) 280 b.PutGeometry(idx, p.Serialize()) 281 282 case types.LineStringKind: 283 l := types.ConvertTypesLineStringToSQLLineString(value.(types.LineString)) 284 b.PutGeometry(idx, l.Serialize()) 285 286 case types.PolygonKind: 287 p := types.ConvertTypesPolygonToSQLPolygon(value.(types.Polygon)) 288 b.PutGeometry(idx, p.Serialize()) 289 290 case types.MultiPointKind: 291 p := types.ConvertTypesMultiPointToSQLMultiPoint(value.(types.MultiPoint)) 292 b.PutGeometry(idx, p.Serialize()) 293 294 case types.MultiLineStringKind: 295 l := types.ConvertTypesMultiLineStringToSQLMultiLineString(value.(types.MultiLineString)) 296 b.PutGeometry(idx, l.Serialize()) 297 298 case types.MultiPolygonKind: 299 p := types.ConvertTypesMultiPolygonToSQLMultiPolygon(value.(types.MultiPolygon)) 300 b.PutGeometry(idx, p.Serialize()) 301 302 case types.GeometryCollectionKind: 303 p := types.ConvertTypesGeomCollToSQLGeomColl(value.(types.GeomColl)) 304 b.PutGeometry(idx, p.Serialize()) 305 306 default: 307 panic(fmt.Sprintf("unexpected NomsKind for geometry (%d)", nk)) 308 } 309 } 310 311 func translateJSONField(ctx context.Context, ns tree.NodeStore, value types.JSON, idx int, b *val.TupleBuilder) error { 312 s, err := json.NomsJSONToString(ctx, json.NomsJSON(value)) 313 if err != nil { 314 return err 315 } 316 buf := bytes.NewBuffer([]byte(s)) 317 318 bb := ns.BlobBuilder() 319 bb.Init(len(s)) 320 _, addr, err := bb.Chunk(ctx, buf) 321 if err != nil { 322 return err 323 } 324 b.PutJSONAddr(idx, addr) 325 return nil 326 } 327 328 func translateBlobField(ctx context.Context, ns tree.NodeStore, value types.Blob, idx int, b *val.TupleBuilder) error { 329 switch b.Desc.Types[idx].Enc { 330 // maybe convert from TEXT/BLOB to VARBINARY/VARCHAR 331 // if this column is a primary/secondary index key 332 case val.StringEnc, val.ByteStringEnc: 333 return translateBlobValueToInlineField(ctx, value, idx, b) 334 case val.StringAddrEnc, val.BytesAddrEnc: 335 // common case 336 default: 337 return fmt.Errorf("unexpected encoding for blob (%d)", b.Desc.Types[idx].Enc) 338 } 339 340 buf := make([]byte, value.Len()) 341 _, err := value.ReadAt(ctx, buf, 0) 342 if err == io.EOF { 343 err = nil 344 } else if err != nil { 345 return err 346 } 347 348 bb := ns.BlobBuilder() 349 bb.Init(int(value.Len())) 350 _, addr, err := bb.Chunk(ctx, bytes.NewReader(buf)) 351 if err != nil { 352 return err 353 } 354 355 typ := b.Desc.Types[idx] 356 switch typ.Enc { 357 case val.BytesAddrEnc: 358 b.PutBytesAddr(idx, addr) 359 case val.StringAddrEnc: 360 b.PutStringAddr(idx, addr) 361 } 362 return nil 363 } 364 365 func translateBlobValueToInlineField(ctx context.Context, value types.Blob, idx int, b *val.TupleBuilder) error { 366 if value.Len() >= maxInlineValue { 367 if b.Desc.Types[idx].Enc == val.StringEnc { 368 return ErrCannotMigrateText 369 } else { 370 return ErrCannotMigrateBlob 371 } 372 } 373 374 buf := make([]byte, value.Len()) 375 _, err := value.ReadAt(ctx, buf, 0) 376 if err == io.EOF { 377 err = nil 378 } else if err != nil { 379 return err 380 } 381 382 typ := b.Desc.Types[idx] 383 switch typ.Enc { 384 case val.ByteStringEnc: 385 b.PutByteString(idx, buf) 386 case val.StringEnc: 387 b.PutString(idx, string(buf)) 388 default: 389 panic(fmt.Sprintf("unexpected encoding for blob (%d)", typ.Enc)) 390 } 391 return nil 392 } 393 394 func isEven(n uint64) bool { 395 return n%2 == 0 396 }