github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/migrate/tuples.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package migrate
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/shopspring/decimal"
    27  
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/json"
    30  	"github.com/dolthub/dolt/go/store/pool"
    31  	"github.com/dolthub/dolt/go/store/prolly/tree"
    32  	"github.com/dolthub/dolt/go/store/types"
    33  	"github.com/dolthub/dolt/go/store/val"
    34  )
    35  
    36  const (
    37  	maxInlineValue = 16383
    38  )
    39  
    40  var ErrCannotMigrateText = errors.New("could not migrate TEXT value to VARCHAR, TEXT value exceeds 16383 size limit")
    41  var ErrCannotMigrateBlob = errors.New("could not migrate BLOB value to VARBINARY, BLOB value exceeds 16383 size limit")
    42  
// translator converts noms types.Tuple values into val.Tuple values for one
// tuple descriptor (see TranslateTuple).
type translator struct {
	// builder receives each translated field and produces the output tuple
	builder *val.TupleBuilder

	// maps columns tags to ordinal position
	mapping map[uint64]int

	// ns is the node store handed to the field translators; pool is the
	// buffer pool handed to builder.Build
	ns   tree.NodeStore
	pool pool.BuffPool
}
    52  
    53  func tupleTranslatorsFromSchema(sch schema.Schema, ns tree.NodeStore) (kt, vt translator) {
    54  	kd := sch.GetKeyDescriptor()
    55  	vd := sch.GetValueDescriptor()
    56  
    57  	keyMap := sch.GetPKCols().TagToIdx
    58  	valMap := sch.GetNonPKCols().TagToIdx
    59  
    60  	if !schema.IsKeyless(sch) {
    61  		kt = newTupleTranslator(ns, keyMap, kd)
    62  		vt = newTupleTranslator(ns, valMap, vd)
    63  		return
    64  	}
    65  
    66  	// for keyless tables, we must account for the id and cardinality columns
    67  	keyMap2 := map[uint64]int{schema.KeylessRowIdTag: 0}
    68  	valMap2 := map[uint64]int{schema.KeylessRowCardinalityTag: 0}
    69  
    70  	// shift positions for other columns
    71  	for tag, pos := range valMap {
    72  		valMap2[tag] = pos + 1
    73  	}
    74  	// assert previous keyMap was empty
    75  	assertTrue(len(keyMap) == 0)
    76  
    77  	kt = newTupleTranslator(ns, keyMap2, kd)
    78  	vt = newTupleTranslator(ns, valMap2, vd)
    79  	return
    80  }
    81  
    82  func newTupleTranslator(ns tree.NodeStore, mapping map[uint64]int, desc val.TupleDesc) translator {
    83  	return translator{
    84  		builder: val.NewTupleBuilder(desc),
    85  		mapping: mapping,
    86  		ns:      ns,
    87  		pool:    pool.NewBuffPool(),
    88  	}
    89  }
    90  
    91  // TranslateTuple translates a types.Tuple into a val.Tuple.
    92  func (t translator) TranslateTuple(ctx context.Context, tup types.Tuple) (val.Tuple, error) {
    93  	if !isEven(tup.Len()) {
    94  		return nil, fmt.Errorf("expected even-legnth tuple (len %d)", tup.Len())
    95  	}
    96  
    97  	var tag uint64
    98  	err := tup.IterFields(func(i uint64, value types.Value) (stop bool, err error) {
    99  		// even fields are column tags, odd fields are column values
   100  		if isEven(i) {
   101  			tag = uint64(value.(types.Uint))
   102  		} else {
   103  			// |tag| set in previous iteration
   104  			pos, ok := t.mapping[tag]
   105  			if ok {
   106  				err = translateNomsField(ctx, t.ns, value, pos, t.builder)
   107  				stop = err != nil
   108  			} // else tombstone column
   109  		}
   110  		return
   111  	})
   112  	if err != nil {
   113  		return nil, err
   114  	}
   115  
   116  	defer func() {
   117  		if r := recover(); r != nil {
   118  			panic(tup.String())
   119  		}
   120  	}()
   121  
   122  	return t.builder.Build(t.pool), nil
   123  }
   124  
   125  func translateNomsField(ctx context.Context, ns tree.NodeStore, value types.Value, idx int, b *val.TupleBuilder) error {
   126  	nk := value.Kind()
   127  	switch nk {
   128  	case types.NullKind:
   129  		return nil // todo(andy): log warning?
   130  
   131  	case types.UintKind:
   132  		translateUintField(value.(types.Uint), idx, b)
   133  
   134  	case types.IntKind:
   135  		translateIntField(value.(types.Int), idx, b)
   136  
   137  	case types.FloatKind:
   138  		translateFloatField(value.(types.Float), idx, b)
   139  
   140  	case types.TimestampKind:
   141  		translateTimestampField(value.(types.Timestamp), idx, b)
   142  
   143  	case types.BoolKind:
   144  		b.PutBool(idx, bool(value.(types.Bool)))
   145  
   146  	case types.StringKind:
   147  		return translateStringField(ctx, ns, value.(types.String), idx, b)
   148  
   149  	case types.UUIDKind:
   150  		uuid := value.(types.UUID)
   151  		b.PutHash128(idx, uuid[:])
   152  
   153  	case types.InlineBlobKind:
   154  		b.PutByteString(idx, value.(types.InlineBlob))
   155  
   156  	case types.DecimalKind:
   157  		b.PutDecimal(idx, decimal.Decimal(value.(types.Decimal)))
   158  
   159  	case types.GeometryKind:
   160  		v := value.(types.Geometry).Inner
   161  		translateGeometryField(v, idx, b)
   162  
   163  	case types.PointKind,
   164  		types.LineStringKind,
   165  		types.PolygonKind,
   166  		types.MultiPointKind,
   167  		types.MultiLineStringKind,
   168  		types.MultiPolygonKind,
   169  		types.GeometryCollectionKind:
   170  		translateGeometryField(value, idx, b)
   171  
   172  	case types.JSONKind:
   173  		return translateJSONField(ctx, ns, value.(types.JSON), idx, b)
   174  
   175  	case types.BlobKind:
   176  		return translateBlobField(ctx, ns, value.(types.Blob), idx, b)
   177  
   178  	case types.ExtendedKind:
   179  		return fmt.Errorf("extended types are invalid during migration")
   180  
   181  	default:
   182  		return fmt.Errorf("encountered unexpected NomsKind %s",
   183  			types.KindToString[nk])
   184  	}
   185  	return nil
   186  }
   187  
   188  func translateUintField(value types.Uint, idx int, b *val.TupleBuilder) {
   189  	typ := b.Desc.Types[idx]
   190  	switch typ.Enc {
   191  	case val.Uint8Enc:
   192  		b.PutUint8(idx, uint8(value))
   193  	case val.Uint16Enc:
   194  		b.PutUint16(idx, uint16(value))
   195  	case val.Uint32Enc:
   196  		b.PutUint32(idx, uint32(value))
   197  	case val.Uint64Enc:
   198  		b.PutUint64(idx, uint64(value))
   199  	case val.EnumEnc:
   200  		b.PutEnum(idx, uint16(value))
   201  	case val.SetEnc:
   202  		b.PutSet(idx, uint64(value))
   203  	default:
   204  		panic(fmt.Sprintf("unexpected encoding for uint (%d)", typ.Enc))
   205  	}
   206  }
   207  
   208  func translateIntField(value types.Int, idx int, b *val.TupleBuilder) {
   209  	typ := b.Desc.Types[idx]
   210  	switch typ.Enc {
   211  	case val.Int8Enc:
   212  		b.PutInt8(idx, int8(value))
   213  	case val.Int16Enc:
   214  		b.PutInt16(idx, int16(value))
   215  	case val.Int32Enc:
   216  		b.PutInt32(idx, int32(value))
   217  	case val.Int64Enc:
   218  		b.PutInt64(idx, int64(value))
   219  	case val.YearEnc:
   220  		b.PutYear(idx, int16(value))
   221  	case val.TimeEnc:
   222  		b.PutSqlTime(idx, int64(value))
   223  	default:
   224  		panic(fmt.Sprintf("unexpected encoding for int (%d)", typ.Enc))
   225  	}
   226  }
   227  
   228  func translateFloatField(value types.Float, idx int, b *val.TupleBuilder) {
   229  	typ := b.Desc.Types[idx]
   230  	switch typ.Enc {
   231  	case val.Float32Enc:
   232  		b.PutFloat32(idx, float32(value))
   233  	case val.Float64Enc:
   234  		b.PutFloat64(idx, float64(value))
   235  	default:
   236  		panic(fmt.Sprintf("unexpected encoding for float (%d)", typ.Enc))
   237  	}
   238  }
   239  
   240  func translateStringField(ctx context.Context, ns tree.NodeStore, value types.String, idx int, b *val.TupleBuilder) error {
   241  	typ := b.Desc.Types[idx]
   242  	switch typ.Enc {
   243  	case val.StringEnc:
   244  		b.PutString(idx, string(value))
   245  
   246  	case val.StringAddrEnc:
   247  		// note: previously, TEXT fields were serialized as types.String
   248  		rd := strings.NewReader(string(value))
   249  		bb := ns.BlobBuilder()
   250  		bb.Init(len(value))
   251  		_, addr, err := bb.Chunk(ctx, rd)
   252  		if err != nil {
   253  			return err
   254  		}
   255  		b.PutStringAddr(idx, addr)
   256  
   257  	default:
   258  		panic(fmt.Sprintf("unexpected encoding for string (%d)", typ.Enc))
   259  	}
   260  	return nil
   261  }
   262  
   263  func translateTimestampField(value types.Timestamp, idx int, b *val.TupleBuilder) {
   264  	typ := b.Desc.Types[idx]
   265  	switch typ.Enc {
   266  	case val.DateEnc:
   267  		b.PutDate(idx, time.Time(value))
   268  	case val.DatetimeEnc:
   269  		b.PutDatetime(idx, time.Time(value))
   270  	default:
   271  		panic(fmt.Sprintf("unexpected encoding for timestamp (%d)", typ.Enc))
   272  	}
   273  }
   274  
   275  func translateGeometryField(value types.Value, idx int, b *val.TupleBuilder) {
   276  	nk := value.Kind()
   277  	switch nk {
   278  	case types.PointKind:
   279  		p := types.ConvertTypesPointToSQLPoint(value.(types.Point))
   280  		b.PutGeometry(idx, p.Serialize())
   281  
   282  	case types.LineStringKind:
   283  		l := types.ConvertTypesLineStringToSQLLineString(value.(types.LineString))
   284  		b.PutGeometry(idx, l.Serialize())
   285  
   286  	case types.PolygonKind:
   287  		p := types.ConvertTypesPolygonToSQLPolygon(value.(types.Polygon))
   288  		b.PutGeometry(idx, p.Serialize())
   289  
   290  	case types.MultiPointKind:
   291  		p := types.ConvertTypesMultiPointToSQLMultiPoint(value.(types.MultiPoint))
   292  		b.PutGeometry(idx, p.Serialize())
   293  
   294  	case types.MultiLineStringKind:
   295  		l := types.ConvertTypesMultiLineStringToSQLMultiLineString(value.(types.MultiLineString))
   296  		b.PutGeometry(idx, l.Serialize())
   297  
   298  	case types.MultiPolygonKind:
   299  		p := types.ConvertTypesMultiPolygonToSQLMultiPolygon(value.(types.MultiPolygon))
   300  		b.PutGeometry(idx, p.Serialize())
   301  
   302  	case types.GeometryCollectionKind:
   303  		p := types.ConvertTypesGeomCollToSQLGeomColl(value.(types.GeomColl))
   304  		b.PutGeometry(idx, p.Serialize())
   305  
   306  	default:
   307  		panic(fmt.Sprintf("unexpected NomsKind for geometry (%d)", nk))
   308  	}
   309  }
   310  
   311  func translateJSONField(ctx context.Context, ns tree.NodeStore, value types.JSON, idx int, b *val.TupleBuilder) error {
   312  	s, err := json.NomsJSONToString(ctx, json.NomsJSON(value))
   313  	if err != nil {
   314  		return err
   315  	}
   316  	buf := bytes.NewBuffer([]byte(s))
   317  
   318  	bb := ns.BlobBuilder()
   319  	bb.Init(len(s))
   320  	_, addr, err := bb.Chunk(ctx, buf)
   321  	if err != nil {
   322  		return err
   323  	}
   324  	b.PutJSONAddr(idx, addr)
   325  	return nil
   326  }
   327  
   328  func translateBlobField(ctx context.Context, ns tree.NodeStore, value types.Blob, idx int, b *val.TupleBuilder) error {
   329  	switch b.Desc.Types[idx].Enc {
   330  	// maybe convert from TEXT/BLOB to VARBINARY/VARCHAR
   331  	// if this column is a primary/secondary index key
   332  	case val.StringEnc, val.ByteStringEnc:
   333  		return translateBlobValueToInlineField(ctx, value, idx, b)
   334  	case val.StringAddrEnc, val.BytesAddrEnc:
   335  		// common case
   336  	default:
   337  		return fmt.Errorf("unexpected encoding for blob (%d)", b.Desc.Types[idx].Enc)
   338  	}
   339  
   340  	buf := make([]byte, value.Len())
   341  	_, err := value.ReadAt(ctx, buf, 0)
   342  	if err == io.EOF {
   343  		err = nil
   344  	} else if err != nil {
   345  		return err
   346  	}
   347  
   348  	bb := ns.BlobBuilder()
   349  	bb.Init(int(value.Len()))
   350  	_, addr, err := bb.Chunk(ctx, bytes.NewReader(buf))
   351  	if err != nil {
   352  		return err
   353  	}
   354  
   355  	typ := b.Desc.Types[idx]
   356  	switch typ.Enc {
   357  	case val.BytesAddrEnc:
   358  		b.PutBytesAddr(idx, addr)
   359  	case val.StringAddrEnc:
   360  		b.PutStringAddr(idx, addr)
   361  	}
   362  	return nil
   363  }
   364  
   365  func translateBlobValueToInlineField(ctx context.Context, value types.Blob, idx int, b *val.TupleBuilder) error {
   366  	if value.Len() >= maxInlineValue {
   367  		if b.Desc.Types[idx].Enc == val.StringEnc {
   368  			return ErrCannotMigrateText
   369  		} else {
   370  			return ErrCannotMigrateBlob
   371  		}
   372  	}
   373  
   374  	buf := make([]byte, value.Len())
   375  	_, err := value.ReadAt(ctx, buf, 0)
   376  	if err == io.EOF {
   377  		err = nil
   378  	} else if err != nil {
   379  		return err
   380  	}
   381  
   382  	typ := b.Desc.Types[idx]
   383  	switch typ.Enc {
   384  	case val.ByteStringEnc:
   385  		b.PutByteString(idx, buf)
   386  	case val.StringEnc:
   387  		b.PutString(idx, string(buf))
   388  	default:
   389  		panic(fmt.Sprintf("unexpected encoding for blob (%d)", typ.Enc))
   390  	}
   391  	return nil
   392  }
   393  
   394  func isEven(n uint64) bool {
   395  	return n%2 == 0
   396  }