github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/binlogreplication/binlog_row_serialization.go

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package binlogreplication

import (
	"fmt"

	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/vitess/go/mysql"

	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/val"
)

// rowSerializationIter iterates over the columns in a schema and abstracts access to the key and value tuples storing
// the data for a row, so that callers can ask for the next column information and get the right descriptor, tuple,
// and tuple index to use to load that column's data.
type rowSerializationIter struct {
	sch    schema.Schema // The schema representing the row being serialized
	colIdx int           // The position in the schema for the current column

	key     val.Tuple     // The key tuple for the row being serialized
	keyDesc val.TupleDesc // The descriptor for the key tuple
	keyIdx  int           // The last index in the key tuple used for a column

	value     val.Tuple     // The value tuple for the row being serialized
	valueDesc val.TupleDesc // The descriptor for the value tuple
	valueIdx  int           // The last index in the value tuple used for a column
}

// newRowSerializationIter creates a new rowSerializationIter for the specified |sch| and row data from the
// |key| and |value| tuples.
func newRowSerializationIter(sch schema.Schema, key, value tree.Item) *rowSerializationIter {
	return &rowSerializationIter{
		sch:       sch,
		key:       val.Tuple(key),
		keyDesc:   sch.GetKeyDescriptor(),
		value:     val.Tuple(value),
		valueDesc: sch.GetValueDescriptor(),
		keyIdx:    -1,
		valueIdx:  -1,
		colIdx:    0,
	}
}

// hasNext returns true if this iterator has more columns to provide and the |nextColumn| method can be called.
func (rsi *rowSerializationIter) hasNext() bool {
	return rsi.colIdx < rsi.sch.GetAllCols().Size()
}
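
// columnStorageIndexes is a minimal illustrative sketch (a hypothetical helper, not used by the
// serialization path) showing the intended calling pattern for rowSerializationIter: check hasNext()
// before each nextColumn() call, and use the returned descriptor, tuple, and index together to locate
// each column's data. Here we simply record the tuple index chosen for every column, in schema order.
func columnStorageIndexes(sch schema.Schema, key, value tree.Item) []int {
	iter := newRowSerializationIter(sch, key, value)
	indexes := make([]int, 0, sch.GetAllCols().Size())
	for iter.hasNext() {
		_, _, _, tupleIdx := iter.nextColumn()
		indexes = append(indexes, tupleIdx)
	}
	return indexes
}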

// nextColumn provides the data needed to process the next column in a row, including the column itself, the tuple
// holding the data, the tuple descriptor for that tuple, and the position index into that tuple where the column
// is stored. Callers should always call hasNext() before calling nextColumn() to ensure that it is safe to call.
func (rsi *rowSerializationIter) nextColumn() (schema.Column, val.TupleDesc, val.Tuple, int) {
	col := rsi.sch.GetAllCols().GetColumns()[rsi.colIdx]
	rsi.colIdx++

	// For keyless schemas, the key is a single hash column representing the row's unique identity, so we
	// always use the value descriptor for all columns. Additionally, the first field in the value is a
	// count of how many times that row appears in the table, so we increment |idx| by one extra field to
	// skip over that row count field and get to the real data fields.
	if schema.IsKeyless(rsi.sch) {
		rsi.valueIdx++
		return col, rsi.valueDesc, rsi.value, rsi.valueIdx + 1
	}

	// Otherwise, for primary key tables, we need to check if the next column is stored in the key or value.
	if col.IsPartOfPK {
		rsi.keyIdx++
		return col, rsi.keyDesc, rsi.key, rsi.keyIdx
	} else {
		rsi.valueIdx++
		return col, rsi.valueDesc, rsi.value, rsi.valueIdx
	}
}

// serializeRowToBinlogBytes serializes the row formed by |key| and |value| and defined by |sch|, into
// MySQL binlog binary format. For data stored out of band (e.g. BLOB, TEXT, GEOMETRY, JSON), |ns| is used to load the
// out-of-band data. This function returns the binary representation of the row, as well as a bitmap that indicates
// which fields of the row are null (and therefore don't contribute any bytes to the returned binary data).
func serializeRowToBinlogBytes(ctx *sql.Context, sch schema.Schema, key, value tree.Item, ns tree.NodeStore) (data []byte, nullBitmap mysql.Bitmap, err error) {
	columns := sch.GetAllCols().GetColumns()
	nullBitmap = mysql.NewServerBitmap(len(columns))

	iter := newRowSerializationIter(sch, key, value)
	rowIdx := -1
	for iter.hasNext() {
		rowIdx++
		col, descriptor, tuple, tupleIdx := iter.nextColumn()

		typ := col.TypeInfo.ToSqlType()
		serializer, ok := typeSerializersMap[typ.Type()]
		if !ok {
			return nil, nullBitmap, fmt.Errorf(
				"unsupported type: %v (%d)", typ.String(), typ.Type())
		}
		newData, err := serializer.serialize(ctx, typ, descriptor, tuple, tupleIdx, ns)
		if err != nil {
			return nil, mysql.Bitmap{}, err
		}
		if newData == nil {
			nullBitmap.Set(rowIdx, true)
		} else {
			data = append(data, newData...)
		}
	}

	return data, nullBitmap, nil
}
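
// buildRowImage is an illustrative caller sketch (hypothetical, simplified): it serializes one row and
// packages the result into a vitess mysql.Row, the shape consumed by write-rows binlog events. It assumes
// the mysql.Row type exposes Data and NullColumns fields, as in the vitess binlog row-event API; the real
// binlog producer in this package performs additional bookkeeping not shown here.
func buildRowImage(ctx *sql.Context, sch schema.Schema, key, value tree.Item, ns tree.NodeStore) (mysql.Row, error) {
	data, nullBitmap, err := serializeRowToBinlogBytes(ctx, sch, key, value, ns)
	if err != nil {
		return mysql.Row{}, err
	}
	return mysql.Row{
		Data:        data,       // packed, non-NULL column values in schema order
		NullColumns: nullBitmap, // one bit per column; set when the column contributed no bytes
	}, nil
}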