github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/binlogreplication/binlog_row_serialization.go

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package binlogreplication

import (
	"fmt"

	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/vitess/go/mysql"

	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/val"
)

// rowSerializationIter iterates over the columns in a schema and abstracts access to the key and value tuples storing
// the data for a row, so that callers can ask for the next column information and get the right descriptor, tuple,
// and tuple index to use to load that column's data.
type rowSerializationIter struct {
	sch    schema.Schema // The schema representing the row being serialized
	colIdx int           // The position in the schema for the current column

	key     val.Tuple     // The key tuple for the row being serialized
	keyDesc val.TupleDesc // The descriptor for the key tuple
	keyIdx  int           // The last index in the key tuple used for a column

	value     val.Tuple     // The value tuple for the row being serialized
	valueDesc val.TupleDesc // The descriptor for the value tuple
	valueIdx  int           // The last index in the value tuple used for a column
}

// newRowSerializationIter creates a new rowSerializationIter for the specified |sch| and row data from the
// |key| and |value| tuples.
func newRowSerializationIter(sch schema.Schema, key, value tree.Item) *rowSerializationIter {
	return &rowSerializationIter{
		sch:       sch,
		key:       val.Tuple(key),
		keyDesc:   sch.GetKeyDescriptor(),
		value:     val.Tuple(value),
		valueDesc: sch.GetValueDescriptor(),
		keyIdx:    -1,
		valueIdx:  -1,
		colIdx:    0,
	}
}

// hasNext returns true if this iterator has more columns to provide and the |nextColumn| method can be called.
func (rsi *rowSerializationIter) hasNext() bool {
	return rsi.colIdx < rsi.sch.GetAllCols().Size()
}
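
// columnStorageIndexes is a minimal illustrative sketch (a hypothetical helper, not used by the
// serialization path) showing the intended calling pattern for rowSerializationIter: check hasNext()
// before each nextColumn() call, and use the returned descriptor, tuple, and index together to locate
// each column's data. Here we simply record the tuple index chosen for every column, in schema order.
func columnStorageIndexes(sch schema.Schema, key, value tree.Item) []int {
	iter := newRowSerializationIter(sch, key, value)
	indexes := make([]int, 0, sch.GetAllCols().Size())
	for iter.hasNext() {
		_, _, _, tupleIdx := iter.nextColumn()
		indexes = append(indexes, tupleIdx)
	}
	return indexes
}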

// nextColumn provides the data needed to process the next column in a row, including the column itself, the tuple
// holding the data, the tuple descriptor for that tuple, and the position index into that tuple where the column
// is stored. Callers should always call hasNext() before calling nextColumn() to ensure that it is safe to call.
func (rsi *rowSerializationIter) nextColumn() (schema.Column, val.TupleDesc, val.Tuple, int) {
	col := rsi.sch.GetAllCols().GetColumns()[rsi.colIdx]
	rsi.colIdx++

	// For keyless schemas, the key is a single hash column representing the row's unique identity, so we
	// always use the value descriptor for all columns. Additionally, the first field in the value is a
	// count of how many times that row appears in the table, so we increment |idx| by one extra field to
	// skip over that row count field and get to the real data fields.
	if schema.IsKeyless(rsi.sch) {
		rsi.valueIdx++
		return col, rsi.valueDesc, rsi.value, rsi.valueIdx + 1
	}

	// Otherwise, for primary key tables, we need to check if the next column is stored in the key or value.
	if col.IsPartOfPK {
		rsi.keyIdx++
		return col, rsi.keyDesc, rsi.key, rsi.keyIdx
	} else {
		rsi.valueIdx++
		return col, rsi.valueDesc, rsi.value, rsi.valueIdx
	}
}

// serializeRowToBinlogBytes serializes the row formed by |key| and |value| and defined by |sch|, into
// MySQL binlog binary format. For data stored out of band (e.g. BLOB, TEXT, GEOMETRY, JSON), |ns| is used to load the
// out-of-band data. This function returns the binary representation of the row, as well as a bitmap that indicates
// which fields of the row are null (and therefore don't contribute any bytes to the returned binary data).
func serializeRowToBinlogBytes(ctx *sql.Context, sch schema.Schema, key, value tree.Item, ns tree.NodeStore) (data []byte, nullBitmap mysql.Bitmap, err error) {
	columns := sch.GetAllCols().GetColumns()
	nullBitmap = mysql.NewServerBitmap(len(columns))

	iter := newRowSerializationIter(sch, key, value)
	rowIdx := -1
	for iter.hasNext() {
		rowIdx++
		col, descriptor, tuple, tupleIdx := iter.nextColumn()

		typ := col.TypeInfo.ToSqlType()
		serializer, ok := typeSerializersMap[typ.Type()]
		if !ok {
			return nil, nullBitmap, fmt.Errorf(
				"unsupported type: %v (%d)", typ.String(), typ.Type())
		}
		newData, err := serializer.serialize(ctx, typ, descriptor, tuple, tupleIdx, ns)
		if err != nil {
			return nil, mysql.Bitmap{}, err
		}
		if newData == nil {
			nullBitmap.Set(rowIdx, true)
		} else {
			data = append(data, newData...)
		}
	}

	return data, nullBitmap, nil
}
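
// buildRowImage is an illustrative caller sketch (hypothetical, simplified): it serializes one row and
// packages the result into a vitess mysql.Row, the shape consumed by write-rows binlog events. It assumes
// the mysql.Row type exposes Data and NullColumns fields, as in the vitess binlog row-event API; the real
// binlog producer in this package performs additional bookkeeping not shown here.
func buildRowImage(ctx *sql.Context, sch schema.Schema, key, value tree.Item, ns tree.NodeStore) (mysql.Row, error) {
	data, nullBitmap, err := serializeRowToBinlogBytes(ctx, sch, key, value, ns)
	if err != nil {
		return mysql.Row{}, err
	}
	return mysql.Row{
		Data:        data,       // packed, non-NULL column values in schema order
		NullColumns: nullBitmap, // one bit per column; set when the column contributed no bytes
	}, nil
}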