vitess.io/vitess@v0.16.2/go/vt/vttablet/onlineddl/vrepl.go (about)

     1  /*
     2  	Original copyright by GitHub as follows. Additions by the Vitess authors as follows.
     3  */
     4  /*
     5     Copyright 2016 GitHub Inc.
     6  	 See https://github.com/github/gh-ost/blob/master/LICENSE
     7  */
     8  /*
     9  Copyright 2021 The Vitess Authors.
    10  
    11  Licensed under the Apache License, Version 2.0 (the "License");
    12  you may not use this file except in compliance with the License.
    13  You may obtain a copy of the License at
    14  
    15      http://www.apache.org/licenses/LICENSE-2.0
    16  
    17  Unless required by applicable law or agreed to in writing, software
    18  distributed under the License is distributed on an "AS IS" BASIS,
    19  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    20  See the License for the specific language governing permissions and
    21  limitations under the License.
    22  */
    23  
    24  package onlineddl
    25  
    26  import (
    27  	"context"
    28  	"errors"
    29  	"fmt"
    30  	"math"
    31  	"strconv"
    32  	"strings"
    33  
    34  	"vitess.io/vitess/go/mysql"
    35  	"vitess.io/vitess/go/sqltypes"
    36  	"vitess.io/vitess/go/textutil"
    37  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    38  	"vitess.io/vitess/go/vt/dbconnpool"
    39  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    40  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    41  	"vitess.io/vitess/go/vt/schema"
    42  	"vitess.io/vitess/go/vt/sqlparser"
    43  	"vitess.io/vitess/go/vt/vterrors"
    44  	"vitess.io/vitess/go/vt/vttablet/onlineddl/vrepl"
    45  	"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
    46  )
    47  
    48  // VReplStream represents a row in _vt.vreplication table
    49  type VReplStream struct {
    50  	id                   int64
    51  	workflow             string
    52  	source               string
    53  	pos                  string
    54  	timeUpdated          int64
    55  	timeHeartbeat        int64
    56  	timeThrottled        int64
    57  	componentThrottled   string
    58  	transactionTimestamp int64
    59  	state                string
    60  	message              string
    61  	rowsCopied           int64
    62  	bls                  *binlogdatapb.BinlogSource
    63  }
    64  
    65  // livenessTimeIndicator returns a time indicator for last known healthy state.
    66  // vreplication uses three indicators:
    67  // - transaction_timestamp
    68  // - time_heartbeat
    69  // - time_throttled.
    70  // Updating any of them, also updates time_updated, indicating liveness.
    71  func (v *VReplStream) livenessTimeIndicator() int64 {
    72  	return v.timeUpdated
    73  }
    74  
    75  // isRunning() returns true when the workflow is actively running
    76  func (v *VReplStream) isRunning() bool {
    77  	switch v.state {
    78  	case binlogplayer.VReplicationInit, binlogplayer.VReplicationCopying, binlogplayer.BlpRunning:
    79  		return true
    80  	}
    81  	return false
    82  }
    83  
    84  // hasError() returns true when the workflow has failed and will not retry
    85  func (v *VReplStream) hasError() (isTerminal bool, vreplError error) {
    86  	switch {
    87  	case v.state == binlogplayer.BlpError:
    88  		return true, errors.New(v.message)
    89  	case strings.Contains(strings.ToLower(v.message), "error"):
    90  		return false, errors.New(v.message)
    91  	}
    92  	return false, nil
    93  }
    94  
    95  // VRepl is an online DDL helper for VReplication based migrations (ddl_strategy="online")
    96  type VRepl struct {
    97  	workflow    string
    98  	keyspace    string
    99  	shard       string
   100  	dbName      string
   101  	sourceTable string
   102  	targetTable string
   103  	pos         string
   104  	alterQuery  string
   105  	tableRows   int64
   106  
   107  	sourceSharedColumns              *vrepl.ColumnList
   108  	targetSharedColumns              *vrepl.ColumnList
   109  	droppedSourceNonGeneratedColumns *vrepl.ColumnList
   110  	droppedNoDefaultColumnNames      []string
   111  	expandedColumnNames              []string
   112  	sharedColumnsMap                 map[string]string
   113  	sourceAutoIncrement              uint64
   114  
   115  	chosenSourceUniqueKey *vrepl.UniqueKey
   116  	chosenTargetUniqueKey *vrepl.UniqueKey
   117  
   118  	addedUniqueKeys   []*vrepl.UniqueKey
   119  	removedUniqueKeys []*vrepl.UniqueKey
   120  
   121  	revertibleNotes string
   122  	filterQuery     string
   123  	enumToTextMap   map[string]string
   124  	intToEnumMap    map[string]bool
   125  	bls             *binlogdatapb.BinlogSource
   126  
   127  	parser *vrepl.AlterTableParser
   128  
   129  	convertCharset map[string](*binlogdatapb.CharsetConversion)
   130  }
   131  
   132  // NewVRepl creates a VReplication handler for Online DDL
   133  func NewVRepl(workflow, keyspace, shard, dbName, sourceTable, targetTable, alterQuery string) *VRepl {
   134  	return &VRepl{
   135  		workflow:       workflow,
   136  		keyspace:       keyspace,
   137  		shard:          shard,
   138  		dbName:         dbName,
   139  		sourceTable:    sourceTable,
   140  		targetTable:    targetTable,
   141  		alterQuery:     alterQuery,
   142  		parser:         vrepl.NewAlterTableParser(),
   143  		enumToTextMap:  map[string]string{},
   144  		intToEnumMap:   map[string]bool{},
   145  		convertCharset: map[string](*binlogdatapb.CharsetConversion){},
   146  	}
   147  }
   148  
   149  // readAutoIncrement reads the AUTO_INCREMENT vlaue, if any, for a give ntable
   150  func (v *VRepl) readAutoIncrement(ctx context.Context, conn *dbconnpool.DBConnection, tableName string) (autoIncrement uint64, err error) {
   151  	query, err := sqlparser.ParseAndBind(sqlGetAutoIncrement,
   152  		sqltypes.StringBindVariable(v.dbName),
   153  		sqltypes.StringBindVariable(tableName),
   154  	)
   155  	if err != nil {
   156  		return 0, err
   157  	}
   158  
   159  	rs, err := conn.ExecuteFetch(query, math.MaxInt64, true)
   160  	if err != nil {
   161  		return 0, err
   162  	}
   163  	for _, row := range rs.Named().Rows {
   164  		autoIncrement = row.AsUint64("AUTO_INCREMENT", 0)
   165  	}
   166  
   167  	return autoIncrement, nil
   168  }
   169  
   170  // readTableColumns reads column list from given table
   171  func (v *VRepl) readTableColumns(ctx context.Context, conn *dbconnpool.DBConnection, tableName string) (columns *vrepl.ColumnList, virtualColumns *vrepl.ColumnList, pkColumns *vrepl.ColumnList, err error) {
   172  	parsed := sqlparser.BuildParsedQuery(sqlShowColumnsFrom, tableName)
   173  	rs, err := conn.ExecuteFetch(parsed.Query, math.MaxInt64, true)
   174  	if err != nil {
   175  		return nil, nil, nil, err
   176  	}
   177  	columnNames := []string{}
   178  	virtualColumnNames := []string{}
   179  	pkColumnNames := []string{}
   180  	for _, row := range rs.Named().Rows {
   181  		columnName := row.AsString("Field", "")
   182  		columnNames = append(columnNames, columnName)
   183  
   184  		extra := row.AsString("Extra", "")
   185  		if strings.Contains(extra, "STORED GENERATED") || strings.Contains(extra, "VIRTUAL GENERATED") {
   186  			virtualColumnNames = append(virtualColumnNames, columnName)
   187  		}
   188  
   189  		key := row.AsString("Key", "")
   190  		if key == "PRI" {
   191  			pkColumnNames = append(pkColumnNames, columnName)
   192  		}
   193  	}
   194  	if len(columnNames) == 0 {
   195  		return nil, nil, nil, fmt.Errorf("Found 0 columns on `%s`", tableName)
   196  	}
   197  	return vrepl.NewColumnList(columnNames), vrepl.NewColumnList(virtualColumnNames), vrepl.NewColumnList(pkColumnNames), nil
   198  }
   199  
   200  // readTableUniqueKeys reads all unique keys from a given table, by order of usefulness/performance: PRIMARY first, integers are better, non-null are better
   201  func (v *VRepl) readTableUniqueKeys(ctx context.Context, conn *dbconnpool.DBConnection, tableName string) (uniqueKeys []*vrepl.UniqueKey, err error) {
   202  	query, err := sqlparser.ParseAndBind(sqlSelectUniqueKeys,
   203  		sqltypes.StringBindVariable(v.dbName),
   204  		sqltypes.StringBindVariable(tableName),
   205  		sqltypes.StringBindVariable(v.dbName),
   206  		sqltypes.StringBindVariable(tableName),
   207  	)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	rs, err := conn.ExecuteFetch(query, math.MaxInt64, true)
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  	for _, row := range rs.Named().Rows {
   216  		uniqueKey := &vrepl.UniqueKey{
   217  			Name:            row.AsString("index_name", ""),
   218  			Columns:         *vrepl.ParseColumnList(row.AsString("column_names", "")),
   219  			HasNullable:     row.AsBool("has_nullable", false),
   220  			HasFloat:        row.AsBool("is_float", false),
   221  			IsAutoIncrement: row.AsBool("is_auto_increment", false),
   222  		}
   223  		uniqueKeys = append(uniqueKeys, uniqueKey)
   224  	}
   225  	return uniqueKeys, nil
   226  }
   227  
   228  // readTableStatus reads table status information
   229  func (v *VRepl) readTableStatus(ctx context.Context, conn *dbconnpool.DBConnection, tableName string) (tableRows int64, err error) {
   230  	parsed := sqlparser.BuildParsedQuery(sqlShowTableStatus, tableName)
   231  	rs, err := conn.ExecuteFetch(parsed.Query, math.MaxInt64, true)
   232  	if err != nil {
   233  		return 0, err
   234  	}
   235  	row := rs.Named().Row()
   236  	if row == nil {
   237  		return 0, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "Cannot SHOW TABLE STATUS LIKE '%s'", tableName)
   238  	}
   239  	tableRows, err = row.ToInt64("Rows")
   240  	return tableRows, err
   241  }
   242  
   243  // applyColumnTypes
   244  func (v *VRepl) applyColumnTypes(ctx context.Context, conn *dbconnpool.DBConnection, tableName string, columnsLists ...*vrepl.ColumnList) error {
   245  	query, err := sqlparser.ParseAndBind(sqlSelectColumnTypes,
   246  		sqltypes.StringBindVariable(v.dbName),
   247  		sqltypes.StringBindVariable(tableName),
   248  	)
   249  	if err != nil {
   250  		return err
   251  	}
   252  	rs, err := conn.ExecuteFetch(query, math.MaxInt64, true)
   253  	if err != nil {
   254  		return err
   255  	}
   256  	for _, row := range rs.Named().Rows {
   257  		columnName := row["COLUMN_NAME"].ToString()
   258  		columnType := row["COLUMN_TYPE"].ToString()
   259  		columnOctetLength := row.AsUint64("CHARACTER_OCTET_LENGTH", 0)
   260  
   261  		for _, columnsList := range columnsLists {
   262  			column := columnsList.GetColumn(columnName)
   263  			if column == nil {
   264  				continue
   265  			}
   266  
   267  			column.DataType = row.AsString("DATA_TYPE", "") // a more canonical form of column_type
   268  			column.IsNullable = (row.AsString("IS_NULLABLE", "") == "YES")
   269  			column.IsDefaultNull = row.AsBool("is_default_null", false)
   270  
   271  			column.CharacterMaximumLength = row.AsInt64("CHARACTER_MAXIMUM_LENGTH", 0)
   272  			column.NumericPrecision = row.AsInt64("NUMERIC_PRECISION", 0)
   273  			column.NumericScale = row.AsInt64("NUMERIC_SCALE", 0)
   274  			column.DateTimePrecision = row.AsInt64("DATETIME_PRECISION", -1)
   275  
   276  			column.Type = vrepl.UnknownColumnType
   277  			if strings.Contains(columnType, "unsigned") {
   278  				column.IsUnsigned = true
   279  			}
   280  			if strings.Contains(columnType, "mediumint") {
   281  				column.SetTypeIfUnknown(vrepl.MediumIntColumnType)
   282  			}
   283  			if strings.Contains(columnType, "timestamp") {
   284  				column.SetTypeIfUnknown(vrepl.TimestampColumnType)
   285  			}
   286  			if strings.Contains(columnType, "datetime") {
   287  				column.SetTypeIfUnknown(vrepl.DateTimeColumnType)
   288  			}
   289  			if strings.Contains(columnType, "json") {
   290  				column.SetTypeIfUnknown(vrepl.JSONColumnType)
   291  			}
   292  			if strings.Contains(columnType, "float") {
   293  				column.SetTypeIfUnknown(vrepl.FloatColumnType)
   294  			}
   295  			if strings.Contains(columnType, "double") {
   296  				column.SetTypeIfUnknown(vrepl.DoubleColumnType)
   297  			}
   298  			if strings.HasPrefix(columnType, "enum") {
   299  				column.SetTypeIfUnknown(vrepl.EnumColumnType)
   300  				column.EnumValues = schema.ParseEnumValues(columnType)
   301  			}
   302  			if strings.HasPrefix(columnType, "set(") {
   303  				column.SetTypeIfUnknown(vrepl.SetColumnType)
   304  				column.EnumValues = schema.ParseSetValues(columnType)
   305  			}
   306  			if strings.HasPrefix(columnType, "binary") {
   307  				column.SetTypeIfUnknown(vrepl.BinaryColumnType)
   308  				column.BinaryOctetLength = columnOctetLength
   309  			}
   310  			if charset := row.AsString("CHARACTER_SET_NAME", ""); charset != "" {
   311  				column.Charset = charset
   312  			}
   313  			if collation := row.AsString("COLLATION_NAME", ""); collation != "" {
   314  				column.SetTypeIfUnknown(vrepl.StringColumnType)
   315  				column.Collation = collation
   316  			}
   317  		}
   318  	}
   319  	return nil
   320  }
   321  
   322  func (v *VRepl) analyzeAlter(ctx context.Context) error {
   323  	if v.alterQuery == "" {
   324  		// Happens for REVERT
   325  		return nil
   326  	}
   327  	if err := v.parser.ParseAlterStatement(v.alterQuery); err != nil {
   328  		return err
   329  	}
   330  	if v.parser.IsRenameTable() {
   331  		return fmt.Errorf("Renaming the table is not aupported in ALTER TABLE: %s", v.alterQuery)
   332  	}
   333  	return nil
   334  }
   335  
   336  func (v *VRepl) analyzeTables(ctx context.Context, conn *dbconnpool.DBConnection) (err error) {
   337  	v.tableRows, err = v.readTableStatus(ctx, conn, v.sourceTable)
   338  	if err != nil {
   339  		return err
   340  	}
   341  	// columns:
   342  	sourceColumns, sourceVirtualColumns, sourcePKColumns, err := v.readTableColumns(ctx, conn, v.sourceTable)
   343  	if err != nil {
   344  		return err
   345  	}
   346  	targetColumns, targetVirtualColumns, targetPKColumns, err := v.readTableColumns(ctx, conn, v.targetTable)
   347  	if err != nil {
   348  		return err
   349  	}
   350  	v.sourceSharedColumns, v.targetSharedColumns, v.droppedSourceNonGeneratedColumns, v.sharedColumnsMap = vrepl.GetSharedColumns(sourceColumns, targetColumns, sourceVirtualColumns, targetVirtualColumns, v.parser)
   351  
   352  	// unique keys
   353  	sourceUniqueKeys, err := v.readTableUniqueKeys(ctx, conn, v.sourceTable)
   354  	if err != nil {
   355  		return err
   356  	}
   357  	if len(sourceUniqueKeys) == 0 {
   358  		return fmt.Errorf("Found no possible unique key on `%s`", v.sourceTable)
   359  	}
   360  	targetUniqueKeys, err := v.readTableUniqueKeys(ctx, conn, v.targetTable)
   361  	if err != nil {
   362  		return err
   363  	}
   364  	if len(targetUniqueKeys) == 0 {
   365  		return fmt.Errorf("Found no possible unique key on `%s`", v.targetTable)
   366  	}
   367  	v.chosenSourceUniqueKey, v.chosenTargetUniqueKey = vrepl.GetSharedUniqueKeys(sourceUniqueKeys, targetUniqueKeys, v.parser.ColumnRenameMap())
   368  	if v.chosenSourceUniqueKey == nil {
   369  		// VReplication supports completely different unique keys on source and target, covering
   370  		// some/completely different columns. The condition is that the key on source
   371  		// must use columns which all exist on target table.
   372  		v.chosenSourceUniqueKey = vrepl.GetUniqueKeyCoveredByColumns(sourceUniqueKeys, v.sourceSharedColumns)
   373  		if v.chosenSourceUniqueKey == nil {
   374  			// Still no luck.
   375  			return fmt.Errorf("Found no possible unique key on `%s` whose columns are in target table `%s`", v.sourceTable, v.targetTable)
   376  		}
   377  	}
   378  	if v.chosenTargetUniqueKey == nil {
   379  		// VReplication supports completely different unique keys on source and target, covering
   380  		// some/completely different columns. The condition is that the key on target
   381  		// must use columns which all exist on source table.
   382  		v.chosenTargetUniqueKey = vrepl.GetUniqueKeyCoveredByColumns(targetUniqueKeys, v.targetSharedColumns)
   383  		if v.chosenTargetUniqueKey == nil {
   384  			// Still no luck.
   385  			return fmt.Errorf("Found no possible unique key on `%s` whose columns are in source table `%s`", v.targetTable, v.sourceTable)
   386  		}
   387  	}
   388  	if v.chosenSourceUniqueKey == nil || v.chosenTargetUniqueKey == nil {
   389  		return fmt.Errorf("Found no shared, not nullable, unique keys between `%s` and `%s`", v.sourceTable, v.targetTable)
   390  	}
   391  	v.addedUniqueKeys = vrepl.AddedUniqueKeys(sourceUniqueKeys, targetUniqueKeys, v.parser.ColumnRenameMap())
   392  	v.removedUniqueKeys = vrepl.RemovedUniqueKeys(sourceUniqueKeys, targetUniqueKeys, v.parser.ColumnRenameMap())
   393  
   394  	// chosen source & target unique keys have exact columns in same order
   395  	sharedPKColumns := &v.chosenSourceUniqueKey.Columns
   396  
   397  	if err := v.applyColumnTypes(ctx, conn, v.sourceTable, sourceColumns, sourceVirtualColumns, sourcePKColumns, v.sourceSharedColumns, sharedPKColumns, v.droppedSourceNonGeneratedColumns); err != nil {
   398  		return err
   399  	}
   400  	if err := v.applyColumnTypes(ctx, conn, v.targetTable, targetColumns, targetVirtualColumns, targetPKColumns, v.targetSharedColumns); err != nil {
   401  		return err
   402  	}
   403  
   404  	for _, sourcePKColumn := range sharedPKColumns.Columns() {
   405  		mappedColumn := v.targetSharedColumns.GetColumn(sourcePKColumn.Name)
   406  		if sourcePKColumn.Type == vrepl.EnumColumnType && mappedColumn.Type == vrepl.EnumColumnType {
   407  			// An ENUM as part of PRIMARY KEY. We must convert it to text because OMG that's complicated.
   408  			// There's a scenario where a query may modify the enum value (and it's bad practice, seeing
   409  			// that it's part of the PK, but it's still valid), and in that case we must have the string value
   410  			// to be able to DELETE the old row
   411  			v.targetSharedColumns.SetEnumToTextConversion(mappedColumn.Name, sourcePKColumn.EnumValues)
   412  			v.enumToTextMap[sourcePKColumn.Name] = sourcePKColumn.EnumValues
   413  		}
   414  	}
   415  
   416  	for i := range v.sourceSharedColumns.Columns() {
   417  		sourceColumn := v.sourceSharedColumns.Columns()[i]
   418  		mappedColumn := v.targetSharedColumns.Columns()[i]
   419  		if sourceColumn.Type == vrepl.EnumColumnType && mappedColumn.Type != vrepl.EnumColumnType && mappedColumn.Charset != "" {
   420  			// A column is converted from ENUM type to textual type
   421  			v.targetSharedColumns.SetEnumToTextConversion(mappedColumn.Name, sourceColumn.EnumValues)
   422  			v.enumToTextMap[sourceColumn.Name] = sourceColumn.EnumValues
   423  		}
   424  		if sourceColumn.IsIntegralType() && mappedColumn.Type == vrepl.EnumColumnType {
   425  			v.intToEnumMap[sourceColumn.Name] = true
   426  		}
   427  	}
   428  
   429  	v.droppedNoDefaultColumnNames = vrepl.GetNoDefaultColumnNames(v.droppedSourceNonGeneratedColumns)
   430  	var expandedDescriptions map[string]string
   431  	v.expandedColumnNames, expandedDescriptions = vrepl.GetExpandedColumnNames(v.sourceSharedColumns, v.targetSharedColumns)
   432  
   433  	v.sourceAutoIncrement, err = v.readAutoIncrement(ctx, conn, v.sourceTable)
   434  
   435  	notes := []string{}
   436  	for _, uk := range v.removedUniqueKeys {
   437  		notes = append(notes, fmt.Sprintf("unique constraint removed: %s", uk.Name))
   438  	}
   439  	for _, name := range v.droppedNoDefaultColumnNames {
   440  		notes = append(notes, fmt.Sprintf("column %s dropped, and had no default value", name))
   441  	}
   442  	for _, name := range v.expandedColumnNames {
   443  		notes = append(notes, fmt.Sprintf("column %s: %s", name, expandedDescriptions[name]))
   444  	}
   445  	v.revertibleNotes = strings.Join(notes, "\n")
   446  	if err != nil {
   447  		return err
   448  	}
   449  
   450  	return nil
   451  }
   452  
   453  // generateFilterQuery creates a SELECT query used by vreplication as a filter. It SELECTs all
   454  // non-generated columns between source & target tables, and takes care of column renames.
   455  func (v *VRepl) generateFilterQuery(ctx context.Context) error {
   456  	if v.sourceSharedColumns.Len() == 0 {
   457  		return fmt.Errorf("Empty column list")
   458  	}
   459  	var sb strings.Builder
   460  	sb.WriteString("select ")
   461  
   462  	for i, sourceCol := range v.sourceSharedColumns.Columns() {
   463  		name := sourceCol.Name
   464  		targetName := v.sharedColumnsMap[name]
   465  
   466  		targetCol := v.targetSharedColumns.GetColumn(targetName)
   467  		if targetCol == nil {
   468  			return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "Cannot find target column %s", targetName)
   469  		}
   470  
   471  		if i > 0 {
   472  			sb.WriteString(", ")
   473  		}
   474  		switch {
   475  		case sourceCol.EnumToTextConversion:
   476  			sb.WriteString(fmt.Sprintf("CONCAT(%s)", escapeName(name)))
   477  		case v.intToEnumMap[name]:
   478  			sb.WriteString(fmt.Sprintf("CONCAT(%s)", escapeName(name)))
   479  		case sourceCol.Type == vrepl.JSONColumnType:
   480  			sb.WriteString(fmt.Sprintf("convert(%s using utf8mb4)", escapeName(name)))
   481  		case sourceCol.Type == vrepl.StringColumnType:
   482  			// Check source and target charset/encoding. If needed, create
   483  			// a binlogdatapb.CharsetConversion entry (later written to vreplication)
   484  			fromEncoding, ok := mysql.CharacterSetEncoding[sourceCol.Charset]
   485  			if !ok {
   486  				return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", sourceCol.Charset, sourceCol.Name)
   487  			}
   488  			toEncoding, ok := mysql.CharacterSetEncoding[targetCol.Charset]
   489  			// Let's see if target col is at all textual
   490  			if targetCol.Type == vrepl.StringColumnType && !ok {
   491  				return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Character set %s not supported for column %s", targetCol.Charset, targetCol.Name)
   492  			}
   493  			if fromEncoding == nil && toEncoding == nil && targetCol.Type != vrepl.JSONColumnType {
   494  				// Both source and target have trivial charsets
   495  				sb.WriteString(escapeName(name))
   496  			} else {
   497  				// encoding can be nil for trivial charsets, like utf8, ascii, binary, etc.
   498  				v.convertCharset[targetName] = &binlogdatapb.CharsetConversion{
   499  					FromCharset: sourceCol.Charset,
   500  					ToCharset:   targetCol.Charset,
   501  				}
   502  				sb.WriteString(fmt.Sprintf("convert(%s using utf8mb4)", escapeName(name)))
   503  			}
   504  		case targetCol.Type == vrepl.JSONColumnType && sourceCol.Type != vrepl.JSONColumnType:
   505  			// Convert any type to JSON: encode the type as utf8mb4 text
   506  			sb.WriteString(fmt.Sprintf("convert(%s using utf8mb4)", escapeName(name)))
   507  		default:
   508  			sb.WriteString(escapeName(name))
   509  		}
   510  		sb.WriteString(" as ")
   511  		sb.WriteString(escapeName(targetName))
   512  	}
   513  	sb.WriteString(" from ")
   514  	sb.WriteString(escapeName(v.sourceTable))
   515  
   516  	v.filterQuery = sb.String()
   517  	return nil
   518  }
   519  
   520  func (v *VRepl) analyzeBinlogSource(ctx context.Context) {
   521  	bls := &binlogdatapb.BinlogSource{
   522  		Keyspace:      v.keyspace,
   523  		Shard:         v.shard,
   524  		Filter:        &binlogdatapb.Filter{},
   525  		StopAfterCopy: false,
   526  	}
   527  
   528  	encodeColumns := func(columns *vrepl.ColumnList) string {
   529  		return textutil.EscapeJoin(columns.Names(), ",")
   530  	}
   531  	rule := &binlogdatapb.Rule{
   532  		Match:                        v.targetTable,
   533  		Filter:                       v.filterQuery,
   534  		SourceUniqueKeyColumns:       encodeColumns(&v.chosenSourceUniqueKey.Columns),
   535  		TargetUniqueKeyColumns:       encodeColumns(&v.chosenTargetUniqueKey.Columns),
   536  		SourceUniqueKeyTargetColumns: encodeColumns(v.chosenSourceUniqueKey.Columns.MappedNamesColumnList(v.sharedColumnsMap)),
   537  	}
   538  	if len(v.convertCharset) > 0 {
   539  		rule.ConvertCharset = v.convertCharset
   540  	}
   541  	if len(v.enumToTextMap) > 0 {
   542  		rule.ConvertEnumToText = v.enumToTextMap
   543  	}
   544  	if len(v.intToEnumMap) > 0 {
   545  		rule.ConvertIntToEnum = v.intToEnumMap
   546  	}
   547  
   548  	bls.Filter.Rules = append(bls.Filter.Rules, rule)
   549  	v.bls = bls
   550  }
   551  
   552  func (v *VRepl) analyze(ctx context.Context, conn *dbconnpool.DBConnection) error {
   553  	if err := v.analyzeAlter(ctx); err != nil {
   554  		return err
   555  	}
   556  	if err := v.analyzeTables(ctx, conn); err != nil {
   557  		return err
   558  	}
   559  	if err := v.generateFilterQuery(ctx); err != nil {
   560  		return err
   561  	}
   562  	v.analyzeBinlogSource(ctx)
   563  	return nil
   564  }
   565  
   566  // generateInsertStatement generates the INSERT INTO _vt.replication stataement that creates the vreplication workflow
   567  func (v *VRepl) generateInsertStatement(ctx context.Context) (string, error) {
   568  	ig := vreplication.NewInsertGenerator(binlogplayer.BlpStopped, v.dbName)
   569  	ig.AddRow(v.workflow, v.bls, v.pos, "", "in_order:REPLICA,PRIMARY",
   570  		int64(binlogdatapb.VReplicationWorkflowType_OnlineDDL), int64(binlogdatapb.VReplicationWorkflowSubType_None), false)
   571  
   572  	return ig.String(), nil
   573  }
   574  
   575  // generateStartStatement Generates the statement to start VReplication running on the workflow
   576  func (v *VRepl) generateStartStatement(ctx context.Context) (string, error) {
   577  	return sqlparser.ParseAndBind(sqlStartVReplStream,
   578  		sqltypes.StringBindVariable(v.dbName),
   579  		sqltypes.StringBindVariable(v.workflow),
   580  	)
   581  }
   582  
   583  func getVreplTable(ctx context.Context, s *VReplStream) (string, error) {
   584  	// sanity checks:
   585  	if s == nil {
   586  		return "", vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No vreplication stream migration %s", s.workflow)
   587  	}
   588  	if s.bls.Filter == nil {
   589  		return "", vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "No binlog source filter for migration %s", s.workflow)
   590  	}
   591  	if len(s.bls.Filter.Rules) != 1 {
   592  		return "", vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Cannot detect filter rules for migration/vreplication %+v", s.workflow)
   593  	}
   594  	vreplTable := s.bls.Filter.Rules[0].Match
   595  	return vreplTable, nil
   596  }
   597  
   598  // escapeName will escape a db/table/column/... name by wrapping with backticks.
   599  // It is not fool proof. I'm just trying to do the right thing here, not solving
   600  // SQL injection issues, which should be irrelevant for this tool.
   601  func escapeName(name string) string {
   602  	if unquoted, err := strconv.Unquote(name); err == nil {
   603  		name = unquoted
   604  	}
   605  	return fmt.Sprintf("`%s`", name)
   606  }