vitess.io/vitess@v0.16.2/go/vt/vttablet/onlineddl/executor.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Functionality of this Executor is tested in go/test/endtoend/onlineddl/...
    19  */
    20  
    21  package onlineddl
    22  
    23  import (
    24  	"context"
    25  	"errors"
    26  	"fmt"
    27  	"math"
    28  	"os"
    29  	"path"
    30  	"strconv"
    31  	"strings"
    32  	"sync"
    33  	"sync/atomic"
    34  	"syscall"
    35  	"time"
    36  
    37  	"github.com/spf13/pflag"
    38  
    39  	"google.golang.org/protobuf/proto"
    40  
    41  	"google.golang.org/protobuf/encoding/prototext"
    42  
    43  	"vitess.io/vitess/go/mysql"
    44  	"vitess.io/vitess/go/sqlescape"
    45  	"vitess.io/vitess/go/sqltypes"
    46  	"vitess.io/vitess/go/textutil"
    47  	"vitess.io/vitess/go/timer"
    48  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    49  	"vitess.io/vitess/go/vt/dbconnpool"
    50  	"vitess.io/vitess/go/vt/log"
    51  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    52  	querypb "vitess.io/vitess/go/vt/proto/query"
    53  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    54  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    55  	"vitess.io/vitess/go/vt/schema"
    56  	"vitess.io/vitess/go/vt/schemadiff"
    57  	"vitess.io/vitess/go/vt/servenv"
    58  	"vitess.io/vitess/go/vt/sqlparser"
    59  	"vitess.io/vitess/go/vt/topo"
    60  	"vitess.io/vitess/go/vt/topo/topoproto"
    61  	"vitess.io/vitess/go/vt/vterrors"
    62  	"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
    63  	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
    64  	"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle"
    65  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    66  	"vitess.io/vitess/go/vt/vttablet/vexec"
    67  )
    68  
// Sentinel errors returned by the Online DDL executor.
var (
	// ErrExecutorNotWritableTablet is generated when executor is asked to run gh-ost on a read-only server
	ErrExecutorNotWritableTablet = errors.New("cannot run migration on non-writable tablet")
	// ErrExecutorMigrationAlreadyRunning is generated when an attempt is made to run an operation that conflicts with a running migration
	ErrExecutorMigrationAlreadyRunning = errors.New("cannot run migration since a migration is already running")
	// ErrMigrationNotFound is returned by readMigration when the given UUID cannot be found
	ErrMigrationNotFound = errors.New("migration not found")
)
    77  
// vexecUpdateTemplates lists the shapes of UPDATE statements against
// _vt.schema_migrations, with 'valN' literals standing in for values.
// Open() passes this list to sqlparser.QueryMatchesTemplates as a sanity
// check that the templates themselves are valid.
var vexecUpdateTemplates = []string{
	`update _vt.schema_migrations set migration_status='val1' where mysql_schema='val2'`,
	`update _vt.schema_migrations set migration_status='val1' where migration_uuid='val2' and mysql_schema='val3'`,
	`update _vt.schema_migrations set migration_status='val1' where migration_uuid='val2' and mysql_schema='val3' and shard='val4'`,
}
    83  
// vexecInsertTemplates lists the shape of INSERT IGNORE statements into
// _vt.schema_migrations. The 'valN' literals appear to be placeholder values,
// as in vexecUpdateTemplates (NOTE(review): confirm against the code that
// consumes these templates).
var vexecInsertTemplates = []string{
	`INSERT IGNORE INTO _vt.schema_migrations (
		migration_uuid,
		keyspace,
		shard,
		mysql_schema,
		mysql_table,
		migration_statement,
		strategy,
		options,
		ddl_action,
		requested_timestamp,
		migration_context,
		migration_status
	) VALUES (
		'val1', 'val2', 'val3', 'val4', 'val5', 'val6', 'val7', 'val8', 'val9', FROM_UNIXTIME(0), 'vala', 'valb'
	)`,
}
   102  
// emptyResult is a shared, zero-valued query result.
var emptyResult = &sqltypes.Result{}

// acceptableDropTableIfExistsErrorCodes lists MySQL error codes ("can't find
// file", "no such table"); presumably passed to executeDirectly as tolerated
// error codes when dropping a table that may not exist — verify against callers.
var acceptableDropTableIfExistsErrorCodes = []int{mysql.ERCantFindFile, mysql.ERNoSuchTable}

// copyAlgorithm is the sqlparser representation of ALGORITHM=COPY.
var copyAlgorithm = sqlparser.AlgorithmValue(sqlparser.CopyStr)
   106  
// Flag-backed settings; see registerOnlineDDLFlags for the flag names.
//   - ghostOverridePath / ptOSCOverridePath: when non-empty, returned by
//     GhostBinaryFileName / PTOSCFileName instead of the default binary path.
//   - migrationCheckInterval: interval of the executor's ticks timer (see NewExecutor).
//   - retainOnlineDDLTables: added to the current time by newGCTableRetainTime.
//   - maxConcurrentOnlineDDLs: NewExecutor raises values below 1 to 1.
var (
	ghostOverridePath       string
	ptOSCOverridePath       string
	migrationCheckInterval  = 1 * time.Minute
	retainOnlineDDLTables   = 24 * time.Hour
	maxConcurrentOnlineDDLs = 256
)
   114  
   115  func init() {
   116  	servenv.OnParseFor("vtcombo", registerOnlineDDLFlags)
   117  	servenv.OnParseFor("vttablet", registerOnlineDDLFlags)
   118  }
   119  
// registerOnlineDDLFlags binds the Online DDL command line flags on the given
// flag set to their package-level variables. Each variable's current value is
// used as the flag's default.
func registerOnlineDDLFlags(fs *pflag.FlagSet) {
	fs.StringVar(&ghostOverridePath, "gh-ost-path", ghostOverridePath, "override default gh-ost binary full path")
	fs.StringVar(&ptOSCOverridePath, "pt-osc-path", ptOSCOverridePath, "override default pt-online-schema-change binary full path")
	fs.DurationVar(&migrationCheckInterval, "migration_check_interval", migrationCheckInterval, "Interval between migration checks")
	fs.DurationVar(&retainOnlineDDLTables, "retain_online_ddl_tables", retainOnlineDDLTables, "How long should vttablet keep an old migrated table before purging it")
	fs.IntVar(&maxConcurrentOnlineDDLs, "max_concurrent_online_ddl", maxConcurrentOnlineDDLs, "Maximum number of online DDL changes that may run concurrently")
}
   127  
// migrationNextCheckIntervals are the delays at which triggerNextCheckInterval
// schedules extra ticks, ahead of the regular migrationCheckInterval.
var migrationNextCheckIntervals = []time.Duration{1 * time.Second, 5 * time.Second, 10 * time.Second, 20 * time.Second}

// maxConstraintNameLength is the maximum length of a constraint name
// (NOTE(review): presumably MySQL's 64 character identifier limit — confirm).
var maxConstraintNameLength = 64
   130  
// Executor-wide constants.
//   - maxPasswordLength: createOnlineDDLUser truncates the generated password to this length.
//   - progressPctStarted / progressPctFull, etaSecondsUnknown / etaSecondsNow,
//     rowsCopiedUnknown, emptyHint: values reported via onSchemaMigrationStatus
//     (see executeDirectly).
//   - databasePoolSize: size of the executor's connection pool (see NewExecutor).
//   - vreplicationCutOverThreshold: timeout used while waiting for vreplication
//     to reach a position during cut-over.
const (
	maxPasswordLength                        = 32 // MySQL's *replication* password may not exceed 32 characters
	staleMigrationMinutes                    = 180
	progressPctStarted               float64 = 0
	progressPctFull                  float64 = 100.0
	etaSecondsUnknown                        = -1
	etaSecondsNow                            = 0
	rowsCopiedUnknown                        = 0
	emptyHint                                = ""
	readyToCompleteHint                      = "ready_to_complete"
	databasePoolSize                         = 3
	vreplicationCutOverThreshold             = 5 * time.Second
	vreplicationTestSuiteWaitSeconds         = 5
)
   145  
// Names used by the executor.
//   - onlineDDLUser / onlineDDLGrant: the MySQL account used for migrations;
//     onlineDDLGrant renders it as 'user'@'%' and is substituted into the
//     create/grant/drop user statements (see createOnlineDDLUser, dropOnlineDDLUser).
//   - migrationLogFileName / migrationFailureFileName: log file names
//     (NOTE(review): usage is outside this part of the file).
//   - throttlerOnlineDDLApp / throttleCheckFlags: presumably the app name and
//     flags used when checking the throttler — confirm against callers.
var (
	migrationLogFileName     = "migration.log"
	migrationFailureFileName = "migration-failure.log"
	onlineDDLUser            = "vt-online-ddl-internal"
	onlineDDLGrant           = fmt.Sprintf("'%s'@'%s'", onlineDDLUser, "%")
	throttlerOnlineDDLApp    = "online-ddl"
	throttleCheckFlags       = &throttle.CheckFlags{}
)
   154  
// ConstraintType identifies the kind of a table constraint.
type ConstraintType int

// Known constraint types. UnknownConstraintType is the zero value and is what
// GetConstraintType returns for anything that is neither a CHECK nor a
// FOREIGN KEY definition.
const (
	UnknownConstraintType ConstraintType = iota
	CheckConstraintType
	ForeignKeyConstraintType
)
   162  
var (
	// constraintIndicatorMap maps a ConstraintType (as int) to a short textual
	// indicator: "chk" for CHECK constraints, "fk" for FOREIGN KEY constraints.
	// UnknownConstraintType has no entry.
	constraintIndicatorMap = map[int]string{
		int(CheckConstraintType):      "chk",
		int(ForeignKeyConstraintType): "fk",
	}
)
   169  
   170  func GetConstraintType(constraintInfo sqlparser.ConstraintInfo) ConstraintType {
   171  	if _, ok := constraintInfo.(*sqlparser.CheckConstraintDefinition); ok {
   172  		return CheckConstraintType
   173  	}
   174  	if _, ok := constraintInfo.(*sqlparser.ForeignKeyDefinition); ok {
   175  		return ForeignKeyConstraintType
   176  	}
   177  	return UnknownConstraintType
   178  }
   179  
// mysqlVariables holds backend MySQL settings as collected by
// readMySQLVariables: host and port (taken from the tablet's DB config when
// set there, otherwise from @@global.hostname / @@global.port), the
// @@global.read_only state, and the server version and version comment.
type mysqlVariables struct {
	host           string
	port           int
	readOnly       bool
	version        string
	versionComment string
}
   187  
// Executor wraps and manages the execution of Online DDL migrations on a tablet.
//
// Field notes:
//   - pool is the executor's own connection pool; it is opened in Open() and
//     closed in Close().
//   - ts is the topology server, used to look up this tablet by tabletAlias.
//   - keyspace, shard and dbName are populated by InitDBConfig.
//   - initMutex serializes Open() and Close().
//   - vreplicationLastError is re-created on every Open().
//   - reviewedRunningMigrationsFlag is reset to false on Open().
//   - ticks drives onMigrationCheckTick at migrationCheckInterval.
//   - isOpen is read and written atomically; 1 while the executor is open, 0 otherwise.
type Executor struct {
	env                   tabletenv.Env
	pool                  *connpool.Pool
	tabletTypeFunc        func() topodatapb.TabletType
	ts                    *topo.Server
	lagThrottler          *throttle.Throttler
	toggleBufferTableFunc func(cancelCtx context.Context, tableName string, bufferQueries bool)
	tabletAlias           *topodatapb.TabletAlias

	keyspace string
	shard    string
	dbName   string

	initMutex      sync.Mutex
	migrationMutex sync.Mutex
	submitMutex    sync.Mutex // used when submitting migrations
	// ownedRunningMigrations lists UUIDs owned by this executor (consider this a map[string]bool)
	// A UUID listed in this map stands for a migration that is executing, and that this executor can control.
	// Migrations found to be running which are not listed in this map will either:
	// - be adopted by this executor (possible for vreplication migrations), or
	// - be terminated (example: pt-osc migration gone rogue, process still running even as the migration failed)
	// The Executor auto-reviews the map and cleans up migrations thought to be running which are not running.
	ownedRunningMigrations        sync.Map
	vreplicationLastError         map[string]*vterrors.LastError
	tickReentranceFlag            int64
	reviewedRunningMigrationsFlag bool

	ticks             *timer.Timer
	isOpen            int64
	schemaInitialized bool

	initVreplicationDDLOnce sync.Once
}
   222  
// cancellableMigration pairs a migration UUID with a message
// (NOTE(review): presumably the reason recorded when the migration is
// cancelled — confirm against callers).
type cancellableMigration struct {
	uuid    string
	message string
}
   227  
   228  func newCancellableMigration(uuid string, message string) *cancellableMigration {
   229  	return &cancellableMigration{uuid: uuid, message: message}
   230  }
   231  
   232  // GhostBinaryFileName returns the full path+name of the gh-ost binary
   233  func GhostBinaryFileName() (fileName string, isOverride bool) {
   234  	if ghostOverridePath != "" {
   235  		return ghostOverridePath, true
   236  	}
   237  	return path.Join(os.TempDir(), "vt-gh-ost"), false
   238  }
   239  
   240  // PTOSCFileName returns the full path+name of the pt-online-schema-change binary
   241  // Note that vttablet does not include pt-online-schema-change
   242  func PTOSCFileName() (fileName string, isOverride bool) {
   243  	if ptOSCOverridePath != "" {
   244  		return ptOSCOverridePath, true
   245  	}
   246  	return "/usr/bin/pt-online-schema-change", false
   247  }
   248  
   249  // newGCTableRetainTime returns the time until which a new GC table is to be retained
   250  func newGCTableRetainTime() time.Time {
   251  	return time.Now().UTC().Add(retainOnlineDDLTables)
   252  }
   253  
   254  // NewExecutor creates a new gh-ost executor.
   255  func NewExecutor(env tabletenv.Env, tabletAlias *topodatapb.TabletAlias, ts *topo.Server,
   256  	lagThrottler *throttle.Throttler,
   257  	tabletTypeFunc func() topodatapb.TabletType,
   258  	toggleBufferTableFunc func(cancelCtx context.Context, tableName string, bufferQueries bool),
   259  ) *Executor {
   260  	// sanitize flags
   261  	if maxConcurrentOnlineDDLs < 1 {
   262  		maxConcurrentOnlineDDLs = 1 // or else nothing will ever run
   263  	}
   264  	return &Executor{
   265  		env:         env,
   266  		tabletAlias: proto.Clone(tabletAlias).(*topodatapb.TabletAlias),
   267  
   268  		pool: connpool.NewPool(env, "OnlineDDLExecutorPool", tabletenv.ConnPoolConfig{
   269  			Size:               databasePoolSize,
   270  			IdleTimeoutSeconds: env.Config().OltpReadPool.IdleTimeoutSeconds,
   271  		}),
   272  		tabletTypeFunc:        tabletTypeFunc,
   273  		ts:                    ts,
   274  		lagThrottler:          lagThrottler,
   275  		toggleBufferTableFunc: toggleBufferTableFunc,
   276  		ticks:                 timer.NewTimer(migrationCheckInterval),
   277  	}
   278  }
   279  
   280  func (e *Executor) execQuery(ctx context.Context, query string) (result *sqltypes.Result, err error) {
   281  	defer e.env.LogError()
   282  
   283  	conn, err := e.pool.Get(ctx, nil)
   284  	if err != nil {
   285  		return result, err
   286  	}
   287  	defer conn.Recycle()
   288  	return conn.Exec(ctx, query, math.MaxInt32, true)
   289  }
   290  
   291  // TabletAliasString returns tablet alias as string (duh)
   292  func (e *Executor) TabletAliasString() string {
   293  	return topoproto.TabletAliasString(e.tabletAlias)
   294  }
   295  
   296  // InitDBConfig initializes keysapce
   297  func (e *Executor) InitDBConfig(keyspace, shard, dbName string) {
   298  	e.keyspace = keyspace
   299  	e.shard = shard
   300  	e.dbName = dbName
   301  }
   302  
   303  // Open opens database pool and initializes the schema
   304  func (e *Executor) Open() error {
   305  	e.initMutex.Lock()
   306  	defer e.initMutex.Unlock()
   307  	if atomic.LoadInt64(&e.isOpen) > 0 || !e.env.Config().EnableOnlineDDL {
   308  		return nil
   309  	}
   310  	log.Infof("onlineDDL Executor Open()")
   311  
   312  	e.reviewedRunningMigrationsFlag = false // will be set as "true" by reviewRunningMigrations()
   313  	e.ownedRunningMigrations.Range(func(k, _ any) bool {
   314  		e.ownedRunningMigrations.Delete(k)
   315  		return true
   316  	})
   317  	e.vreplicationLastError = make(map[string]*vterrors.LastError)
   318  
   319  	e.pool.Open(e.env.Config().DB.AppWithDB(), e.env.Config().DB.DbaWithDB(), e.env.Config().DB.AppDebugWithDB())
   320  	e.ticks.Start(e.onMigrationCheckTick)
   321  	e.triggerNextCheckInterval()
   322  
   323  	if _, err := sqlparser.QueryMatchesTemplates("select 1 from dual", vexecUpdateTemplates); err != nil {
   324  		// this validates vexecUpdateTemplates
   325  		return err
   326  	}
   327  	atomic.StoreInt64(&e.isOpen, 1)
   328  
   329  	return nil
   330  }
   331  
   332  // Close frees resources
   333  func (e *Executor) Close() {
   334  	e.initMutex.Lock()
   335  	defer e.initMutex.Unlock()
   336  	if atomic.LoadInt64(&e.isOpen) == 0 {
   337  		return
   338  	}
   339  	log.Infof("onlineDDL Executor Close()")
   340  
   341  	e.ticks.Stop()
   342  	e.pool.Close()
   343  	atomic.StoreInt64(&e.isOpen, 0)
   344  }
   345  
   346  // triggerNextCheckInterval the next tick sooner than normal
   347  func (e *Executor) triggerNextCheckInterval() {
   348  	for _, interval := range migrationNextCheckIntervals {
   349  		e.ticks.TriggerAfter(interval)
   350  	}
   351  }
   352  
   353  // matchesShards checks whether given comma delimited shard names include this tablet's shard. If the input param is empty then
   354  // that implicitly means "true"
   355  func (e *Executor) matchesShards(commaDelimitedShards string) bool {
   356  	shards := textutil.SplitDelimitedList(commaDelimitedShards)
   357  	if len(shards) == 0 {
   358  		// Nothing explicitly defined, so implicitly all shards are allowed
   359  		return true
   360  	}
   361  	for _, shard := range shards {
   362  		if shard == e.shard {
   363  			return true
   364  		}
   365  	}
   366  	return false
   367  }
   368  
   369  // countOwnedRunningMigrations returns an estimate of current count of running migrations; this is
   370  // normally an accurate number, but can be inexact because the exdcutor peridocially reviews
   371  // e.ownedRunningMigrations and adds/removes migrations based on actual migration state.
   372  func (e *Executor) countOwnedRunningMigrations() (count int) {
   373  	e.ownedRunningMigrations.Range(func(_, val any) bool {
   374  		if _, ok := val.(*schema.OnlineDDL); ok {
   375  			count++
   376  		}
   377  		return true // continue iteration
   378  	})
   379  	return count
   380  }
   381  
   382  // allowConcurrentMigration checks if the given migration is allowed to run concurrently.
   383  // First, the migration itself must declare --allow-concurrent. But then, there's also some
   384  // restrictions on which migrations exactly are allowed such concurrency.
   385  func (e *Executor) allowConcurrentMigration(onlineDDL *schema.OnlineDDL) (action sqlparser.DDLAction, allowConcurrent bool) {
   386  	if !onlineDDL.StrategySetting().IsAllowConcurrent() {
   387  		return action, false
   388  	}
   389  
   390  	var err error
   391  	action, err = onlineDDL.GetAction()
   392  	if err != nil {
   393  		return action, false
   394  	}
   395  	switch action {
   396  	case sqlparser.CreateDDLAction, sqlparser.DropDDLAction:
   397  		// CREATE TABLE, DROP TABLE are allowed to run concurrently.
   398  		return action, true
   399  	case sqlparser.AlterDDLAction:
   400  		// ALTER is only allowed concurrent execution if this is a Vitess migration
   401  		strategy := onlineDDL.StrategySetting().Strategy
   402  		return action, (strategy == schema.DDLStrategyOnline || strategy == schema.DDLStrategyVitess)
   403  	case sqlparser.RevertDDLAction:
   404  		// REVERT is allowed to run concurrently.
   405  		// Reminder that REVERT is supported for CREATE, DROP and for 'vitess' ALTER, but never for
   406  		// 'gh-ost' or 'pt-osc' ALTERs
   407  		return action, true
   408  	}
   409  	return action, false
   410  }
   411  
   412  func (e *Executor) proposedMigrationConflictsWithRunningMigration(runningMigration, proposedMigration *schema.OnlineDDL) bool {
   413  	if runningMigration.Table == proposedMigration.Table {
   414  		// migrations operate on same table
   415  		return true
   416  	}
   417  	_, isRunningMigrationAllowConcurrent := e.allowConcurrentMigration(runningMigration)
   418  	proposedMigrationAction, isProposedMigrationAllowConcurrent := e.allowConcurrentMigration(proposedMigration)
   419  	if !isRunningMigrationAllowConcurrent && !isProposedMigrationAllowConcurrent {
   420  		// neither allowed concurrently
   421  		return true
   422  	}
   423  	if proposedMigrationAction == sqlparser.AlterDDLAction {
   424  		// A new ALTER migration conflicts with an existing migration if the existing migration is still not ready to complete.
   425  		// Specifically, if the running migration is an ALTER, and is still busy with copying rows (copy_state), then
   426  		// we consider the two to be conflicting. But, if the running migration is done copying rows, and is now only
   427  		// applying binary logs, and is up-to-date, then we consider a new ALTER migration to be non-conflicting.
   428  		return atomic.LoadInt64(&runningMigration.ReadyToComplete) == 0
   429  	}
   430  	return false
   431  }
   432  
   433  // isAnyConflictingMigrationRunning checks if there's any running migration that conflicts with the
   434  // given migration, such that they can't both run concurrently.
   435  func (e *Executor) isAnyConflictingMigrationRunning(onlineDDL *schema.OnlineDDL) (conflictFound bool, conflictingMigration *schema.OnlineDDL) {
   436  	e.ownedRunningMigrations.Range(func(_, val any) bool {
   437  		runningMigration, ok := val.(*schema.OnlineDDL)
   438  		if !ok {
   439  			return true // continue iteration
   440  		}
   441  		if e.proposedMigrationConflictsWithRunningMigration(runningMigration, onlineDDL) {
   442  			conflictingMigration = runningMigration
   443  			return false // stop iteration, no need to review other migrations
   444  		}
   445  		return true // continue iteration
   446  	})
   447  	return (conflictingMigration != nil), conflictingMigration
   448  }
   449  
   450  func (e *Executor) ghostPanicFlagFileName(uuid string) string {
   451  	return path.Join(os.TempDir(), fmt.Sprintf("ghost.%s.panic.flag", uuid))
   452  }
   453  
   454  func (e *Executor) createGhostPanicFlagFile(uuid string) error {
   455  	_, err := os.Create(e.ghostPanicFlagFileName(uuid))
   456  	return err
   457  }
   458  
   459  func (e *Executor) deleteGhostPanicFlagFile(uuid string) error {
   460  	// We use RemoveAll because if the file does not exist that's fine. Remove will return an error
   461  	// if file does not exist; RemoveAll does not.
   462  	return os.RemoveAll(e.ghostPanicFlagFileName(uuid))
   463  }
   464  
   465  func (e *Executor) ghostPostponeFlagFileName(uuid string) string {
   466  	return path.Join(os.TempDir(), fmt.Sprintf("ghost.%s.postpone.flag", uuid))
   467  }
   468  
   469  func (e *Executor) deleteGhostPostponeFlagFile(uuid string) error {
   470  	// We use RemoveAll because if the file does not exist that's fine. Remove will return an error
   471  	// if file does not exist; RemoveAll does not.
   472  	return os.RemoveAll(e.ghostPostponeFlagFileName(uuid))
   473  }
   474  
   475  func (e *Executor) ptPidFileName(uuid string) string {
   476  	return path.Join(os.TempDir(), fmt.Sprintf("pt-online-schema-change.%s.pid", uuid))
   477  }
   478  
   479  // readMySQLVariables contacts the backend MySQL server to read some of its configuration
   480  func (e *Executor) readMySQLVariables(ctx context.Context) (variables *mysqlVariables, err error) {
   481  	conn, err := e.pool.Get(ctx, nil)
   482  	if err != nil {
   483  		return nil, err
   484  	}
   485  	defer conn.Recycle()
   486  
   487  	tm, err := conn.Exec(ctx, `select
   488  			@@global.hostname as hostname,
   489  			@@global.port as port,
   490  			@@global.read_only as read_only,
   491  			@@global.version AS version,
   492  			@@global.version_comment AS version_comment
   493  		from dual`, 1, true)
   494  	if err != nil {
   495  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "could not read MySQL variables: %v", err)
   496  	}
   497  	row := tm.Named().Row()
   498  	if row == nil {
   499  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "unexpected result for MySQL variables: %+v", tm.Rows)
   500  	}
   501  	variables = &mysqlVariables{}
   502  
   503  	if e.env.Config().DB.Host != "" {
   504  		variables.host = e.env.Config().DB.Host
   505  	} else {
   506  		variables.host = row["hostname"].ToString()
   507  	}
   508  
   509  	if e.env.Config().DB.Port != 0 {
   510  		variables.port = e.env.Config().DB.Port
   511  	} else if port, err := row.ToInt64("port"); err != nil {
   512  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "could not parse @@global.port %v: %v", tm, err)
   513  	} else {
   514  		variables.port = int(port)
   515  	}
   516  	if variables.readOnly, err = row.ToBool("read_only"); err != nil {
   517  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "could not parse @@global.read_only %v: %v", tm, err)
   518  	}
   519  
   520  	variables.version = row["version"].ToString()
   521  	variables.versionComment = row["version_comment"].ToString()
   522  
   523  	return variables, nil
   524  }
   525  
   526  // createOnlineDDLUser creates a gh-ost or pt-osc user account with all
   527  // neccessary privileges and with a random password
   528  func (e *Executor) createOnlineDDLUser(ctx context.Context) (password string, err error) {
   529  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaConnector())
   530  	if err != nil {
   531  		return password, err
   532  	}
   533  	defer conn.Close()
   534  
   535  	password = RandomHash()[0:maxPasswordLength]
   536  
   537  	for _, query := range sqlCreateOnlineDDLUser {
   538  		parsed := sqlparser.BuildParsedQuery(query, onlineDDLGrant, password)
   539  		if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil {
   540  			return password, err
   541  		}
   542  	}
   543  	for _, query := range sqlGrantOnlineDDLSuper {
   544  		parsed := sqlparser.BuildParsedQuery(query, onlineDDLGrant)
   545  		conn.ExecuteFetch(parsed.Query, 0, false)
   546  		// We ignore failure, since we might not be able to grant
   547  		// SUPER privs (e.g. Aurora)
   548  	}
   549  	for _, query := range sqlGrantOnlineDDLUser {
   550  		parsed := sqlparser.BuildParsedQuery(query, onlineDDLGrant)
   551  		if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil {
   552  			return password, err
   553  		}
   554  	}
   555  	return password, err
   556  }
   557  
   558  // dropOnlineDDLUser drops the given ddl user account at the end of migration
   559  func (e *Executor) dropOnlineDDLUser(ctx context.Context) error {
   560  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaConnector())
   561  	if err != nil {
   562  		return err
   563  	}
   564  	defer conn.Close()
   565  
   566  	parsed := sqlparser.BuildParsedQuery(sqlDropOnlineDDLUser, onlineDDLGrant)
   567  	_, err = conn.ExecuteFetch(parsed.Query, 0, false)
   568  	return err
   569  }
   570  
   571  // tableExists checks if a given table exists.
   572  func (e *Executor) tableExists(ctx context.Context, tableName string) (bool, error) {
   573  	tableName = strings.ReplaceAll(tableName, `_`, `\_`)
   574  	parsed := sqlparser.BuildParsedQuery(sqlShowTablesLike, tableName)
   575  	rs, err := e.execQuery(ctx, parsed.Query)
   576  	if err != nil {
   577  		return false, err
   578  	}
   579  	row := rs.Named().Row()
   580  	return (row != nil), nil
   581  }
   582  
   583  // showCreateTable returns the SHOW CREATE statement for a table or a view
   584  func (e *Executor) showCreateTable(ctx context.Context, tableName string) (string, error) {
   585  	parsed := sqlparser.BuildParsedQuery(sqlShowCreateTable, tableName)
   586  	rs, err := e.execQuery(ctx, parsed.Query)
   587  	if err != nil {
   588  		return "", err
   589  	}
   590  	if len(rs.Rows) == 0 {
   591  		return "", nil
   592  	}
   593  	row := rs.Rows[0]
   594  	return row[1].ToString(), nil
   595  }
   596  
   597  func (e *Executor) parseAlterOptions(ctx context.Context, onlineDDL *schema.OnlineDDL) string {
   598  	// Temporary hack (2020-08-11)
   599  	// Because sqlparser does not do full blown ALTER TABLE parsing,
   600  	// and because we don't want gh-ost to know about WITH_GHOST and WITH_PT syntax,
   601  	// we resort to regexp-based parsing of the query.
   602  	// TODO(shlomi): generate _alter options_ via sqlparser when it full supports ALTER TABLE syntax.
   603  	_, _, alterOptions := schema.ParseAlterTableOptions(onlineDDL.SQL)
   604  	return alterOptions
   605  }
   606  
   607  // executeDirectly runs a DDL query directly on the backend MySQL server
   608  func (e *Executor) executeDirectly(ctx context.Context, onlineDDL *schema.OnlineDDL, acceptableMySQLErrorCodes ...int) (acceptableErrorCodeFound bool, err error) {
   609  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
   610  	if err != nil {
   611  		return false, err
   612  	}
   613  	defer conn.Close()
   614  
   615  	restoreSQLModeFunc, err := e.initMigrationSQLMode(ctx, onlineDDL, conn)
   616  	defer restoreSQLModeFunc()
   617  	if err != nil {
   618  		return false, err
   619  	}
   620  
   621  	_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusRunning, false, progressPctStarted, etaSecondsUnknown, rowsCopiedUnknown, emptyHint)
   622  	_, err = conn.ExecuteFetch(onlineDDL.SQL, 0, false)
   623  
   624  	if err != nil {
   625  		// let's see if this error is actually acceptable
   626  		if merr, ok := err.(*mysql.SQLError); ok {
   627  			for _, acceptableCode := range acceptableMySQLErrorCodes {
   628  				if merr.Num == acceptableCode {
   629  					// we don't consider this to be an error.
   630  					acceptableErrorCodeFound = true
   631  					err = nil
   632  					break
   633  				}
   634  			}
   635  		}
   636  	}
   637  	if err != nil {
   638  		return false, err
   639  	}
   640  	defer e.reloadSchema(ctx)
   641  	_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
   642  
   643  	return acceptableErrorCodeFound, nil
   644  }
   645  
   646  // doesConnectionInfoMatch checks if theres a MySQL connection in PROCESSLIST whose Info matches given text
   647  func (e *Executor) doesConnectionInfoMatch(ctx context.Context, connID int64, submatch string) (bool, error) {
   648  	findProcessQuery, err := sqlparser.ParseAndBind(sqlFindProcess,
   649  		sqltypes.Int64BindVariable(connID),
   650  		sqltypes.StringBindVariable("%"+submatch+"%"),
   651  	)
   652  	if err != nil {
   653  		return false, err
   654  	}
   655  	rs, err := e.execQuery(ctx, findProcessQuery)
   656  	if err != nil {
   657  		return false, err
   658  	}
   659  	return len(rs.Rows) == 1, nil
   660  }
   661  
   662  // tableParticipatesInForeignKeyRelationship checks if a given table is either a parent or a child in at least one foreign key constraint
   663  func (e *Executor) tableParticipatesInForeignKeyRelationship(ctx context.Context, schema string, table string) (bool, error) {
   664  	for _, fkQuery := range []string{selSelectCountFKParentConstraints, selSelectCountFKChildConstraints} {
   665  		query, err := sqlparser.ParseAndBind(fkQuery,
   666  			sqltypes.StringBindVariable(schema),
   667  			sqltypes.StringBindVariable(table),
   668  		)
   669  		if err != nil {
   670  			return false, err
   671  		}
   672  		r, err := e.execQuery(ctx, query)
   673  		if err != nil {
   674  			return false, err
   675  		}
   676  		row := r.Named().Row()
   677  		if row == nil {
   678  			return false, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "unexpected result from INFORMATION_SCHEMA.KEY_COLUMN_USAGE query: %s", query)
   679  		}
   680  		countFKConstraints := row.AsInt64("num_fk_constraints", 0)
   681  		if countFKConstraints > 0 {
   682  			return true, nil
   683  		}
   684  	}
   685  	return false, nil
   686  }
   687  
   688  // validateTableForAlterAction checks whether a table is good to undergo a ALTER operation. It returns detailed error if not.
   689  func (e *Executor) validateTableForAlterAction(ctx context.Context, onlineDDL *schema.OnlineDDL) (err error) {
   690  	if !onlineDDL.StrategySetting().IsAllowForeignKeysFlag() {
   691  		// Validate table does not participate in foreign key relationship:
   692  		participates, err := e.tableParticipatesInForeignKeyRelationship(ctx, onlineDDL.Schema, onlineDDL.Table)
   693  		if err != nil {
   694  			return vterrors.Wrapf(err, "error while attempting to validate whether table %s participates in FOREIGN KEY constraint", onlineDDL.Table)
   695  		}
   696  		if participates {
   697  			return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "table %s participates in a FOREIGN KEY constraint and FOREIGN KEY constraints are not supported in Online DDL unless the *experimental and unsafe* --unsafe-allow-foreign-keys strategy flag is specified", onlineDDL.Table)
   698  		}
   699  	}
   700  	return nil
   701  }
   702  
   703  // primaryPosition returns the MySQL/MariaDB position (typically GTID pos) on the tablet
   704  func (e *Executor) primaryPosition(ctx context.Context) (pos mysql.Position, err error) {
   705  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
   706  	if err != nil {
   707  		return pos, err
   708  	}
   709  	defer conn.Close()
   710  
   711  	pos, err = conn.PrimaryPosition()
   712  	return pos, err
   713  }
   714  
   715  // terminateVReplMigration stops vreplication, then removes the _vt.vreplication entry for the given migration
   716  func (e *Executor) terminateVReplMigration(ctx context.Context, uuid string) error {
   717  	tmClient := e.tabletManagerClient()
   718  	defer tmClient.Close()
   719  
   720  	tablet, err := e.ts.GetTablet(ctx, e.tabletAlias)
   721  	if err != nil {
   722  		return err
   723  	}
   724  	query, err := sqlparser.ParseAndBind(sqlStopVReplStream,
   725  		sqltypes.StringBindVariable(e.dbName),
   726  		sqltypes.StringBindVariable(uuid),
   727  	)
   728  	if err != nil {
   729  		return err
   730  	}
   731  	// silently skip error; stopping the stream is just a graceful act; later deleting it is more important
   732  	if _, err := e.vreplicationExec(ctx, tablet.Tablet, query); err != nil {
   733  		log.Errorf("FAIL vreplicationExec: uuid=%s, query=%v, error=%v", uuid, query, err)
   734  	}
   735  
   736  	if err := e.deleteVReplicationEntry(ctx, uuid); err != nil {
   737  		return err
   738  	}
   739  	return nil
   740  }
   741  
   742  // cutOverVReplMigration stops vreplication, then removes the _vt.vreplication entry for the given migration
   743  func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream) error {
   744  	if err := e.incrementCutoverAttempts(ctx, s.workflow); err != nil {
   745  		return err
   746  	}
   747  
   748  	tmClient := e.tabletManagerClient()
   749  	defer tmClient.Close()
   750  
   751  	// sanity checks:
   752  	vreplTable, err := getVreplTable(ctx, s)
   753  	if err != nil {
   754  		return err
   755  	}
   756  
   757  	// get topology client & entities:
   758  	tablet, err := e.ts.GetTablet(ctx, e.tabletAlias)
   759  	if err != nil {
   760  		return err
   761  	}
   762  
   763  	// information about source tablet
   764  	onlineDDL, _, err := e.readMigration(ctx, s.workflow)
   765  	if err != nil {
   766  		return err
   767  	}
   768  	isVreplicationTestSuite := onlineDDL.StrategySetting().IsVreplicationTestSuite()
   769  	e.updateMigrationStage(ctx, onlineDDL.UUID, "starting cut-over")
   770  
   771  	var sentryTableName string
   772  
   773  	waitForPos := func(s *VReplStream, pos mysql.Position) error {
   774  		ctx, cancel := context.WithTimeout(ctx, vreplicationCutOverThreshold)
   775  		defer cancel()
   776  		// Wait for target to reach the up-to-date pos
   777  		if err := tmClient.VReplicationWaitForPos(ctx, tablet.Tablet, int(s.id), mysql.EncodePosition(pos)); err != nil {
   778  			return err
   779  		}
   780  		// Target is now in sync with source!
   781  		return nil
   782  	}
   783  
   784  	if !isVreplicationTestSuite {
   785  		// A bit early on, we generate names for stowaway and temporary tables
   786  		// We do this here because right now we're in a safe place where nothing happened yet. If there's an error now, bail out
   787  		// and no harm done.
   788  		// Later on, when traffic is blocked and tables renamed, that's a more dangerous place to be in; we want as little logic
   789  		// in that place as possible.
   790  		sentryTableName, err = schema.GenerateGCTableName(schema.HoldTableGCState, newGCTableRetainTime())
   791  		if err != nil {
   792  			return nil
   793  		}
   794  
   795  		// We create the sentry table before toggling writes, because this involves a WaitForPos, which takes some time. We
   796  		// don't want to overload the buffering time with this excessive wait.
   797  
   798  		if err := e.updateArtifacts(ctx, onlineDDL.UUID, sentryTableName); err != nil {
   799  			return err
   800  		}
   801  		parsed := sqlparser.BuildParsedQuery(sqlCreateSentryTable, sentryTableName)
   802  		if _, err := e.execQuery(ctx, parsed.Query); err != nil {
   803  			return err
   804  		}
   805  		e.updateMigrationStage(ctx, onlineDDL.UUID, "sentry table created: %s", sentryTableName)
   806  
   807  		postSentryPos, err := e.primaryPosition(ctx)
   808  		if err != nil {
   809  			return err
   810  		}
   811  		e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-sentry pos: %v", mysql.EncodePosition(postSentryPos))
   812  		if err := waitForPos(s, postSentryPos); err != nil {
   813  			return err
   814  		}
   815  		e.updateMigrationStage(ctx, onlineDDL.UUID, "post-sentry pos reached")
   816  	}
   817  
   818  	lockConn, err := e.pool.Get(ctx, nil)
   819  	if err != nil {
   820  		return err
   821  	}
   822  	defer lockConn.Recycle()
   823  	defer lockConn.Exec(ctx, sqlUnlockTables, 1, false)
   824  
   825  	renameConn, err := e.pool.Get(ctx, nil)
   826  	if err != nil {
   827  		return err
   828  	}
   829  	defer renameConn.Recycle()
   830  	defer renameConn.Kill("premature exit while renaming tables", 0)
   831  	renameQuery := sqlparser.BuildParsedQuery(sqlSwapTables, onlineDDL.Table, sentryTableName, vreplTable, onlineDDL.Table, sentryTableName, vreplTable)
   832  
   833  	waitForRenameProcess := func() error {
   834  		// This function waits until it finds the RENAME TABLE... query running in MySQL's PROCESSLIST, or until timeout
   835  		// The function assumes that one of the renamed tables is locked, thus causing the RENAME to block. If nothing
   836  		// is locked, then the RENAME will be near-instantaneious and it's unlikely that the function will find it.
   837  		renameWaitCtx, cancel := context.WithTimeout(ctx, vreplicationCutOverThreshold)
   838  		defer cancel()
   839  
   840  		for {
   841  			renameProcessFound, err := e.doesConnectionInfoMatch(renameWaitCtx, renameConn.ID(), "rename")
   842  			if err != nil {
   843  				return err
   844  			}
   845  			if renameProcessFound {
   846  				return nil
   847  			}
   848  			select {
   849  			case <-renameWaitCtx.Done():
   850  				return vterrors.Errorf(vtrpcpb.Code_ABORTED, "timeout for rename query: %s", renameQuery.Query)
   851  			case <-time.After(time.Second):
   852  				// sleep
   853  			}
   854  		}
   855  	}
   856  
   857  	renameCompleteChan := make(chan error)
   858  
   859  	bufferingCtx, bufferingContextCancel := context.WithCancel(ctx)
   860  	defer bufferingContextCancel()
   861  	// Preparation is complete. We proceed to cut-over.
   862  	toggleBuffering := func(bufferQueries bool) error {
   863  		log.Infof("toggling buffering: %t in migration %v", bufferQueries, onlineDDL.UUID)
   864  		e.toggleBufferTableFunc(bufferingCtx, onlineDDL.Table, bufferQueries)
   865  		if !bufferQueries {
   866  			// called after new table is in place.
   867  			// unbuffer existing queries:
   868  			bufferingContextCancel()
   869  			// force re-read of tables
   870  			if err := tmClient.RefreshState(ctx, tablet.Tablet); err != nil {
   871  				return err
   872  			}
   873  		}
   874  		log.Infof("toggled buffering: %t in migration %v", bufferQueries, onlineDDL.UUID)
   875  		return nil
   876  	}
   877  
   878  	var reenableOnce sync.Once
   879  	reenableWritesOnce := func() {
   880  		reenableOnce.Do(func() {
   881  			log.Infof("re-enabling writes in migration %v", onlineDDL.UUID)
   882  			toggleBuffering(false)
   883  			go log.Infof("cutOverVReplMigration %v: unbuffered queries", s.workflow)
   884  		})
   885  	}
   886  	e.updateMigrationStage(ctx, onlineDDL.UUID, "buffering queries")
   887  	// stop writes on source:
   888  	err = toggleBuffering(true)
   889  	defer reenableWritesOnce()
   890  	if err != nil {
   891  		return err
   892  	}
   893  	// Give a fraction of a second for a scenario where a query is in
   894  	// query executor, it passed the ACLs and is _about to_ execute. This will be nicer to those queries:
   895  	// they will be able to complete before the rename, rather than block briefly on the rename only to find
   896  	// the table no longer exists.
   897  	e.updateMigrationStage(ctx, onlineDDL.UUID, "graceful wait for buffering")
   898  	time.Sleep(100 * time.Millisecond)
   899  
   900  	if isVreplicationTestSuite {
   901  		// The testing suite may inject queries internally from the server via a recurring EVENT.
   902  		// Those queries are unaffected by query rules (ACLs) because they don't go through Vitess.
   903  		// We therefore hard-rename the table into an agreed upon name, and we won't swap it with
   904  		// the original table. We will actually make the table disappear, creating a void.
   905  		testSuiteBeforeTableName := fmt.Sprintf("%s_before", onlineDDL.Table)
   906  		parsed := sqlparser.BuildParsedQuery(sqlRenameTable, onlineDDL.Table, testSuiteBeforeTableName)
   907  		if _, err := e.execQuery(ctx, parsed.Query); err != nil {
   908  			return err
   909  		}
   910  		e.updateMigrationStage(ctx, onlineDDL.UUID, "test suite 'before' table renamed")
   911  	} else {
   912  		// real production
   913  
   914  		e.updateMigrationStage(ctx, onlineDDL.UUID, "locking tables")
   915  		lockCtx, cancel := context.WithTimeout(ctx, vreplicationCutOverThreshold)
   916  		defer cancel()
   917  		lockTableQuery := sqlparser.BuildParsedQuery(sqlLockTwoTablesWrite, sentryTableName, onlineDDL.Table)
   918  		if _, err := lockConn.Exec(lockCtx, lockTableQuery.Query, 1, false); err != nil {
   919  			return err
   920  		}
   921  
   922  		e.updateMigrationStage(ctx, onlineDDL.UUID, "renaming tables")
   923  		go func() {
   924  			_, err := renameConn.Exec(ctx, renameQuery.Query, 1, false)
   925  			renameCompleteChan <- err
   926  		}()
   927  		// the rename should block, because of the LOCK. Wait for it to show up.
   928  		e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for RENAME to block")
   929  		if err := waitForRenameProcess(); err != nil {
   930  			return err
   931  		}
   932  		e.updateMigrationStage(ctx, onlineDDL.UUID, "RENAME found")
   933  	}
   934  
   935  	e.updateMigrationStage(ctx, onlineDDL.UUID, "reading post-lock pos")
   936  	postWritesPos, err := e.primaryPosition(ctx)
   937  	if err != nil {
   938  		return err
   939  	}
   940  
   941  	// Right now: new queries are buffered, any existing query will have executed, and worst case scenario is
   942  	// that some leftover query finds the table is not actually there anymore...
   943  	// At any case, there's definitely no more writes to the table since it does not exist. We can
   944  	// safely take the (GTID) pos now.
   945  	_ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", s.workflow)
   946  
   947  	// Writes are now disabled on table. Read up-to-date vreplication info, specifically to get latest (and fixed) pos:
   948  	s, err = e.readVReplStream(ctx, s.workflow, false)
   949  	if err != nil {
   950  		return err
   951  	}
   952  
   953  	e.updateMigrationStage(ctx, onlineDDL.UUID, "waiting for post-lock pos: %v", mysql.EncodePosition(postWritesPos))
   954  	if err := waitForPos(s, postWritesPos); err != nil {
   955  		e.updateMigrationStage(ctx, onlineDDL.UUID, "timeout while waiting for post-lock pos: %v", err)
   956  		return err
   957  	}
   958  	go log.Infof("cutOverVReplMigration %v: done waiting for position %v", s.workflow, mysql.EncodePosition(postWritesPos))
   959  	// Stop vreplication
   960  	e.updateMigrationStage(ctx, onlineDDL.UUID, "stopping vreplication")
   961  	if _, err := e.vreplicationExec(ctx, tablet.Tablet, binlogplayer.StopVReplication(uint32(s.id), "stopped for online DDL cutover")); err != nil {
   962  		return err
   963  	}
   964  	go log.Infof("cutOverVReplMigration %v: stopped vreplication", s.workflow)
   965  
   966  	// rename tables atomically (remember, writes on source tables are stopped)
   967  	{
   968  		if isVreplicationTestSuite {
   969  			// this is used in Vitess endtoend testing suite
   970  			testSuiteAfterTableName := fmt.Sprintf("%s_after", onlineDDL.Table)
   971  			parsed := sqlparser.BuildParsedQuery(sqlRenameTable, vreplTable, testSuiteAfterTableName)
   972  			if _, err := e.execQuery(ctx, parsed.Query); err != nil {
   973  				return err
   974  			}
   975  			e.updateMigrationStage(ctx, onlineDDL.UUID, "test suite 'after' table renamed")
   976  		} else {
   977  			e.updateMigrationStage(ctx, onlineDDL.UUID, "validating rename is still in place")
   978  			if err := waitForRenameProcess(); err != nil {
   979  				return err
   980  			}
   981  
   982  			// Normal (non-testing) alter table
   983  			e.updateMigrationStage(ctx, onlineDDL.UUID, "dropping sentry table")
   984  
   985  			{
   986  				dropTableQuery := sqlparser.BuildParsedQuery(sqlDropTable, sentryTableName)
   987  				lockCtx, cancel := context.WithTimeout(ctx, vreplicationCutOverThreshold)
   988  				defer cancel()
   989  				if _, err := lockConn.Exec(lockCtx, dropTableQuery.Query, 1, false); err != nil {
   990  					return err
   991  				}
   992  			}
   993  			{
   994  				lockCtx, cancel := context.WithTimeout(ctx, vreplicationCutOverThreshold)
   995  				defer cancel()
   996  				e.updateMigrationStage(ctx, onlineDDL.UUID, "unlocking tables")
   997  				if _, err := lockConn.Exec(lockCtx, sqlUnlockTables, 1, false); err != nil {
   998  					return err
   999  				}
  1000  			}
  1001  			{
  1002  				lockCtx, cancel := context.WithTimeout(ctx, vreplicationCutOverThreshold)
  1003  				defer cancel()
  1004  				e.updateMigrationStage(lockCtx, onlineDDL.UUID, "waiting for RENAME to complete")
  1005  				if err := <-renameCompleteChan; err != nil {
  1006  					return err
  1007  				}
  1008  			}
  1009  		}
  1010  	}
  1011  	e.updateMigrationStage(ctx, onlineDDL.UUID, "cut-over complete")
  1012  	e.ownedRunningMigrations.Delete(onlineDDL.UUID)
  1013  
  1014  	go func() {
  1015  		// Tables are swapped! Let's take the opportunity to ReloadSchema now
  1016  		// We do this in a goroutine because it might take time on a schema with thousands of tables, and we don't want to delay
  1017  		// the cut-over.
  1018  		// this means ReloadSchema is not in sync with the actual schema change. Users will still need to run tracker if they want to sync.
  1019  		// In the future, we will want to reload the single table, instead of reloading the schema.
  1020  		if err := e.reloadSchema(ctx); err != nil {
  1021  			vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Error on ReloadSchema while cutting over vreplication migration UUID: %+v", onlineDDL.UUID)
  1022  		}
  1023  	}()
  1024  
  1025  	// Tables are now swapped! Migration is successful
  1026  	e.updateMigrationStage(ctx, onlineDDL.UUID, "re-enabling writes")
  1027  	reenableWritesOnce() // this function is also deferred, in case of early return; but now would be a good time to resume writes, before we publish the migration as "complete"
  1028  	go log.Infof("cutOverVReplMigration %v: marking as complete", s.workflow)
  1029  	_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, s.rowsCopied, emptyHint)
  1030  	return nil
  1031  
  1032  	// deferred function will re-enable writes now
  1033  }
  1034  
  1035  // initMigrationSQLMode sets sql_mode according to DDL strategy, and returns a function that
  1036  // restores sql_mode to original state
  1037  func (e *Executor) initMigrationSQLMode(ctx context.Context, onlineDDL *schema.OnlineDDL, conn *dbconnpool.DBConnection) (deferFunc func(), err error) {
  1038  	deferFunc = func() {}
  1039  	if !onlineDDL.StrategySetting().IsAllowZeroInDateFlag() {
  1040  		// No need to change sql_mode.
  1041  		return deferFunc, nil
  1042  	}
  1043  
  1044  	// Grab current sql_mode value
  1045  	rs, err := conn.ExecuteFetch(`select @@session.sql_mode as sql_mode`, 1, true)
  1046  	if err != nil {
  1047  		return deferFunc, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "could not read sql_mode: %v", err)
  1048  	}
  1049  	sqlMode, err := rs.Named().Row().ToString("sql_mode")
  1050  	if err != nil {
  1051  		return deferFunc, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "could not read sql_mode: %v", err)
  1052  	}
  1053  	// Pre-calculate restore function
  1054  	deferFunc = func() {
  1055  		restoreSQLModeQuery := fmt.Sprintf("set @@session.sql_mode='%s'", sqlMode)
  1056  		conn.ExecuteFetch(restoreSQLModeQuery, 0, false)
  1057  	}
  1058  	// Change sql_mode
  1059  	changeSQLModeQuery := fmt.Sprintf("set @@session.sql_mode=REPLACE(REPLACE('%s', 'NO_ZERO_DATE', ''), 'NO_ZERO_IN_DATE', '')", sqlMode)
  1060  	if _, err := conn.ExecuteFetch(changeSQLModeQuery, 0, false); err != nil {
  1061  		return deferFunc, err
  1062  	}
  1063  	return deferFunc, nil
  1064  }
  1065  
  1066  // newConstraintName generates a new, unique name for a constraint. Our problem is that a MySQL
  1067  // constraint's name is unique in the schema (!). And so as we duplicate the original table, we must
  1068  // create completely new names for all constraints.
  1069  // Moreover, we really want this name to be consistent across all shards. We therefore use a deterministic
  1070  // UUIDv5 (SHA) function over the migration UUID, table name, and constraint's _contents_.
  1071  // We _also_ include the original constraint name as prefix, as room allows
  1072  // for example, if the original constraint name is "check_1",
  1073  // we might generate "check_1_cps1okb4uafunfqusi2lp22u3".
  1074  // If we then again migrate a table whose constraint name is "check_1_cps1okb4uafunfqusi2lp22u3	" we
  1075  // get for example "check_1_19l09s37kbhj4axnzmi10e18k" (hash changes, and we still try to preserve original name)
  1076  //
  1077  // Furthermore, per bug report https://bugs.mysql.com/bug.php?id=107772, if the user doesn't provide a name for
  1078  // their CHECK constraint, then MySQL picks a name in this format <tablename>_chk_<number>.
  1079  // Example: sometable_chk_1
  1080  // Next, when MySQL is asked to RENAME TABLE and sees a constraint with this format, it attempts to rename
  1081  // the constraint with the new table's name. This is problematic for Vitess, because we often rename tables to
  1082  // very long names, such as _vt_HOLD_394f9e6dfc3d11eca0390a43f95f28a3_20220706091048.
  1083  // As we rename the constraint to e.g. `sometable_chk_1_cps1okb4uafunfqusi2lp22u3`, this makes MySQL want to
  1084  // call the new constraint something like _vt_HOLD_394f9e6dfc3d11eca0390a43f95f28a3_20220706091048_chk_1_cps1okb4uafunfqusi2lp22u3,
  1085  // which exceeds the 64 character limit for table names. Long story short, we also trim down <tablename> if the constraint seems
  1086  // to be auto-generated.
  1087  func (e *Executor) newConstraintName(onlineDDL *schema.OnlineDDL, constraintType ConstraintType, hashExists map[string]bool, seed string, oldName string) string {
  1088  	constraintIndicator := constraintIndicatorMap[int(constraintType)]
  1089  	oldName = schemadiff.ExtractConstraintOriginalName(oldName)
  1090  	autoGeneratedName := fmt.Sprintf("%s_%s_", onlineDDL.Table, constraintIndicator)
  1091  	if strings.HasPrefix(oldName, autoGeneratedName) {
  1092  		// strip out table name
  1093  		oldName = constraintIndicator + "_" + oldName[len(autoGeneratedName):]
  1094  	}
  1095  
  1096  	hash := textutil.UUIDv5Base36(onlineDDL.UUID, onlineDDL.Table, seed)
  1097  	for i := 1; hashExists[hash]; i++ {
  1098  		hash = textutil.UUIDv5Base36(onlineDDL.UUID, onlineDDL.Table, seed, fmt.Sprintf("%d", i))
  1099  	}
  1100  	hashExists[hash] = true
  1101  	suffix := "_" + hash
  1102  	maxAllowedNameLength := maxConstraintNameLength - len(suffix)
  1103  	newName := oldName
  1104  	if newName == "" {
  1105  		newName = constraintIndicator // start with something that looks consistent with MySQL's naming
  1106  	}
  1107  	if len(newName) > maxAllowedNameLength {
  1108  		newName = newName[0:maxAllowedNameLength]
  1109  	}
  1110  	newName = newName + suffix
  1111  	return newName
  1112  }
  1113  
  1114  // validateAndEditCreateTableStatement inspects the CreateTable AST and does the following:
  1115  // - extra validation (no FKs for now...)
  1116  // - generate new and unique names for all constraints (CHECK and FK; yes, why not handle FK names; even as we don't support FKs today, we may in the future)
  1117  func (e *Executor) validateAndEditCreateTableStatement(ctx context.Context, onlineDDL *schema.OnlineDDL, createTable *sqlparser.CreateTable) (constraintMap map[string]string, err error) {
  1118  	constraintMap = map[string]string{}
  1119  	hashExists := map[string]bool{}
  1120  
  1121  	validateWalk := func(node sqlparser.SQLNode) (kontinue bool, err error) {
  1122  		switch node := node.(type) {
  1123  		case *sqlparser.ForeignKeyDefinition:
  1124  			if !onlineDDL.StrategySetting().IsAllowForeignKeysFlag() {
  1125  				return false, schema.ErrForeignKeyFound
  1126  			}
  1127  		case *sqlparser.ConstraintDefinition:
  1128  			oldName := node.Name.String()
  1129  			newName := e.newConstraintName(onlineDDL, GetConstraintType(node.Details), hashExists, sqlparser.CanonicalString(node.Details), oldName)
  1130  			node.Name = sqlparser.NewIdentifierCI(newName)
  1131  			constraintMap[oldName] = newName
  1132  		}
  1133  		return true, nil
  1134  	}
  1135  	if err := sqlparser.Walk(validateWalk, createTable); err != nil {
  1136  		return constraintMap, err
  1137  	}
  1138  	return constraintMap, nil
  1139  }
  1140  
  1141  // validateAndEditAlterTableStatement inspects the AlterTable statement and:
  1142  // - modifies any CONSTRAINT name according to given name mapping
  1143  // - explode ADD FULLTEXT KEY into multiple statements
  1144  func (e *Executor) validateAndEditAlterTableStatement(ctx context.Context, onlineDDL *schema.OnlineDDL, alterTable *sqlparser.AlterTable, constraintMap map[string]string) (alters []*sqlparser.AlterTable, err error) {
  1145  	hashExists := map[string]bool{}
  1146  	validateWalk := func(node sqlparser.SQLNode) (kontinue bool, err error) {
  1147  		switch node := node.(type) {
  1148  		case *sqlparser.DropKey:
  1149  			if node.Type == sqlparser.CheckKeyType {
  1150  				// drop a check constraint
  1151  				mappedName, ok := constraintMap[node.Name.String()]
  1152  				if !ok {
  1153  					return false, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "Found DROP CONSTRAINT: %v, but could not find constraint name in map", sqlparser.CanonicalString(node))
  1154  				}
  1155  				node.Name = sqlparser.NewIdentifierCI(mappedName)
  1156  			}
  1157  		case *sqlparser.AddConstraintDefinition:
  1158  			oldName := node.ConstraintDefinition.Name.String()
  1159  			newName := e.newConstraintName(onlineDDL, GetConstraintType(node.ConstraintDefinition.Details), hashExists, sqlparser.CanonicalString(node.ConstraintDefinition.Details), oldName)
  1160  			node.ConstraintDefinition.Name = sqlparser.NewIdentifierCI(newName)
  1161  			constraintMap[oldName] = newName
  1162  		}
  1163  		return true, nil
  1164  	}
  1165  	if err := sqlparser.Walk(validateWalk, alterTable); err != nil {
  1166  		return alters, err
  1167  	}
  1168  	alters = append(alters, alterTable)
  1169  	// Handle ADD FULLTEXT KEY statements
  1170  	countAddFullTextStatements := 0
  1171  	redactedOptions := make([]sqlparser.AlterOption, 0, len(alterTable.AlterOptions))
  1172  	for i := range alterTable.AlterOptions {
  1173  		opt := alterTable.AlterOptions[i]
  1174  		switch opt := opt.(type) {
  1175  		case sqlparser.AlgorithmValue:
  1176  			// we do not pass ALGORITHM. We choose our own ALGORITHM.
  1177  			continue
  1178  		case *sqlparser.AddIndexDefinition:
  1179  			if opt.IndexDefinition.Info.Fulltext {
  1180  				countAddFullTextStatements++
  1181  				if countAddFullTextStatements > 1 {
  1182  					// We've already got one ADD FULLTEXT KEY. We can't have another
  1183  					// in the same statement
  1184  					extraAlterTable := &sqlparser.AlterTable{
  1185  						Table:        alterTable.Table,
  1186  						AlterOptions: []sqlparser.AlterOption{opt, copyAlgorithm},
  1187  					}
  1188  					alters = append(alters, extraAlterTable)
  1189  					continue
  1190  				}
  1191  			}
  1192  		}
  1193  		redactedOptions = append(redactedOptions, opt)
  1194  	}
  1195  	alterTable.AlterOptions = redactedOptions
  1196  	alterTable.AlterOptions = append(alterTable.AlterOptions, copyAlgorithm)
  1197  	return alters, nil
  1198  }
  1199  
  1200  // createTableLike creates the table named by `newTableName` in the likeness of onlineDDL.Table
  1201  // This function emulates MySQL's `CREATE TABLE LIKE ...` statement. The difference is that this function takes control over the generated CONSTRAINT names,
  1202  // if any, such that they are detrministic across shards, as well as preserve original names where possible.
  1203  func (e *Executor) createTableLike(ctx context.Context, newTableName string, onlineDDL *schema.OnlineDDL, conn *dbconnpool.DBConnection) (constraintMap map[string]string, err error) {
  1204  	existingShowCreateTable, err := e.showCreateTable(ctx, onlineDDL.Table)
  1205  	if err != nil {
  1206  		return nil, vterrors.Wrapf(err, "in createTableLike(), newTableName=%s", newTableName)
  1207  	}
  1208  	stmt, err := sqlparser.ParseStrictDDL(existingShowCreateTable)
  1209  	if err != nil {
  1210  		return nil, err
  1211  	}
  1212  	createTable, ok := stmt.(*sqlparser.CreateTable)
  1213  	if !ok {
  1214  		return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "expected CreateTable statement, got: %v", sqlparser.CanonicalString(stmt))
  1215  	}
  1216  	createTable.SetTable(createTable.GetTable().Qualifier.CompliantName(), newTableName)
  1217  	// manipulate CreateTable statement: take care of constraints names which have to be
  1218  	// unique across the schema
  1219  	constraintMap, err = e.validateAndEditCreateTableStatement(ctx, onlineDDL, createTable)
  1220  	if err != nil {
  1221  		return nil, err
  1222  	}
  1223  	// Create the table
  1224  	if _, err := conn.ExecuteFetch(sqlparser.CanonicalString(createTable), 0, false); err != nil {
  1225  		return nil, err
  1226  	}
  1227  	return constraintMap, nil
  1228  }
  1229  
  1230  // initVreplicationOriginalMigration performs the first steps towards running a VRepl ALTER migration:
  1231  // - analyze the original table
  1232  // - formalize a new CreateTable statement
  1233  // - inspect the ALTER TABLE query
  1234  // - formalize an AlterTable statement
  1235  // - create the vrepl table
  1236  // - modify the vrepl table
  1237  // - Create and return a VRepl instance
  1238  func (e *Executor) initVreplicationOriginalMigration(ctx context.Context, onlineDDL *schema.OnlineDDL, conn *dbconnpool.DBConnection) (v *VRepl, err error) {
  1239  	restoreSQLModeFunc, err := e.initMigrationSQLMode(ctx, onlineDDL, conn)
  1240  	defer restoreSQLModeFunc()
  1241  	if err != nil {
  1242  		return v, err
  1243  	}
  1244  
  1245  	vreplTableName := fmt.Sprintf("_%s_%s_vrepl", onlineDDL.UUID, ReadableTimestamp())
  1246  	if err := e.updateArtifacts(ctx, onlineDDL.UUID, vreplTableName); err != nil {
  1247  		return v, err
  1248  	}
  1249  	constraintMap, err := e.createTableLike(ctx, vreplTableName, onlineDDL, conn)
  1250  	if err != nil {
  1251  		return nil, err
  1252  	}
  1253  	{
  1254  		stmt, err := sqlparser.ParseStrictDDL(onlineDDL.SQL)
  1255  		if err != nil {
  1256  			return nil, err
  1257  		}
  1258  		alterTable, ok := stmt.(*sqlparser.AlterTable)
  1259  		if !ok {
  1260  			return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "expected AlterTable statement, got: %v", sqlparser.CanonicalString(stmt))
  1261  		}
  1262  		// ALTER TABLE should apply to the vrepl table
  1263  		alterTable.SetTable(alterTable.GetTable().Qualifier.CompliantName(), vreplTableName)
  1264  		// Also, change any constraint names:
  1265  		alters, err := e.validateAndEditAlterTableStatement(ctx, onlineDDL, alterTable, constraintMap)
  1266  		if err != nil {
  1267  			return v, err
  1268  		}
  1269  		// Apply ALTER TABLE to materialized table
  1270  		for _, alter := range alters {
  1271  			if _, err := conn.ExecuteFetch(sqlparser.CanonicalString(alter), 0, false); err != nil {
  1272  				return v, err
  1273  			}
  1274  		}
  1275  	}
  1276  	v = NewVRepl(onlineDDL.UUID, e.keyspace, e.shard, e.dbName, onlineDDL.Table, vreplTableName, onlineDDL.SQL)
  1277  	return v, nil
  1278  }
  1279  
  1280  // postInitVreplicationOriginalMigration runs extra changes after a vreplication online DDL has been initialized.
  1281  // This function is called after both source and target tables have been analyzed, so there's more information
  1282  // about the two, and about the transition between the two.
  1283  func (e *Executor) postInitVreplicationOriginalMigration(ctx context.Context, onlineDDL *schema.OnlineDDL, v *VRepl, conn *dbconnpool.DBConnection) (err error) {
  1284  	if v.sourceAutoIncrement > 0 && !v.parser.IsAutoIncrementDefined() {
  1285  		restoreSQLModeFunc, err := e.initMigrationSQLMode(ctx, onlineDDL, conn)
  1286  		defer restoreSQLModeFunc()
  1287  		if err != nil {
  1288  			return err
  1289  		}
  1290  
  1291  		// Apply ALTER TABLE AUTO_INCREMENT=?
  1292  		parsed := sqlparser.BuildParsedQuery(sqlAlterTableAutoIncrement, v.targetTable, ":auto_increment")
  1293  		bindVars := map[string]*querypb.BindVariable{
  1294  			"auto_increment": sqltypes.Uint64BindVariable(v.sourceAutoIncrement),
  1295  		}
  1296  		bound, err := parsed.GenerateQuery(bindVars, nil)
  1297  		if err != nil {
  1298  			return err
  1299  		}
  1300  		if _, err := conn.ExecuteFetch(bound, 0, false); err != nil {
  1301  			return err
  1302  		}
  1303  	}
  1304  	return nil
  1305  }
  1306  
  1307  func (e *Executor) initVreplicationRevertMigration(ctx context.Context, onlineDDL *schema.OnlineDDL, revertMigration *schema.OnlineDDL) (v *VRepl, err error) {
  1308  	// Getting here we've already validated that migration is revertible
  1309  
  1310  	// Validation: vreplication still exists for reverted migration
  1311  	revertStream, err := e.readVReplStream(ctx, revertMigration.UUID, false)
  1312  	if err != nil {
  1313  		// cannot read the vreplication stream which we want to revert
  1314  		return nil, fmt.Errorf("can not revert vreplication migration %s because vreplication stream %s was not found", revertMigration.UUID, revertMigration.UUID)
  1315  	}
  1316  
  1317  	onlineDDL.Table = revertMigration.Table
  1318  	if err := e.updateMySQLTable(ctx, onlineDDL.UUID, onlineDDL.Table); err != nil {
  1319  		return nil, err
  1320  	}
  1321  
  1322  	vreplTableName, err := getVreplTable(ctx, revertStream)
  1323  	if err != nil {
  1324  		return nil, err
  1325  	}
  1326  
  1327  	if err := e.updateArtifacts(ctx, onlineDDL.UUID, vreplTableName); err != nil {
  1328  		return v, err
  1329  	}
  1330  	v = NewVRepl(onlineDDL.UUID, e.keyspace, e.shard, e.dbName, onlineDDL.Table, vreplTableName, "")
  1331  	v.pos = revertStream.pos
  1332  	return v, nil
  1333  }
  1334  
  1335  // ExecuteWithVReplication sets up the grounds for a vreplication schema migration
  1336  func (e *Executor) ExecuteWithVReplication(ctx context.Context, onlineDDL *schema.OnlineDDL, revertMigration *schema.OnlineDDL) error {
  1337  	// make sure there's no vreplication workflow running under same name
  1338  	_ = e.terminateVReplMigration(ctx, onlineDDL.UUID)
  1339  
  1340  	if conflictFound, conflictingMigration := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound {
  1341  		return vterrors.Wrapf(ErrExecutorMigrationAlreadyRunning, "conflicting migration: %v over table: %v", conflictingMigration.UUID, conflictingMigration.Table)
  1342  	}
  1343  
  1344  	if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY {
  1345  		return ErrExecutorNotWritableTablet
  1346  	}
  1347  
  1348  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
  1349  	if err != nil {
  1350  		return err
  1351  	}
  1352  	defer conn.Close()
  1353  
  1354  	e.ownedRunningMigrations.Store(onlineDDL.UUID, onlineDDL)
  1355  	if err := e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusRunning, false, progressPctStarted, etaSecondsUnknown, rowsCopiedUnknown, emptyHint); err != nil {
  1356  		return err
  1357  	}
  1358  
  1359  	var v *VRepl
  1360  	if revertMigration == nil {
  1361  		// Original ALTER TABLE request for vreplication
  1362  		v, err = e.initVreplicationOriginalMigration(ctx, onlineDDL, conn)
  1363  	} else {
  1364  		// this is a revert request
  1365  		v, err = e.initVreplicationRevertMigration(ctx, onlineDDL, revertMigration)
  1366  	}
  1367  	if err != nil {
  1368  		return err
  1369  	}
  1370  	if err := v.analyze(ctx, conn); err != nil {
  1371  		return err
  1372  	}
  1373  	if err := e.updateMigrationTableRows(ctx, onlineDDL.UUID, v.tableRows); err != nil {
  1374  		return err
  1375  	}
  1376  	removedUniqueKeyNames := []string{}
  1377  	for _, uniqueKey := range v.removedUniqueKeys {
  1378  		removedUniqueKeyNames = append(removedUniqueKeyNames, uniqueKey.Name)
  1379  	}
  1380  
  1381  	if err := e.updateSchemaAnalysis(ctx, onlineDDL.UUID,
  1382  		len(v.addedUniqueKeys),
  1383  		len(v.removedUniqueKeys),
  1384  		strings.Join(sqlescape.EscapeIDs(removedUniqueKeyNames), ","),
  1385  		strings.Join(sqlescape.EscapeIDs(v.droppedNoDefaultColumnNames), ","),
  1386  		strings.Join(sqlescape.EscapeIDs(v.expandedColumnNames), ","),
  1387  		v.revertibleNotes,
  1388  	); err != nil {
  1389  		return err
  1390  	}
  1391  	if revertMigration == nil {
  1392  		// Original ALTER TABLE request for vreplication
  1393  		if err := e.validateTableForAlterAction(ctx, onlineDDL); err != nil {
  1394  			return err
  1395  		}
  1396  		if err := e.postInitVreplicationOriginalMigration(ctx, onlineDDL, v, conn); err != nil {
  1397  			return err
  1398  		}
  1399  	}
  1400  
  1401  	{
  1402  		// We need to talk to tabletmanager's VREngine. But we're on TabletServer. While we live in the same
  1403  		// process as VREngine, it is actually simpler to get hold of it via gRPC, just like wrangler does.
  1404  		tablet, err := e.ts.GetTablet(ctx, e.tabletAlias)
  1405  		if err != nil {
  1406  			return err
  1407  		}
  1408  
  1409  		// reload schema before migration
  1410  		if err := e.reloadSchema(ctx); err != nil {
  1411  			return err
  1412  		}
  1413  
  1414  		// create vreplication entry
  1415  		insertVReplicationQuery, err := v.generateInsertStatement(ctx)
  1416  		if err != nil {
  1417  			return err
  1418  		}
  1419  		if _, err := e.vreplicationExec(ctx, tablet.Tablet, insertVReplicationQuery); err != nil {
  1420  			return err
  1421  		}
  1422  
  1423  		{
  1424  			// temporary hack. todo: this should be done when inserting any _vt.vreplication record across all workflow types
  1425  			query := fmt.Sprintf("update _vt.vreplication set workflow_type = %d where workflow = '%s'",
  1426  				binlogdatapb.VReplicationWorkflowType_OnlineDDL, v.workflow)
  1427  			if _, err := e.vreplicationExec(ctx, tablet.Tablet, query); err != nil {
  1428  				return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", tablet.Tablet, query)
  1429  			}
  1430  		}
  1431  		// start stream!
  1432  		startVReplicationQuery, err := v.generateStartStatement(ctx)
  1433  		if err != nil {
  1434  			return err
  1435  		}
  1436  		if _, err := e.vreplicationExec(ctx, tablet.Tablet, startVReplicationQuery); err != nil {
  1437  			return err
  1438  		}
  1439  	}
  1440  	return nil
  1441  }
  1442  
  1443  // ExecuteWithGhost validates and runs a gh-ost process.
  1444  // Validation included testing the backend MySQL server and the gh-ost binary itself
  1445  // Execution runs first a dry run, then an actual migration
  1446  func (e *Executor) ExecuteWithGhost(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
  1447  	if conflictFound, conflictingMigration := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound {
  1448  		return vterrors.Wrapf(ErrExecutorMigrationAlreadyRunning, "conflicting migration: %v over table: %v", conflictingMigration.UUID, conflictingMigration.Table)
  1449  	}
  1450  
  1451  	if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY {
  1452  		return ErrExecutorNotWritableTablet
  1453  	}
  1454  	variables, err := e.readMySQLVariables(ctx)
  1455  	if err != nil {
  1456  		log.Errorf("Error before running gh-ost: %+v", err)
  1457  		return err
  1458  	}
  1459  	if variables.readOnly {
  1460  		err := fmt.Errorf("Error before running gh-ost: MySQL server is read_only")
  1461  		log.Errorf(err.Error())
  1462  		return err
  1463  	}
  1464  	onlineDDLPassword, err := e.createOnlineDDLUser(ctx)
  1465  	if err != nil {
  1466  		err := fmt.Errorf("Error creating gh-ost user: %+v", err)
  1467  		log.Errorf(err.Error())
  1468  		return err
  1469  	}
  1470  	tempDir, err := createTempDir(onlineDDL.UUID)
  1471  	if err != nil {
  1472  		log.Errorf("Error creating temporary directory: %+v", err)
  1473  		return err
  1474  	}
  1475  	binaryFileName, _ := GhostBinaryFileName()
  1476  	credentialsConfigFileContent := fmt.Sprintf(`[client]
  1477  user=%s
  1478  password=${ONLINE_DDL_PASSWORD}
  1479  `, onlineDDLUser)
  1480  	credentialsConfigFileName, err := createTempScript(tempDir, "gh-ost-conf.cfg", credentialsConfigFileContent)
  1481  	if err != nil {
  1482  		log.Errorf("Error creating config file: %+v", err)
  1483  		return err
  1484  	}
  1485  	wrapperScriptContent := fmt.Sprintf(`#!/bin/bash
  1486  ghost_log_path="%s"
  1487  ghost_log_file="%s"
  1488  ghost_log_failure_file="%s"
  1489  
  1490  mkdir -p "$ghost_log_path"
  1491  
  1492  export ONLINE_DDL_PASSWORD
  1493  %s "$@" > "$ghost_log_path/$ghost_log_file" 2>&1
  1494  exit_code=$?
  1495  grep -o '\bFATAL\b.*' "$ghost_log_path/$ghost_log_file" | tail -1 > "$ghost_log_path/$ghost_log_failure_file"
  1496  exit $exit_code
  1497  	`, tempDir, migrationLogFileName, migrationFailureFileName, binaryFileName,
  1498  	)
  1499  	wrapperScriptFileName, err := createTempScript(tempDir, "gh-ost-wrapper.sh", wrapperScriptContent)
  1500  	if err != nil {
  1501  		log.Errorf("Error creating wrapper script: %+v", err)
  1502  		return err
  1503  	}
  1504  	onHookContent := func(status schema.OnlineDDLStatus, hint string) string {
  1505  		return fmt.Sprintf(`#!/bin/bash
  1506  	curl --max-time 10 -s 'http://localhost:%d/schema-migration/report-status?uuid=%s&status=%s&hint=%s&dryrun='"$GH_OST_DRY_RUN"'&progress='"$GH_OST_PROGRESS"'&eta='"$GH_OST_ETA_SECONDS"'&rowscopied='"$GH_OST_COPIED_ROWS"
  1507  			`, servenv.Port(), onlineDDL.UUID, string(status), hint)
  1508  	}
  1509  	if _, err := createTempScript(tempDir, "gh-ost-on-startup", onHookContent(schema.OnlineDDLStatusRunning, emptyHint)); err != nil {
  1510  		log.Errorf("Error creating script: %+v", err)
  1511  		return err
  1512  	}
  1513  	if _, err := createTempScript(tempDir, "gh-ost-on-status", onHookContent(schema.OnlineDDLStatusRunning, emptyHint)); err != nil {
  1514  		log.Errorf("Error creating script: %+v", err)
  1515  		return err
  1516  	}
  1517  	if _, err := createTempScript(tempDir, "gh-ost-on-success", onHookContent(schema.OnlineDDLStatusComplete, emptyHint)); err != nil {
  1518  		log.Errorf("Error creating script: %+v", err)
  1519  		return err
  1520  	}
  1521  	if _, err := createTempScript(tempDir, "gh-ost-on-failure", onHookContent(schema.OnlineDDLStatusFailed, emptyHint)); err != nil {
  1522  		log.Errorf("Error creating script: %+v", err)
  1523  		return err
  1524  	}
  1525  	if _, err := createTempScript(tempDir, "gh-ost-on-begin-postponed", onHookContent(schema.OnlineDDLStatusRunning, readyToCompleteHint)); err != nil {
  1526  		log.Errorf("Error creating script: %+v", err)
  1527  		return err
  1528  	}
  1529  	serveSocketFile := path.Join(tempDir, "serve.sock")
  1530  
  1531  	if err := e.deleteGhostPanicFlagFile(onlineDDL.UUID); err != nil {
  1532  		log.Errorf("Error removing gh-ost panic flag file %s: %+v", e.ghostPanicFlagFileName(onlineDDL.UUID), err)
  1533  		return err
  1534  	}
  1535  	if err := e.deleteGhostPostponeFlagFile(onlineDDL.UUID); err != nil {
  1536  		log.Errorf("Error removing gh-ost postpone flag file %s before migration: %+v", e.ghostPostponeFlagFileName(onlineDDL.UUID), err)
  1537  		return err
  1538  	}
  1539  	// Validate gh-ost binary:
  1540  	_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "validating gh-ost --version")
  1541  	log.Infof("Will now validate gh-ost binary")
  1542  	_, err = execCmd(
  1543  		"bash",
  1544  		[]string{
  1545  			wrapperScriptFileName,
  1546  			"--version",
  1547  		},
  1548  		os.Environ(),
  1549  		"/tmp",
  1550  		nil,
  1551  		nil,
  1552  	)
  1553  	if err != nil {
  1554  		log.Errorf("Error testing gh-ost binary: %+v", err)
  1555  		return err
  1556  	}
  1557  	_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "validated gh-ost --version")
  1558  	log.Infof("+ OK")
  1559  
  1560  	if err := e.updateMigrationLogPath(ctx, onlineDDL.UUID, variables.host, tempDir); err != nil {
  1561  		return err
  1562  	}
  1563  
  1564  	runGhost := func(execute bool) error {
  1565  		alterOptions := e.parseAlterOptions(ctx, onlineDDL)
  1566  		forceTableNames := fmt.Sprintf("%s_%s", onlineDDL.UUID, ReadableTimestamp())
  1567  
  1568  		if err := e.updateArtifacts(ctx, onlineDDL.UUID,
  1569  			fmt.Sprintf("_%s_gho", forceTableNames),
  1570  			fmt.Sprintf("_%s_ghc", forceTableNames),
  1571  			fmt.Sprintf("_%s_del", forceTableNames),
  1572  		); err != nil {
  1573  			return err
  1574  		}
  1575  
  1576  		os.Setenv("ONLINE_DDL_PASSWORD", onlineDDLPassword)
  1577  		args := []string{
  1578  			wrapperScriptFileName,
  1579  			fmt.Sprintf(`--host=%s`, variables.host),
  1580  			fmt.Sprintf(`--port=%d`, variables.port),
  1581  			fmt.Sprintf(`--conf=%s`, credentialsConfigFileName), // user & password found here
  1582  			`--allow-on-master`,
  1583  			`--max-load=Threads_running=900`,
  1584  			`--critical-load=Threads_running=1000`,
  1585  			`--critical-load-hibernate-seconds=60`,
  1586  			`--approve-renamed-columns`,
  1587  			`--debug`,
  1588  			`--exact-rowcount`,
  1589  			`--default-retries=120`,
  1590  			fmt.Sprintf("--force-table-names=%s", forceTableNames),
  1591  			fmt.Sprintf("--serve-socket-file=%s", serveSocketFile),
  1592  			fmt.Sprintf("--hooks-path=%s", tempDir),
  1593  			fmt.Sprintf(`--hooks-hint-token=%s`, onlineDDL.UUID),
  1594  			fmt.Sprintf(`--throttle-http=http://localhost:%d/throttler/check?app=%s:gh-ost:%s&p=low`, servenv.Port(), throttlerOnlineDDLApp, onlineDDL.UUID),
  1595  			fmt.Sprintf(`--database=%s`, e.dbName),
  1596  			fmt.Sprintf(`--table=%s`, onlineDDL.Table),
  1597  			fmt.Sprintf(`--alter=%s`, alterOptions),
  1598  			fmt.Sprintf(`--panic-flag-file=%s`, e.ghostPanicFlagFileName(onlineDDL.UUID)),
  1599  			fmt.Sprintf(`--execute=%t`, execute),
  1600  		}
  1601  		if onlineDDL.StrategySetting().IsAllowZeroInDateFlag() {
  1602  			args = append(args, "--allow-zero-in-date")
  1603  		}
  1604  		if execute && onlineDDL.StrategySetting().IsPostponeCompletion() {
  1605  			args = append(args, "--postpone-cut-over-flag-file", e.ghostPostponeFlagFileName(onlineDDL.UUID))
  1606  		}
  1607  
  1608  		args = append(args, onlineDDL.StrategySetting().RuntimeOptions()...)
  1609  		_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, fmt.Sprintf("executing gh-ost --execute=%v", execute))
  1610  		_, err := execCmd("bash", args, os.Environ(), "/tmp", nil, nil)
  1611  		_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, fmt.Sprintf("executed gh-ost --execute=%v, err=%v", execute, err))
  1612  		if err != nil {
  1613  			// See if we can get more info from the failure file
  1614  			if content, ferr := os.ReadFile(path.Join(tempDir, migrationFailureFileName)); ferr == nil {
  1615  				failureMessage := strings.TrimSpace(string(content))
  1616  				if failureMessage != "" {
  1617  					// This message was produced by gh-ost itself. It is more informative than the default "migration failed..." message. Overwrite.
  1618  					return errors.New(failureMessage)
  1619  				}
  1620  			}
  1621  		}
  1622  		return err
  1623  	}
  1624  
  1625  	e.ownedRunningMigrations.Store(onlineDDL.UUID, onlineDDL)
  1626  
  1627  	go func() error {
  1628  		defer e.ownedRunningMigrations.Delete(onlineDDL.UUID)
  1629  		defer e.deleteGhostPostponeFlagFile(onlineDDL.UUID) // irrespective whether the file was in fact in use or not
  1630  		defer e.dropOnlineDDLUser(ctx)
  1631  		defer e.gcArtifacts(ctx)
  1632  
  1633  		log.Infof("Will now dry-run gh-ost on: %s:%d", variables.host, variables.port)
  1634  		if err := runGhost(false); err != nil {
  1635  			// perhaps gh-ost was interrupted midway and didn't have the chance to send a "failed" status
  1636  			_ = e.failMigration(ctx, onlineDDL, err)
  1637  
  1638  			log.Errorf("Error executing gh-ost dry run: %+v", err)
  1639  			return err
  1640  		}
  1641  		log.Infof("+ OK")
  1642  
  1643  		log.Infof("Will now run gh-ost on: %s:%d", variables.host, variables.port)
  1644  		startedMigrations.Add(1)
  1645  		if err := runGhost(true); err != nil {
  1646  			// perhaps gh-ost was interrupted midway and didn't have the chance to send a "failes" status
  1647  			_ = e.failMigration(ctx, onlineDDL, err)
  1648  			failedMigrations.Add(1)
  1649  			log.Errorf("Error running gh-ost: %+v", err)
  1650  			return err
  1651  		}
  1652  		// Migration successful!
  1653  		defer e.reloadSchema(ctx)
  1654  		successfulMigrations.Add(1)
  1655  		log.Infof("+ OK")
  1656  		return nil
  1657  	}()
  1658  	return nil
  1659  }
  1660  
  1661  // ExecuteWithPTOSC validates and runs a pt-online-schema-change process.
  1662  // Validation included testing the backend MySQL server and the pt-online-schema-change binary itself
  1663  // Execution runs first a dry run, then an actual migration
  1664  func (e *Executor) ExecuteWithPTOSC(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
  1665  	if conflictFound, conflictingMigration := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound {
  1666  		return vterrors.Wrapf(ErrExecutorMigrationAlreadyRunning, "conflicting migration: %v over table: %v", conflictingMigration.UUID, conflictingMigration.Table)
  1667  	}
  1668  
  1669  	if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY {
  1670  		return ErrExecutorNotWritableTablet
  1671  	}
  1672  	variables, err := e.readMySQLVariables(ctx)
  1673  	if err != nil {
  1674  		log.Errorf("Error before running pt-online-schema-change: %+v", err)
  1675  		return err
  1676  	}
  1677  	if variables.readOnly {
  1678  		err := fmt.Errorf("Error before running pt-online-schema-change: MySQL server is read_only")
  1679  		log.Errorf(err.Error())
  1680  		return err
  1681  	}
  1682  	onlineDDLPassword, err := e.createOnlineDDLUser(ctx)
  1683  	if err != nil {
  1684  		err := fmt.Errorf("Error creating pt-online-schema-change user: %+v", err)
  1685  		log.Errorf(err.Error())
  1686  		return err
  1687  	}
  1688  	tempDir, err := createTempDir(onlineDDL.UUID)
  1689  	if err != nil {
  1690  		log.Errorf("Error creating temporary directory: %+v", err)
  1691  		return err
  1692  	}
  1693  
  1694  	binaryFileName, _ := PTOSCFileName()
  1695  	wrapperScriptContent := fmt.Sprintf(`#!/bin/bash
  1696  pt_log_path="%s"
  1697  pt_log_file="%s"
  1698  
  1699  mkdir -p "$pt_log_path"
  1700  
  1701  export MYSQL_PWD
  1702  %s "$@" > "$pt_log_path/$pt_log_file" 2>&1
  1703  	`, tempDir, migrationLogFileName, binaryFileName,
  1704  	)
  1705  	wrapperScriptFileName, err := createTempScript(tempDir, "pt-online-schema-change-wrapper.sh", wrapperScriptContent)
  1706  	if err != nil {
  1707  		log.Errorf("Error creating wrapper script: %+v", err)
  1708  		return err
  1709  	}
  1710  	pluginCode := `
  1711  	package pt_online_schema_change_plugin;
  1712  
  1713  	use strict;
  1714  	use LWP::Simple;
  1715  
  1716  	sub new {
  1717  	  my($class, % args) = @_;
  1718  	  my $self = { %args };
  1719  	  return bless $self, $class;
  1720  	}
  1721  
  1722  	sub init {
  1723  	  my($self, % args) = @_;
  1724  	}
  1725  
  1726  	sub before_create_new_table {
  1727  	  my($self, % args) = @_;
  1728  	  get("http://localhost:{{VTTABLET_PORT}}/schema-migration/report-status?uuid={{MIGRATION_UUID}}&status={{OnlineDDLStatusRunning}}&hint=&dryrun={{DRYRUN}}");
  1729  	}
  1730  
  1731  	sub before_exit {
  1732  		my($self, % args) = @_;
  1733  		my $exit_status = $args{exit_status};
  1734  	  if ($exit_status == 0) {
  1735  	    get("http://localhost:{{VTTABLET_PORT}}/schema-migration/report-status?uuid={{MIGRATION_UUID}}&status={{OnlineDDLStatusComplete}}&hint=&dryrun={{DRYRUN}}");
  1736  	  } else {
  1737  	    get("http://localhost:{{VTTABLET_PORT}}/schema-migration/report-status?uuid={{MIGRATION_UUID}}&status={{OnlineDDLStatusFailed}}&hint=&dryrun={{DRYRUN}}");
  1738  	  }
  1739  	}
  1740  
  1741  	sub get_slave_lag {
  1742  		my ($self, %args) = @_;
  1743  
  1744  		return sub {
  1745  			if (head("http://localhost:{{VTTABLET_PORT}}/throttler/check?app={{THROTTLER_ONLINE_DDL_APP}}:pt-osc:{{MIGRATION_UUID}}&p=low")) {
  1746  				# Got HTTP 200 OK, means throttler is happy
  1747  				return 0;
  1748  			}	else {
  1749  				# Throttler requests to hold back
  1750  				return 2147483647; # maxint, report *very* high lag
  1751  			}
  1752  		};
  1753  	}
  1754  
  1755  	1;
  1756  	`
  1757  	pluginCode = strings.ReplaceAll(pluginCode, "{{VTTABLET_PORT}}", fmt.Sprintf("%d", servenv.Port()))
  1758  	pluginCode = strings.ReplaceAll(pluginCode, "{{MIGRATION_UUID}}", onlineDDL.UUID)
  1759  	pluginCode = strings.ReplaceAll(pluginCode, "{{THROTTLER_ONLINE_DDL_APP}}", throttlerOnlineDDLApp)
  1760  
  1761  	pluginCode = strings.ReplaceAll(pluginCode, "{{OnlineDDLStatusRunning}}", string(schema.OnlineDDLStatusRunning))
  1762  	pluginCode = strings.ReplaceAll(pluginCode, "{{OnlineDDLStatusComplete}}", string(schema.OnlineDDLStatusComplete))
  1763  	pluginCode = strings.ReplaceAll(pluginCode, "{{OnlineDDLStatusFailed}}", string(schema.OnlineDDLStatusFailed))
  1764  
  1765  	// Validate pt-online-schema-change binary:
  1766  	log.Infof("Will now validate pt-online-schema-change binary")
  1767  	_, err = execCmd(
  1768  		"bash",
  1769  		[]string{
  1770  			wrapperScriptFileName,
  1771  			"--version",
  1772  		},
  1773  		os.Environ(),
  1774  		"/tmp",
  1775  		nil,
  1776  		nil,
  1777  	)
  1778  	if err != nil {
  1779  		log.Errorf("Error testing pt-online-schema-change binary: %+v", err)
  1780  		return err
  1781  	}
  1782  	log.Infof("+ OK")
  1783  
  1784  	if err := e.updateMigrationLogPath(ctx, onlineDDL.UUID, variables.host, tempDir); err != nil {
  1785  		return err
  1786  	}
  1787  
  1788  	alterOptions := e.parseAlterOptions(ctx, onlineDDL)
  1789  
  1790  	// The following sleep() is temporary and artificial. Because we create a new user for this
  1791  	// migration, and because we throttle by replicas, we need to wait for the replicas to be
  1792  	// caught up with the new user creation. Otherwise, the OSC tools will fail connecting to the replicas...
  1793  	// Once we have a built in throttling service , we will no longe rneed to have the OSC tools probe the
  1794  	// replicas. Instead, they will consult with our throttling service.
  1795  	// TODO(shlomi): replace/remove this when we have a proper throttling solution
  1796  	time.Sleep(time.Second)
  1797  
  1798  	runPTOSC := func(execute bool) error {
  1799  		os.Setenv("MYSQL_PWD", onlineDDLPassword)
  1800  		newTableName := fmt.Sprintf("_%s_%s_new", onlineDDL.UUID, ReadableTimestamp())
  1801  
  1802  		if err := e.updateArtifacts(ctx, onlineDDL.UUID,
  1803  			fmt.Sprintf("_%s_old", onlineDDL.Table),
  1804  			fmt.Sprintf("__%s_old", onlineDDL.Table),
  1805  			newTableName,
  1806  		); err != nil {
  1807  			return err
  1808  		}
  1809  
  1810  		executeFlag := "--dry-run"
  1811  		if execute {
  1812  			executeFlag = "--execute"
  1813  		}
  1814  		finalPluginCode := strings.ReplaceAll(pluginCode, "{{DRYRUN}}", fmt.Sprintf("%t", !execute))
  1815  		pluginFile, err := createTempScript(tempDir, "pt-online-schema-change-plugin", finalPluginCode)
  1816  		if err != nil {
  1817  			log.Errorf("Error creating script: %+v", err)
  1818  			return err
  1819  		}
  1820  		args := []string{
  1821  			wrapperScriptFileName,
  1822  			`--pid`,
  1823  			e.ptPidFileName(onlineDDL.UUID),
  1824  			`--plugin`,
  1825  			pluginFile,
  1826  			`--new-table-name`,
  1827  			newTableName,
  1828  			`--alter`,
  1829  			alterOptions,
  1830  			`--check-slave-lag`, // We use primary's identity so that pt-online-schema-change calls our lag plugin for exactly 1 server
  1831  			fmt.Sprintf(`h=%s,P=%d,D=%s,t=%s,u=%s`, variables.host, variables.port, e.dbName, onlineDDL.Table, onlineDDLUser),
  1832  			executeFlag,
  1833  			fmt.Sprintf(`h=%s,P=%d,D=%s,t=%s,u=%s`, variables.host, variables.port, e.dbName, onlineDDL.Table, onlineDDLUser),
  1834  		}
  1835  
  1836  		if execute {
  1837  			args = append(args,
  1838  				`--no-drop-new-table`,
  1839  				`--no-drop-old-table`,
  1840  			)
  1841  		}
  1842  		args = append(args, onlineDDL.StrategySetting().RuntimeOptions()...)
  1843  		_, err = execCmd("bash", args, os.Environ(), "/tmp", nil, nil)
  1844  		return err
  1845  	}
  1846  
  1847  	e.ownedRunningMigrations.Store(onlineDDL.UUID, onlineDDL)
  1848  
  1849  	go func() error {
  1850  		defer e.ownedRunningMigrations.Delete(onlineDDL.UUID)
  1851  		defer e.dropOnlineDDLUser(ctx)
  1852  		defer e.gcArtifacts(ctx)
  1853  
  1854  		log.Infof("Will now dry-run pt-online-schema-change on: %s:%d", variables.host, variables.port)
  1855  		if err := runPTOSC(false); err != nil {
  1856  			// perhaps pt-osc was interrupted midway and didn't have the chance to send a "failes" status
  1857  			_ = e.failMigration(ctx, onlineDDL, err)
  1858  			_ = e.updateMigrationTimestamp(ctx, "completed_timestamp", onlineDDL.UUID)
  1859  			log.Errorf("Error executing pt-online-schema-change dry run: %+v", err)
  1860  			return err
  1861  		}
  1862  		log.Infof("+ OK")
  1863  
  1864  		log.Infof("Will now run pt-online-schema-change on: %s:%d", variables.host, variables.port)
  1865  		startedMigrations.Add(1)
  1866  		if err := runPTOSC(true); err != nil {
  1867  			// perhaps pt-osc was interrupted midway and didn't have the chance to send a "failes" status
  1868  			_ = e.failMigration(ctx, onlineDDL, err)
  1869  			_ = e.updateMigrationTimestamp(ctx, "completed_timestamp", onlineDDL.UUID)
  1870  			_ = e.dropPTOSCMigrationTriggers(ctx, onlineDDL)
  1871  			failedMigrations.Add(1)
  1872  			log.Errorf("Error running pt-online-schema-change: %+v", err)
  1873  			return err
  1874  		}
  1875  		// Migration successful!
  1876  		defer e.reloadSchema(ctx)
  1877  		successfulMigrations.Add(1)
  1878  		log.Infof("+ OK")
  1879  		return nil
  1880  	}()
  1881  	return nil
  1882  }
  1883  
  1884  func (e *Executor) readMigration(ctx context.Context, uuid string) (onlineDDL *schema.OnlineDDL, row sqltypes.RowNamedValues, err error) {
  1885  
  1886  	parsed := sqlparser.BuildParsedQuery(sqlSelectMigration, ":migration_uuid")
  1887  	bindVars := map[string]*querypb.BindVariable{
  1888  		"migration_uuid": sqltypes.StringBindVariable(uuid),
  1889  	}
  1890  	bound, err := parsed.GenerateQuery(bindVars, nil)
  1891  	if err != nil {
  1892  		return onlineDDL, nil, err
  1893  	}
  1894  	r, err := e.execQuery(ctx, bound)
  1895  	if err != nil {
  1896  		return onlineDDL, nil, err
  1897  	}
  1898  	row = r.Named().Row()
  1899  	if row == nil {
  1900  		// No results
  1901  		return nil, nil, ErrMigrationNotFound
  1902  	}
  1903  	onlineDDL = &schema.OnlineDDL{
  1904  		Keyspace:         row["keyspace"].ToString(),
  1905  		Table:            row["mysql_table"].ToString(),
  1906  		Schema:           row["mysql_schema"].ToString(),
  1907  		SQL:              row["migration_statement"].ToString(),
  1908  		UUID:             row["migration_uuid"].ToString(),
  1909  		Strategy:         schema.DDLStrategy(row["strategy"].ToString()),
  1910  		Options:          row["options"].ToString(),
  1911  		Status:           schema.OnlineDDLStatus(row["migration_status"].ToString()),
  1912  		Retries:          row.AsInt64("retries", 0),
  1913  		ReadyToComplete:  row.AsInt64("ready_to_complete", 0),
  1914  		TabletAlias:      row["tablet"].ToString(),
  1915  		MigrationContext: row["migration_context"].ToString(),
  1916  	}
  1917  	return onlineDDL, row, nil
  1918  }
  1919  
  1920  // readPendingMigrationsUUIDs returns UUIDs for migrations in pending state (queued/ready/running)
  1921  func (e *Executor) readPendingMigrationsUUIDs(ctx context.Context) (uuids []string, err error) {
  1922  	r, err := e.execQuery(ctx, sqlSelectPendingMigrations)
  1923  	if err != nil {
  1924  		return uuids, err
  1925  	}
  1926  	for _, row := range r.Named().Rows {
  1927  		uuid := row["migration_uuid"].ToString()
  1928  		uuids = append(uuids, uuid)
  1929  	}
  1930  	return uuids, err
  1931  }
  1932  
  1933  // terminateMigration attempts to interrupt and hard-stop a running migration
  1934  func (e *Executor) terminateMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) (foundRunning bool, err error) {
  1935  	log.Infof("terminateMigration: request to terminate %s", onlineDDL.UUID)
  1936  	// It's possible the killing the migration fails for whatever reason, in which case
  1937  	// the logic will retry killing it later on.
  1938  	// Whatever happens in this function, this executor stops owning the given migration.
  1939  	defer e.ownedRunningMigrations.Delete(onlineDDL.UUID)
  1940  
  1941  	switch onlineDDL.Strategy {
  1942  	case schema.DDLStrategyOnline, schema.DDLStrategyVitess:
  1943  		// migration could have started by a different tablet. We need to actively verify if it is running
  1944  		s, _ := e.readVReplStream(ctx, onlineDDL.UUID, true)
  1945  		foundRunning = (s != nil && s.isRunning())
  1946  		if err := e.terminateVReplMigration(ctx, onlineDDL.UUID); err != nil {
  1947  			return foundRunning, fmt.Errorf("Error terminating migration, vreplication exec error: %+v", err)
  1948  		}
  1949  	case schema.DDLStrategyPTOSC:
  1950  		// see if pt-osc is running (could have been executed by this vttablet or one that crashed in the past)
  1951  		if running, pid, _ := e.isPTOSCMigrationRunning(ctx, onlineDDL.UUID); running {
  1952  			foundRunning = true
  1953  			// Because pt-osc doesn't offer much control, we take a brute force approach to killing it,
  1954  			// revoking its privileges, and cleaning up its triggers.
  1955  			if err := syscall.Kill(pid, syscall.SIGTERM); err != nil {
  1956  				return foundRunning, nil
  1957  			}
  1958  			if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
  1959  				return foundRunning, nil
  1960  			}
  1961  			if err := e.dropOnlineDDLUser(ctx); err != nil {
  1962  				return foundRunning, nil
  1963  			}
  1964  			if err := e.dropPTOSCMigrationTriggers(ctx, onlineDDL); err != nil {
  1965  				return foundRunning, nil
  1966  			}
  1967  		}
  1968  	case schema.DDLStrategyGhost:
  1969  		// double check: is the running migration the very same one we wish to cancel?
  1970  		if _, ok := e.ownedRunningMigrations.Load(onlineDDL.UUID); ok {
  1971  			// assuming all goes well in next steps, we can already report that there has indeed been a migration
  1972  			foundRunning = true
  1973  		}
  1974  		// gh-ost migrations are easy to kill: just touch their specific panic flag files. We trust
  1975  		// gh-ost to terminate. No need to KILL it. And there's no trigger cleanup.
  1976  		if err := e.createGhostPanicFlagFile(onlineDDL.UUID); err != nil {
  1977  			return foundRunning, fmt.Errorf("Error terminating gh-ost migration, flag file error: %+v", err)
  1978  		}
  1979  	}
  1980  	return foundRunning, nil
  1981  }
  1982  
  1983  // CancelMigration attempts to abort a scheduled or a running migration
  1984  func (e *Executor) CancelMigration(ctx context.Context, uuid string, message string, issuedByUser bool) (result *sqltypes.Result, err error) {
  1985  	if atomic.LoadInt64(&e.isOpen) == 0 {
  1986  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  1987  	}
  1988  	log.Infof("CancelMigration: request to cancel %s with message: %v", uuid, message)
  1989  
  1990  	e.migrationMutex.Lock()
  1991  	defer e.migrationMutex.Unlock()
  1992  
  1993  	var rowsAffected uint64
  1994  
  1995  	onlineDDL, _, err := e.readMigration(ctx, uuid)
  1996  	if err != nil {
  1997  		return nil, err
  1998  	}
  1999  
  2000  	switch onlineDDL.Status {
  2001  	case schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed, schema.OnlineDDLStatusCancelled:
  2002  		log.Infof("CancelMigration: migration %s is in non-cancellable status: %v", uuid, onlineDDL.Status)
  2003  		return emptyResult, nil
  2004  	}
  2005  	// From this point on, we're actually cancelling a migration
  2006  	if issuedByUser {
  2007  		// if this was issued by the user, then we mark the `cancelled_timestamp`, and based on that,
  2008  		// the migration state will be 'cancelled'.
  2009  		// If this was not issued by the user, then this is an internal state machine cancellation of the
  2010  		// migration, e.g. because it is stale or has an unrecoverable error. In this case we do not mark
  2011  		// the timestamp, and as result, the state will transition to 'failed'
  2012  		if err := e.updateMigrationTimestamp(ctx, "cancelled_timestamp", uuid); err != nil {
  2013  			return nil, err
  2014  		}
  2015  	}
  2016  	defer e.failMigration(ctx, onlineDDL, errors.New(message))
  2017  	defer e.triggerNextCheckInterval()
  2018  
  2019  	switch onlineDDL.Status {
  2020  	case schema.OnlineDDLStatusQueued, schema.OnlineDDLStatusReady:
  2021  		log.Infof("CancelMigration: cancelling %s with status: %v", uuid, onlineDDL.Status)
  2022  		return &sqltypes.Result{RowsAffected: 1}, nil
  2023  	}
  2024  
  2025  	migrationFound, err := e.terminateMigration(ctx, onlineDDL)
  2026  	if migrationFound {
  2027  		log.Infof("CancelMigration: terminated %s with status: %v", uuid, onlineDDL.Status)
  2028  		rowsAffected = 1
  2029  	} else {
  2030  		log.Infof("CancelMigration: migration %s wasn't found to be running", uuid)
  2031  	}
  2032  	if err != nil {
  2033  		return result, err
  2034  	}
  2035  
  2036  	result = &sqltypes.Result{
  2037  		RowsAffected: rowsAffected,
  2038  	}
  2039  	return result, nil
  2040  }
  2041  
  2042  // cancelMigrations attempts to abort a list of migrations
  2043  func (e *Executor) cancelMigrations(ctx context.Context, cancellable []*cancellableMigration, issuedByUser bool) (err error) {
  2044  	for _, migration := range cancellable {
  2045  		log.Infof("cancelMigrations: cancelling %s; reason: %s", migration.uuid, migration.message)
  2046  		if _, err := e.CancelMigration(ctx, migration.uuid, migration.message, issuedByUser); err != nil {
  2047  			return err
  2048  		}
  2049  	}
  2050  	return nil
  2051  }
  2052  
  2053  // CancelPendingMigrations cancels all pending migrations (that are expected to run or are running)
  2054  // for this keyspace
  2055  func (e *Executor) CancelPendingMigrations(ctx context.Context, message string, issuedByUser bool) (result *sqltypes.Result, err error) {
  2056  	if atomic.LoadInt64(&e.isOpen) == 0 {
  2057  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  2058  	}
  2059  
  2060  	uuids, err := e.readPendingMigrationsUUIDs(ctx)
  2061  	if err != nil {
  2062  		return result, err
  2063  	}
  2064  	log.Infof("CancelPendingMigrations: iterating %v migrations %s", len(uuids))
  2065  
  2066  	result = &sqltypes.Result{}
  2067  	for _, uuid := range uuids {
  2068  		log.Infof("CancelPendingMigrations: cancelling %s", uuid)
  2069  		res, err := e.CancelMigration(ctx, uuid, message, issuedByUser)
  2070  		if err != nil {
  2071  			return result, err
  2072  		}
  2073  		result.AppendResult(res)
  2074  	}
  2075  	log.Infof("CancelPendingMigrations: done iterating %v migrations %s", len(uuids))
  2076  	return result, nil
  2077  }
  2078  
  2079  func (e *Executor) validateThrottleParams(ctx context.Context, expireString string, ratioLiteral *sqlparser.Literal) (duration time.Duration, ratio float64, err error) {
  2080  	duration = time.Hour * 24 * 365 * 100
  2081  	if expireString != "" {
  2082  		duration, err = time.ParseDuration(expireString)
  2083  		if err != nil || duration < 0 {
  2084  			return duration, ratio, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid EXPIRE value: %s. Try '120s', '30m', '1h', etc. Allowed units are (s)ec, (m)in, (h)hour", expireString)
  2085  		}
  2086  	}
  2087  	ratio = 1.0
  2088  	if ratioLiteral != nil {
  2089  		ratio, err = strconv.ParseFloat(ratioLiteral.Val, 64)
  2090  		if err != nil || ratio < 0 || ratio > 1 {
  2091  			return duration, ratio, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid RATIO value: %s. Try any decimal number between '0.0' (no throttle) and `1.0` (fully throttled)", ratioLiteral.Val)
  2092  		}
  2093  	}
  2094  	return duration, ratio, nil
  2095  }
  2096  
  2097  // ThrottleMigration
  2098  func (e *Executor) ThrottleMigration(ctx context.Context, uuid string, expireString string, ratioLiteral *sqlparser.Literal) (result *sqltypes.Result, err error) {
  2099  	duration, ratio, err := e.validateThrottleParams(ctx, expireString, ratioLiteral)
  2100  	if err != nil {
  2101  		return nil, err
  2102  	}
  2103  	if err := e.lagThrottler.CheckIsReady(); err != nil {
  2104  		return nil, err
  2105  	}
  2106  	_ = e.lagThrottler.ThrottleApp(uuid, time.Now().Add(duration), ratio)
  2107  	return emptyResult, nil
  2108  }
  2109  
  2110  // ThrottleAllMigrations
  2111  func (e *Executor) ThrottleAllMigrations(ctx context.Context, expireString string, ratioLiteral *sqlparser.Literal) (result *sqltypes.Result, err error) {
  2112  	duration, ratio, err := e.validateThrottleParams(ctx, expireString, ratioLiteral)
  2113  	if err != nil {
  2114  		return nil, err
  2115  	}
  2116  	if err := e.lagThrottler.CheckIsReady(); err != nil {
  2117  		return nil, err
  2118  	}
  2119  	_ = e.lagThrottler.ThrottleApp(throttlerOnlineDDLApp, time.Now().Add(duration), ratio)
  2120  	return emptyResult, nil
  2121  }
  2122  
  2123  // UnthrottleMigration
  2124  func (e *Executor) UnthrottleMigration(ctx context.Context, uuid string) (result *sqltypes.Result, err error) {
  2125  	if err := e.lagThrottler.CheckIsReady(); err != nil {
  2126  		return nil, err
  2127  	}
  2128  	defer e.triggerNextCheckInterval()
  2129  	_ = e.lagThrottler.UnthrottleApp(uuid)
  2130  	return emptyResult, nil
  2131  }
  2132  
  2133  // UnthrottleAllMigrations
  2134  func (e *Executor) UnthrottleAllMigrations(ctx context.Context) (result *sqltypes.Result, err error) {
  2135  	if err := e.lagThrottler.CheckIsReady(); err != nil {
  2136  		return nil, err
  2137  	}
  2138  	defer e.triggerNextCheckInterval()
  2139  	_ = e.lagThrottler.UnthrottleApp(throttlerOnlineDDLApp)
  2140  	return emptyResult, nil
  2141  }
  2142  
  2143  // scheduleNextMigration attemps to schedule a single migration to run next.
  2144  // possibly there are migrations to run.
  2145  // The effect of this function is to move a migration from 'queued' state to 'ready' state, is all.
  2146  func (e *Executor) scheduleNextMigration(ctx context.Context) error {
  2147  	e.migrationMutex.Lock()
  2148  	defer e.migrationMutex.Unlock()
  2149  
  2150  	var onlyScheduleOneMigration sync.Once
  2151  
  2152  	r, err := e.execQuery(ctx, sqlSelectQueuedMigrations)
  2153  	if err != nil {
  2154  		return err
  2155  	}
  2156  	for _, row := range r.Named().Rows {
  2157  		uuid := row["migration_uuid"].ToString()
  2158  		postponeLaunch := row.AsBool("postpone_launch", false)
  2159  		postponeCompletion := row.AsBool("postpone_completion", false)
  2160  		readyToComplete := row.AsBool("ready_to_complete", false)
  2161  		isImmediateOperation := row.AsBool("is_immediate_operation", false)
  2162  
  2163  		if postponeLaunch {
  2164  			// We don't even look into this migration until its postpone_launch flag is cleared
  2165  			continue
  2166  		}
  2167  
  2168  		if !readyToComplete {
  2169  			// see if we need to update ready_to_complete
  2170  			if isImmediateOperation {
  2171  				// Whether postponsed or not, CREATE and DROP operations, as well as VIEW operations,
  2172  				// are inherently "ready to complete" because their operation is immediate.
  2173  				if err := e.updateMigrationReadyToComplete(ctx, uuid, true); err != nil {
  2174  					return err
  2175  				}
  2176  			}
  2177  		}
  2178  
  2179  		if !(isImmediateOperation && postponeCompletion) {
  2180  			// Any non-postponed migration can be scheduled
  2181  			// postponed ALTER can be scheduled (because gh-ost or vreplication will postpone the cut-over)
  2182  			// We only schedule a single migration in the execution of this function
  2183  			onlyScheduleOneMigration.Do(func() {
  2184  				err = e.updateMigrationStatus(ctx, uuid, schema.OnlineDDLStatusReady)
  2185  				log.Infof("Executor.scheduleNextMigration: scheduling migration %s; err: %v", uuid, err)
  2186  				e.triggerNextCheckInterval()
  2187  			})
  2188  			if err != nil {
  2189  				return err
  2190  			}
  2191  		}
  2192  	}
  2193  	return err
  2194  }
  2195  
  2196  // reviewEmptyTableRevertMigrations reviews a queued REVERT migration. Such a migration has the following SQL:
  2197  // "REVERT VITESS_MIGRATION '...'"
  2198  // There's nothing in this SQL to indicate:
  2199  // - which table is involved?
  2200  // - is this a table or a view?
  2201  // - Are we reverting a CREATE? A DROP? An ALTER?
  2202  // This function fills in the blanks and updates the database row.
  2203  func (e *Executor) reviewEmptyTableRevertMigrations(ctx context.Context, onlineDDL *schema.OnlineDDL) (changesMade bool, err error) {
  2204  	if onlineDDL.Table != "" {
  2205  		return false, nil
  2206  	}
  2207  	// Table name is empty. Let's populate it.
  2208  
  2209  	// Try to update table name and ddl_action
  2210  	// Failure to do so fails the migration
  2211  	revertUUID, err := onlineDDL.GetRevertUUID()
  2212  	if err != nil {
  2213  		return false, e.failMigration(ctx, onlineDDL, fmt.Errorf("cannot analyze revert UUID for revert migration %s: %v", onlineDDL.UUID, err))
  2214  	}
  2215  	revertedMigration, revertedRow, err := e.readMigration(ctx, revertUUID)
  2216  	if err != nil {
  2217  		return false, e.failMigration(ctx, onlineDDL, fmt.Errorf("cannot read migration %s reverted by migration %s: %s", revertUUID, onlineDDL.UUID, err))
  2218  	}
  2219  	revertedActionStr := revertedRow["ddl_action"].ToString()
  2220  
  2221  	mimickedActionStr := ""
  2222  	switch revertedActionStr {
  2223  	case sqlparser.CreateStr:
  2224  		mimickedActionStr = sqlparser.DropStr
  2225  	case sqlparser.DropStr:
  2226  		mimickedActionStr = sqlparser.CreateStr
  2227  	case sqlparser.AlterStr:
  2228  		mimickedActionStr = sqlparser.AlterStr
  2229  	default:
  2230  		return false, e.failMigration(ctx, onlineDDL, fmt.Errorf("cannot run migration %s reverting %s: unexpected action %s", onlineDDL.UUID, revertedMigration.UUID, revertedActionStr))
  2231  	}
  2232  	if err := e.updateDDLAction(ctx, onlineDDL.UUID, mimickedActionStr); err != nil {
  2233  		return false, err
  2234  	}
  2235  	if err := e.updateMigrationIsView(ctx, onlineDDL.UUID, revertedRow.AsBool("is_view", false)); err != nil {
  2236  		return false, err
  2237  	}
  2238  	if err := e.updateMySQLTable(ctx, onlineDDL.UUID, revertedMigration.Table); err != nil {
  2239  		return false, err
  2240  	}
  2241  	return true, nil
  2242  }
  2243  
  2244  // reviewImmediateOperations reviews a queued migration and determines whether it is an "immediate operation".
  2245  // Immediate operations are ones that can be performed within a split second, or rather, do not require long
  2246  // running processes. Immediate operations are:
  2247  // - CREATE TABLE
  2248  // - DROP TABLE (which we convert into RENAME)
  2249  // - All VIEW operations
  2250  // - An INSTANT DDL accompanied by relevant ddl strategy flags
  2251  // Non immediate operations are:
  2252  // - A gh-ost migration
  2253  // - A vitess (vreplication) migration
  2254  func (e *Executor) reviewImmediateOperations(ctx context.Context, capableOf mysql.CapableOf, onlineDDL *schema.OnlineDDL, ddlAction string, isRevert bool, isView bool) (bool, error) {
  2255  	switch ddlAction {
  2256  	case sqlparser.CreateStr, sqlparser.DropStr:
  2257  		return true, nil
  2258  	case sqlparser.AlterStr:
  2259  		switch {
  2260  		case isView:
  2261  			return true, nil
  2262  		case isRevert:
  2263  			// REVERT for a true ALTER TABLE. not an immediate operation
  2264  			return false, nil
  2265  		default:
  2266  			specialPlan, err := e.analyzeSpecialAlterPlan(ctx, onlineDDL, capableOf)
  2267  			if err != nil {
  2268  				return false, err
  2269  			}
  2270  			return (specialPlan != nil), nil
  2271  		}
  2272  	}
  2273  	return false, nil
  2274  }
  2275  
  2276  // reviewQueuedMigrations iterates through queued migrations and sees if any information needs to be updated.
  2277  // The function analyzes the queued migration and fills in some blanks:
  2278  // - If this is a REVERT migration, what table is affected? What's the operation?
  2279  // - Is this migration an "immediate operation"?
  2280  func (e *Executor) reviewQueuedMigrations(ctx context.Context) error {
  2281  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
  2282  	if err != nil {
  2283  		return err
  2284  	}
  2285  	defer conn.Close()
  2286  	_, capableOf, _ := mysql.GetFlavor(conn.ServerVersion, nil)
  2287  
  2288  	e.migrationMutex.Lock()
  2289  	defer e.migrationMutex.Unlock()
  2290  
  2291  	r, err := e.execQuery(ctx, sqlSelectQueuedUnreviewedMigrations)
  2292  	if err != nil {
  2293  		return err
  2294  	}
  2295  
  2296  	for _, uuidRow := range r.Named().Rows {
  2297  		uuid := uuidRow["migration_uuid"].ToString()
  2298  		onlineDDL, row, err := e.readMigration(ctx, uuid)
  2299  		if err != nil {
  2300  			return err
  2301  		}
  2302  		// handle REVERT migrations: populate table name and update ddl action and is_view:
  2303  		ddlAction := row["ddl_action"].ToString()
  2304  		isRevert := false
  2305  		if ddlAction == schema.RevertActionStr {
  2306  			isRevert = true
  2307  			rowModified, err := e.reviewEmptyTableRevertMigrations(ctx, onlineDDL)
  2308  			if err != nil {
  2309  				return err
  2310  			}
  2311  			if rowModified {
  2312  				// re-read migration and entire row
  2313  				onlineDDL, row, err = e.readMigration(ctx, uuid)
  2314  				if err != nil {
  2315  					return err
  2316  				}
  2317  				ddlAction = row["ddl_action"].ToString()
  2318  			}
  2319  		}
  2320  		isView := row.AsBool("is_view", false)
  2321  		isImmediate, err := e.reviewImmediateOperations(ctx, capableOf, onlineDDL, ddlAction, isRevert, isView)
  2322  		if err != nil {
  2323  			return err
  2324  		}
  2325  		if isImmediate {
  2326  			if err := e.updateMigrationSetImmediateOperation(ctx, onlineDDL.UUID); err != nil {
  2327  				return err
  2328  			}
  2329  		}
  2330  		// Find conditions where the migration cannot take place:
  2331  		switch onlineDDL.Strategy {
  2332  		case schema.DDLStrategyMySQL:
  2333  			strategySetting := onlineDDL.StrategySetting()
  2334  			if strategySetting.IsPostponeCompletion() {
  2335  				e.failMigration(ctx, onlineDDL, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "--postpone-completion not supported in 'mysql' strategy"))
  2336  			}
  2337  			if strategySetting.IsAllowZeroInDateFlag() {
  2338  				e.failMigration(ctx, onlineDDL, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "--allow-zero-in-date not supported in 'mysql' strategy"))
  2339  			}
  2340  		}
  2341  
  2342  		// The review is complete. We've backfilled details on the migration row. We mark
  2343  		// the migration as having been reviewed. The function scheduleNextMigration() will then
  2344  		// have access to this row.
  2345  		if err := e.updateMigrationTimestamp(ctx, "reviewed_timestamp", uuid); err != nil {
  2346  			return err
  2347  		}
  2348  
  2349  	}
  2350  	return nil
  2351  }
  2352  
  2353  func (e *Executor) validateMigrationRevertible(ctx context.Context, revertMigration *schema.OnlineDDL, revertingMigrationUUID string) (err error) {
  2354  	// Validation: migration to revert exists and is in complete state
  2355  	action, actionStr, err := revertMigration.GetActionStr()
  2356  	if err != nil {
  2357  		return err
  2358  	}
  2359  	switch action {
  2360  	case sqlparser.AlterDDLAction:
  2361  		if revertMigration.Strategy != schema.DDLStrategyOnline && revertMigration.Strategy != schema.DDLStrategyVitess {
  2362  			return fmt.Errorf("can only revert a %s strategy migration. Migration %s has %s strategy", schema.DDLStrategyOnline, revertMigration.UUID, revertMigration.Strategy)
  2363  		}
  2364  	case sqlparser.RevertDDLAction:
  2365  	case sqlparser.CreateDDLAction:
  2366  	case sqlparser.DropDDLAction:
  2367  	default:
  2368  		return fmt.Errorf("cannot revert migration %s: unexpected action %s", revertMigration.UUID, actionStr)
  2369  	}
  2370  	if revertMigration.Status != schema.OnlineDDLStatusComplete {
  2371  		return fmt.Errorf("can only revert a migration in a '%s' state. Migration %s is in '%s' state", schema.OnlineDDLStatusComplete, revertMigration.UUID, revertMigration.Status)
  2372  	}
  2373  	{
  2374  		// Validation: see if there's a pending migration on this table:
  2375  		r, err := e.execQuery(ctx, sqlSelectPendingMigrations)
  2376  		if err != nil {
  2377  			return err
  2378  		}
  2379  		// we identify running migrations on requested table
  2380  		for _, row := range r.Named().Rows {
  2381  			pendingUUID := row["migration_uuid"].ToString()
  2382  			if pendingUUID == revertingMigrationUUID {
  2383  				// that's fine; the migration we're looking at is the very one that's trying to issue this revert
  2384  				continue
  2385  			}
  2386  			keyspace := row["keyspace"].ToString()
  2387  			table := row["mysql_table"].ToString()
  2388  			status := schema.OnlineDDLStatus(row["migration_status"].ToString())
  2389  
  2390  			if keyspace == e.keyspace && table == revertMigration.Table {
  2391  				return fmt.Errorf("can not revert migration %s on table %s because migration %s is in %s status. May only revert if all migrations on this table are completed or failed", revertMigration.UUID, revertMigration.Table, pendingUUID, status)
  2392  			}
  2393  		}
  2394  		{
  2395  			// Validation: see that we're reverting the last successful migration on this table:
  2396  			query, err := sqlparser.ParseAndBind(sqlSelectCompleteMigrationsOnTable,
  2397  				sqltypes.StringBindVariable(e.keyspace),
  2398  				sqltypes.StringBindVariable(revertMigration.Table),
  2399  			)
  2400  			if err != nil {
  2401  				return err
  2402  			}
  2403  			r, err := e.execQuery(ctx, query)
  2404  			if err != nil {
  2405  				return err
  2406  			}
  2407  			for _, row := range r.Named().Rows {
  2408  				completeUUID := row["migration_uuid"].ToString()
  2409  				if completeUUID != revertMigration.UUID {
  2410  					return fmt.Errorf("can not revert migration %s on table %s because it is not the last migration to complete on that table. The last migration to complete was %s", revertMigration.UUID, revertMigration.Table, completeUUID)
  2411  				}
  2412  			}
  2413  		}
  2414  	}
  2415  	return nil
  2416  }
  2417  
// executeRevert is called for 'revert' migrations (SQL is of the form "revert 99caeca2_74e2_11eb_a693_f875a4d24e90", not a real SQL of course).
// In this function we:
// - figure out whether the revert is valid: can we really revert requested migration?
// - what type of migration we're reverting? (CREATE/DROP/ALTER)
// - revert appropriately to the type of migration
//
// onlineDDL is the revert migration itself; the migration being reverted is looked up by the
// UUID extracted from onlineDDL. Note that onlineDDL.SQL is overwritten with the generated
// RENAME/swap statement in the CREATE, DROP and ALTER VIEW flows before it is executed.
// The function returns the first error encountered.
func (e *Executor) executeRevert(ctx context.Context, onlineDDL *schema.OnlineDDL) (err error) {
	revertUUID, err := onlineDDL.GetRevertUUID()
	if err != nil {
		return fmt.Errorf("cannot run a revert migration %v: %+v", onlineDDL.UUID, err)
	}

	// revertMigration is the migration being reverted; row is its full database row
	revertMigration, row, err := e.readMigration(ctx, revertUUID)
	if err != nil {
		return err
	}
	if err := e.validateMigrationRevertible(ctx, revertMigration, onlineDDL.UUID); err != nil {
		return err
	}
	revertedActionStr := row["ddl_action"].ToString()
	if onlineDDL.Table == "" {
		// table name should be populated by reviewQueuedMigrations
		// but this was a newly added functionality. To be backwards compatible,
		// we double check here, and populate table name and ddl_action.

		// TODO: remove in v14
		mimickedActionStr := ""

		// The revert takes on the opposite action of the reverted migration (ALTER stays ALTER)
		switch revertedActionStr {
		case sqlparser.CreateStr:
			mimickedActionStr = sqlparser.DropStr
		case sqlparser.DropStr:
			mimickedActionStr = sqlparser.CreateStr
		case sqlparser.AlterStr:
			mimickedActionStr = sqlparser.AlterStr
		default:
			return fmt.Errorf("cannot run migration %s reverting %s: unexpected action %s", onlineDDL.UUID, revertMigration.UUID, revertedActionStr)
		}
		if err := e.updateDDLAction(ctx, onlineDDL.UUID, mimickedActionStr); err != nil {
			return err
		}
		if err := e.updateMySQLTable(ctx, onlineDDL.UUID, revertMigration.Table); err != nil {
			return err
		}
	}

	switch revertedActionStr {
	case sqlparser.CreateStr:
		{
			// We are reverting a CREATE migration. The revert is to DROP, only we don't actually
			// drop the table, we rename it into lifecycle
			// Possibly this was a CREATE TABLE IF NOT EXISTS, and possibly the table already existed
			// before the DDL, in which case the CREATE was a noop. In that scenario we _do not_ drop
			// the table.
			// We can tell the difference by looking at the artifacts. A successful CREATE TABLE, where
			// a table actually gets created, has a sentry, dummy artifact. A noop has not.

			artifacts := row["artifacts"].ToString()
			artifactTables := textutil.SplitDelimitedList(artifacts)
			if len(artifactTables) > 1 {
				return fmt.Errorf("cannot run migration %s reverting %s: found %d artifact tables, expected maximum 1", onlineDDL.UUID, revertMigration.UUID, len(artifactTables))
			}
			if len(artifactTables) == 0 {
				// This indicates no table was actually created. this must have been a CREATE TABLE IF NOT EXISTS where the table already existed.
				// The revert is therefore a noop, and is marked complete right away. The returned error is ignored.
				_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
			}

			// At most one iteration, per the checks above
			for _, artifactTable := range artifactTables {
				// The revert migration takes over the reverted migration's artifact name
				if err := e.updateArtifacts(ctx, onlineDDL.UUID, artifactTable); err != nil {
					return err
				}
				// Rename the created table into the artifact name
				onlineDDL.SQL = sqlparser.BuildParsedQuery(sqlRenameTable, revertMigration.Table, artifactTable).Query
				if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
					return err
				}
			}
		}
	case sqlparser.DropStr:
		{
			// We are reverting a DROP migration. But the table wasn't really dropped, because that's not how
			// we run DROP migrations. It was renamed. So we need to rename it back.
			// But we impose as if we are now CREATE-ing the table.

			artifacts := row["artifacts"].ToString()
			artifactTables := textutil.SplitDelimitedList(artifacts)
			if len(artifactTables) > 1 {
				return fmt.Errorf("cannot run migration %s reverting %s: found %d artifact tables, expected maximum 1", onlineDDL.UUID, revertMigration.UUID, len(artifactTables))
			}
			if len(artifactTables) == 0 {
				// Could happen on `DROP TABLE IF EXISTS` where the table did not exist...
				// The revert is therefore a noop, and is marked complete right away. The returned error is ignored.
				_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
			}
			// At most one iteration, per the checks above
			for _, artifactTable := range artifactTables {
				if err := e.updateArtifacts(ctx, onlineDDL.UUID, artifactTable); err != nil {
					return err
				}
				// Rename the artifact back into the original table name
				onlineDDL.SQL = sqlparser.BuildParsedQuery(sqlRenameTable, artifactTable, revertMigration.Table).Query
				if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
					return err
				}
			}
		}
	case sqlparser.AlterStr:
		{
			if row.AsBool("is_view", false) {
				// Reverting an ALTER VIEW: exactly one artifact is expected, and it is swapped with the view
				artifacts := row["artifacts"].ToString()
				artifactTables := textutil.SplitDelimitedList(artifacts)
				if len(artifactTables) > 1 {
					return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "cannot run migration %s reverting %s: found %d artifact tables, expected maximum 1", onlineDDL.UUID, revertMigration.UUID, len(artifactTables))
				}
				if len(artifactTables) == 0 {
					return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "cannot run migration %s reverting %s: found %d artifact tables, expected 1", onlineDDL.UUID, revertMigration.UUID, len(artifactTables))
				}
				for _, artifactTable := range artifactTables {
					if err := e.updateArtifacts(ctx, onlineDDL.UUID, artifactTable); err != nil {
						return err
					}
					// NOTE(review): this uses onlineDDL.Table, which the backwards-compatibility block
					// above updates in the database but not on the in-memory object; confirm it is populated here.
					onlineDDL.SQL, _, err = e.generateSwapTablesStatement(ctx, onlineDDL.Table, artifactTable)
					if err != nil {
						return err
					}
					if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
						return err
					}
				}
				return nil
			}
			// Real table
			if err := e.ExecuteWithVReplication(ctx, onlineDDL, revertMigration); err != nil {
				return err
			}
		}
	default:
		return fmt.Errorf("cannot run migration %s reverting %s: unexpected action %s", onlineDDL.UUID, revertMigration.UUID, revertedActionStr)
	}

	return nil
}
  2555  
// evaluateDeclarativeDiff is called for -declarative CREATE statements, where the table already exists. The function generates a SQL diff, which can be:
// - empty, in which case the migration is noop and implicitly successful, or
// - non-empty, in which case the migration turns to be an ALTER
//
// The diff is computed by creating a temporary "comparison" table (or view) from the migration's
// CREATE statement under a generated GC (HOLD) table name, then diffing the SHOW CREATE output of the
// existing table against that of the comparison table. The comparison table is dropped, best effort,
// when the function returns.
func (e *Executor) evaluateDeclarativeDiff(ctx context.Context, onlineDDL *schema.OnlineDDL) (diff schemadiff.EntityDiff, err error) {

	// Modify the CREATE TABLE statement to indicate a different, made up table name, known as the "comparison table"
	ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
	if err != nil {
		return nil, err
	}
	// Generate the name for the comparison table. Whether this is CREATE TABLE or CREATE VIEW
	// is determined further below, when computing the diff.
	comparisonTableName, err := schema.GenerateGCTableName(schema.HoldTableGCState, newGCTableRetainTime())
	if err != nil {
		return nil, err
	}

	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	{
		// Create the comparison table
		ddlStmt.SetTable("", comparisonTableName)
		modifiedCreateSQL := sqlparser.String(ddlStmt)

		// NOTE(review): the restore function is deferred before err is checked. This presumes
		// initMigrationSQLMode returns a non-nil function even when it returns an error — confirm.
		restoreSQLModeFunc, err := e.initMigrationSQLMode(ctx, onlineDDL, conn)
		defer restoreSQLModeFunc()
		if err != nil {
			return nil, err
		}

		if _, err := conn.ExecuteFetch(modifiedCreateSQL, 0, false); err != nil {
			return nil, err
		}

		// Deferred calls run in reverse order: the comparison table is dropped first, then
		// restoreSQLModeFunc runs, and finally the connection is closed.
		defer func() {
			// Drop the comparison table
			parsed := sqlparser.BuildParsedQuery(sqlDropTable, comparisonTableName)
			_, _ = conn.ExecuteFetch(parsed.Query, 0, false)
			// Nothing bad happens for not checking the error code. The table is GC/HOLD. If we
			// can't drop it now, it still gets collected later by tablegc mechanism
		}()
	}

	existingShowCreateTable, err := e.showCreateTable(ctx, onlineDDL.Table)
	if err != nil {
		return nil, vterrors.Wrapf(err, "in evaluateDeclarativeDiff(), for onlineDDL.Table")
	}
	if existingShowCreateTable == "" {
		return nil, vterrors.Errorf(vtrpcpb.Code_NOT_FOUND, "unexpected: cannot find table or view %v", onlineDDL.Table)
	}
	newShowCreateTable, err := e.showCreateTable(ctx, comparisonTableName)
	if err != nil {
		return nil, vterrors.Wrapf(err, "in evaluateDeclarativeDiff(), for comparisonTableName")
	}
	if newShowCreateTable == "" {
		// Note that the error reports the migration's table name, not the comparison table's name
		return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unexpected: cannot find table or view even as it was just created: %v", onlineDDL.Table)
	}
	hints := &schemadiff.DiffHints{AutoIncrementStrategy: schemadiff.AutoIncrementApplyHigher}
	// The diff goes from the existing definition to the newly declared one
	switch ddlStmt.(type) {
	case *sqlparser.CreateTable:
		diff, err = schemadiff.DiffCreateTablesQueries(existingShowCreateTable, newShowCreateTable, hints)
	case *sqlparser.CreateView:
		diff, err = schemadiff.DiffCreateViewsQueries(existingShowCreateTable, newShowCreateTable, hints)
	default:
		return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "expected CREATE TABLE or CREATE VIEW in online DDL statement: %v", onlineDDL.SQL)
	}
	if err != nil {
		return nil, err
	}
	return diff, nil
}
  2630  
  2631  // getCompletedMigrationByContextAndSQL chceks if there exists a completed migration with exact same
  2632  // context and SQL as given migration. If so, it returns its UUID.
  2633  func (e *Executor) getCompletedMigrationByContextAndSQL(ctx context.Context, onlineDDL *schema.OnlineDDL) (completedUUID string, err error) {
  2634  	if onlineDDL.MigrationContext == "" {
  2635  		// only applies to migrations with an explicit context
  2636  		return "", nil
  2637  	}
  2638  	query, err := sqlparser.ParseAndBind(sqlSelectCompleteMigrationsByContextAndSQL,
  2639  		sqltypes.StringBindVariable(e.keyspace),
  2640  		sqltypes.StringBindVariable(onlineDDL.MigrationContext),
  2641  		sqltypes.StringBindVariable(onlineDDL.SQL),
  2642  	)
  2643  	if err != nil {
  2644  		return "", err
  2645  	}
  2646  	r, err := e.execQuery(ctx, query)
  2647  	if err != nil {
  2648  		return "", err
  2649  	}
  2650  	for _, row := range r.Named().Rows {
  2651  		completedUUID = row["migration_uuid"].ToString()
  2652  	}
  2653  	return completedUUID, nil
  2654  }
  2655  
  2656  // failMigration marks a migration as failed
  2657  func (e *Executor) failMigration(ctx context.Context, onlineDDL *schema.OnlineDDL, withError error) error {
  2658  	defer e.triggerNextCheckInterval()
  2659  	_ = e.updateMigrationStatusFailedOrCancelled(ctx, onlineDDL.UUID)
  2660  	if withError != nil {
  2661  		_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, withError.Error())
  2662  	}
  2663  	e.ownedRunningMigrations.Delete(onlineDDL.UUID)
  2664  	return withError
  2665  }
  2666  
  2667  func (e *Executor) executeDropDDLActionMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
  2668  	failMigration := func(err error) error {
  2669  		return e.failMigration(ctx, onlineDDL, err)
  2670  	}
  2671  	e.migrationMutex.Lock()
  2672  	defer e.migrationMutex.Unlock()
  2673  
  2674  	// Drop statement.
  2675  	// Normally, we're going to modify DROP to RENAME (see later on). But if table name is
  2676  	// already a GC-lifecycle table, then we don't put it through yet another GC lifecycle,
  2677  	// we just drop it.
  2678  	if schema.IsGCTableName(onlineDDL.Table) {
  2679  		if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
  2680  			return failMigration(err)
  2681  		}
  2682  		return nil
  2683  	}
  2684  
  2685  	// We transform a DROP TABLE into a RENAME TABLE statement, so as to remove the table safely and asynchronously.
  2686  
  2687  	ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
  2688  	if err != nil {
  2689  		return failMigration(err)
  2690  	}
  2691  
  2692  	var toTableName string
  2693  	onlineDDL.SQL, toTableName, err = schema.GenerateRenameStatementWithUUID(onlineDDL.Table, schema.HoldTableGCState, onlineDDL.GetGCUUID(), newGCTableRetainTime())
  2694  	if err != nil {
  2695  		return failMigration(err)
  2696  	}
  2697  	if err := e.updateArtifacts(ctx, onlineDDL.UUID, toTableName); err != nil {
  2698  		return err
  2699  	}
  2700  
  2701  	acceptableErrorCodes := []int{}
  2702  	if ddlStmt.GetIfExists() {
  2703  		acceptableErrorCodes = acceptableDropTableIfExistsErrorCodes
  2704  	}
  2705  	acceptableErrCodeFound, err := e.executeDirectly(ctx, onlineDDL, acceptableErrorCodes...)
  2706  	if err != nil {
  2707  		return failMigration(err)
  2708  	}
  2709  	if acceptableErrCodeFound {
  2710  		// Table did not exist after all. There is no artifact
  2711  		if err := e.clearArtifacts(ctx, onlineDDL.UUID); err != nil {
  2712  			return err
  2713  		}
  2714  	}
  2715  
  2716  	return nil
  2717  }
  2718  
  2719  func (e *Executor) executeCreateDDLActionMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
  2720  	failMigration := func(err error) error {
  2721  		return e.failMigration(ctx, onlineDDL, err)
  2722  	}
  2723  	e.migrationMutex.Lock()
  2724  	defer e.migrationMutex.Unlock()
  2725  
  2726  	ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
  2727  	if err != nil {
  2728  		return failMigration(err)
  2729  	}
  2730  	if _, isCreateView := ddlStmt.(*sqlparser.CreateView); isCreateView {
  2731  		if ddlStmt.GetIsReplace() {
  2732  			// This is a CREATE OR REPLACE VIEW
  2733  			exists, err := e.tableExists(ctx, onlineDDL.Table)
  2734  			if err != nil {
  2735  				return failMigration(err)
  2736  			}
  2737  			if exists {
  2738  				// the view already exists. This CREATE OR REPLACE VIEW statement should
  2739  				// actually turn into an ALTER
  2740  				if err := e.executeAlterViewOnline(ctx, onlineDDL); err != nil {
  2741  					return failMigration(err)
  2742  				}
  2743  				return nil
  2744  			}
  2745  		}
  2746  	}
  2747  	// from now on, whether a VIEW or a TABLE, they get the same treatment
  2748  
  2749  	sentryArtifactTableName, err := schema.GenerateGCTableName(schema.HoldTableGCState, newGCTableRetainTime())
  2750  	if err != nil {
  2751  		return failMigration(err)
  2752  	}
  2753  	// we create a dummy artifact. Its existence means the table was created by this migration.
  2754  	// It will be read by the revert operation.
  2755  	if err := e.updateArtifacts(ctx, onlineDDL.UUID, sentryArtifactTableName); err != nil {
  2756  		return err
  2757  	}
  2758  
  2759  	if ddlStmt.GetIfNotExists() {
  2760  		// This is a CREATE TABLE IF NOT EXISTS
  2761  		// We want to know if the table actually exists before running this migration.
  2762  		// If so, then the operation is noop, and when we revert the migration, we also do a noop.
  2763  		exists, err := e.tableExists(ctx, onlineDDL.Table)
  2764  		if err != nil {
  2765  			return failMigration(err)
  2766  		}
  2767  		if exists {
  2768  			// the table already exists. This CREATE TABLE IF NOT EXISTS statement is a noop.
  2769  			// We therefore clear the artifact field. A revert operation will use this as a hint.
  2770  			if err := e.clearArtifacts(ctx, onlineDDL.UUID); err != nil {
  2771  				return failMigration(err)
  2772  			}
  2773  		}
  2774  	}
  2775  	if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
  2776  		return failMigration(err)
  2777  	}
  2778  	return nil
  2779  }
  2780  
  2781  // generateSwapTablesStatement creates a RENAME statement that swaps two tables, with assistance
  2782  // of temporary third table. It returns the name of generated third table, though normally
  2783  // that table should not exist before & after operation, only _during_ operation time.
  2784  func (e *Executor) generateSwapTablesStatement(ctx context.Context, tableName1, tableName2 string) (query string, swapTableName string, err error) {
  2785  	swapTableName, err = schema.GenerateGCTableName(schema.HoldTableGCState, newGCTableRetainTime())
  2786  	if err != nil {
  2787  		return "", swapTableName, err
  2788  	}
  2789  	parsed := sqlparser.BuildParsedQuery(sqlSwapTables,
  2790  		tableName1, swapTableName,
  2791  		tableName2, tableName1,
  2792  		swapTableName, tableName2,
  2793  	)
  2794  	return parsed.Query, swapTableName, nil
  2795  }
  2796  
  2797  // renameTableIfApplicable renames a table, assuming it exists and that the target does not exist.
  2798  func (e *Executor) renameTableIfApplicable(ctx context.Context, fromTableName, toTableName string) (attemptMade bool, err error) {
  2799  	if fromTableName == "" {
  2800  		return false, nil
  2801  	}
  2802  	exists, err := e.tableExists(ctx, fromTableName)
  2803  	if err != nil {
  2804  		return false, err
  2805  	}
  2806  	if !exists {
  2807  		// can't rename from table when it does not exist
  2808  		return false, nil
  2809  	}
  2810  	exists, err = e.tableExists(ctx, toTableName)
  2811  	if err != nil {
  2812  		return false, err
  2813  	}
  2814  	if exists {
  2815  		// target table exists, abort.
  2816  		return false, nil
  2817  	}
  2818  	parsed := sqlparser.BuildParsedQuery(sqlRenameTable, fromTableName, toTableName)
  2819  	_, err = e.execQuery(ctx, parsed.Query)
  2820  	return true, err
  2821  }
  2822  
  2823  func (e *Executor) executeAlterViewOnline(ctx context.Context, onlineDDL *schema.OnlineDDL) (err error) {
  2824  	artifactViewName, err := schema.GenerateGCTableName(schema.HoldTableGCState, newGCTableRetainTime())
  2825  	if err != nil {
  2826  		return err
  2827  	}
  2828  	stmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
  2829  	if err != nil {
  2830  		return err
  2831  	}
  2832  	switch viewStmt := stmt.(type) {
  2833  	case *sqlparser.CreateView:
  2834  		stmt.SetTable("", artifactViewName)
  2835  	case *sqlparser.AlterView:
  2836  		// consolidate the logic. We treat ALTER like we treat CREATE OR REPLACE
  2837  		// it actually easier for us to issue a CREATE OR REPLACE, because it
  2838  		// actually creates a view...
  2839  		stmt = &sqlparser.CreateView{
  2840  			Algorithm:   viewStmt.Algorithm,
  2841  			Definer:     viewStmt.Definer,
  2842  			Security:    viewStmt.Security,
  2843  			Columns:     viewStmt.Columns,
  2844  			Select:      viewStmt.Select,
  2845  			CheckOption: viewStmt.CheckOption,
  2846  			IsReplace:   true,
  2847  			Comments:    viewStmt.Comments,
  2848  		}
  2849  		stmt.SetTable("", artifactViewName)
  2850  	default:
  2851  		return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "executeAlterViewOnline only supports CreateView and AlterView statements. Got: %v", sqlparser.String(viewStmt))
  2852  	}
  2853  	artifactViewCreateSQL := sqlparser.String(stmt)
  2854  
  2855  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
  2856  	if err != nil {
  2857  		return err
  2858  	}
  2859  	defer conn.Close()
  2860  
  2861  	_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusRunning, false, progressPctStarted, etaSecondsUnknown, rowsCopiedUnknown, emptyHint)
  2862  
  2863  	if _, err := conn.ExecuteFetch(artifactViewCreateSQL, 0, false); err != nil {
  2864  		return err
  2865  	}
  2866  	if err := e.clearArtifacts(ctx, onlineDDL.UUID); err != nil {
  2867  		return err
  2868  	}
  2869  	if err := e.updateArtifacts(ctx, onlineDDL.UUID, artifactViewName); err != nil {
  2870  		return err
  2871  	}
  2872  
  2873  	// view created in requested format, but under different name. We now swap the views
  2874  	swapQuery, _, err := e.generateSwapTablesStatement(ctx, onlineDDL.Table, artifactViewName)
  2875  	if err != nil {
  2876  		return err
  2877  	}
  2878  	if _, err := conn.ExecuteFetch(swapQuery, 0, false); err != nil {
  2879  		return err
  2880  	}
  2881  	// Make sure this is considered as an ALTER.
  2882  	// Either the user issued a ALTER VIEW, and the action is trivially ALTER,
  2883  	// or the user issues a CREATE OR REPLACE, and the view existed, in which case this is implicitly an ALTER
  2884  	if err := e.updateDDLAction(ctx, onlineDDL.UUID, sqlparser.AlterStr); err != nil {
  2885  		return err
  2886  	}
  2887  
  2888  	_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
  2889  
  2890  	return nil
  2891  }
  2892  
  2893  // addInstantAlgorithm adds or modifies the AlterTable's ALGORITHM to INSTANT
  2894  func (e *Executor) addInstantAlgorithm(alterTable *sqlparser.AlterTable) {
  2895  	instantOpt := sqlparser.AlgorithmValue("INSTANT")
  2896  	for i, opt := range alterTable.AlterOptions {
  2897  		if _, ok := opt.(sqlparser.AlgorithmValue); ok {
  2898  			// replace an existing algorithm
  2899  			alterTable.AlterOptions[i] = instantOpt
  2900  			return
  2901  		}
  2902  	}
  2903  	// append an algorithm
  2904  	alterTable.AlterOptions = append(alterTable.AlterOptions, instantOpt)
  2905  }
  2906  
  2907  // executeSpecialAlterDDLActionMigrationIfApplicable sees if the given migration can be executed via special execution path, that isn't a full blown online schema change process.
  2908  func (e *Executor) executeSpecialAlterDDLActionMigrationIfApplicable(ctx context.Context, onlineDDL *schema.OnlineDDL) (specialMigrationExecuted bool, err error) {
  2909  	// Before we jump on to strategies... Some ALTERs can be optimized without having to run through
  2910  	// a full online schema change process. Let's find out if this is the case!
  2911  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaWithDB())
  2912  	if err != nil {
  2913  		return false, err
  2914  	}
  2915  	defer conn.Close()
  2916  	_, capableOf, _ := mysql.GetFlavor(conn.ServerVersion, nil)
  2917  
  2918  	specialPlan, err := e.analyzeSpecialAlterPlan(ctx, onlineDDL, capableOf)
  2919  	if err != nil {
  2920  		return false, err
  2921  	}
  2922  	if specialPlan == nil {
  2923  		return false, nil
  2924  	}
  2925  
  2926  	switch specialPlan.operation {
  2927  	case instantDDLSpecialOperation:
  2928  		e.addInstantAlgorithm(specialPlan.alterTable)
  2929  		onlineDDL.SQL = sqlparser.CanonicalString(specialPlan.alterTable)
  2930  		if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
  2931  			return false, err
  2932  		}
  2933  	case dropRangePartitionSpecialOperation:
  2934  		dropPartition := func() error {
  2935  			artifactTableName, err := schema.GenerateGCTableName(schema.HoldTableGCState, newGCTableRetainTime())
  2936  			if err != nil {
  2937  				return err
  2938  			}
  2939  			if err := e.updateArtifacts(ctx, onlineDDL.UUID, artifactTableName); err != nil {
  2940  				return err
  2941  			}
  2942  
  2943  			// Apply CREATE TABLE for artifact table
  2944  			if _, err := e.createTableLike(ctx, artifactTableName, onlineDDL, conn); err != nil {
  2945  				return err
  2946  			}
  2947  			// Remove partitioning
  2948  			parsed := sqlparser.BuildParsedQuery(sqlAlterTableRemovePartitioning, artifactTableName)
  2949  			if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil {
  2950  				return err
  2951  			}
  2952  			// Exchange with partition
  2953  			partitionName := specialPlan.Detail("partition_name")
  2954  			parsed = sqlparser.BuildParsedQuery(sqlAlterTableExchangePartition, onlineDDL.Table, partitionName, artifactTableName)
  2955  			if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil {
  2956  				return err
  2957  			}
  2958  			// Drop table's partition
  2959  			parsed = sqlparser.BuildParsedQuery(sqlAlterTableDropPartition, onlineDDL.Table, partitionName)
  2960  			if _, err := conn.ExecuteFetch(parsed.Query, 0, false); err != nil {
  2961  				return err
  2962  			}
  2963  			return nil
  2964  		}
  2965  		if err := dropPartition(); err != nil {
  2966  			return false, err
  2967  		}
  2968  	case addRangePartitionSpecialOperation:
  2969  		if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
  2970  			return false, err
  2971  		}
  2972  	default:
  2973  		return false, nil
  2974  	}
  2975  	if err := e.updateMigrationSpecialPlan(ctx, onlineDDL.UUID, specialPlan.String()); err != nil {
  2976  		return true, err
  2977  	}
  2978  	_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
  2979  	return true, nil
  2980  }
  2981  
  2982  // executeAlterDDLActionMigration
  2983  func (e *Executor) executeAlterDDLActionMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
  2984  	failMigration := func(err error) error {
  2985  		return e.failMigration(ctx, onlineDDL, err)
  2986  	}
  2987  	ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
  2988  	if err != nil {
  2989  		return failMigration(err)
  2990  	}
  2991  	if _, isAlterView := ddlStmt.(*sqlparser.AlterView); isAlterView {
  2992  		// Same treatment for all online strategies
  2993  		exists, err := e.tableExists(ctx, onlineDDL.Table)
  2994  		if err != nil {
  2995  			return failMigration(err)
  2996  		}
  2997  		if !exists {
  2998  			// We cannot ALTER VIEW if the view does not exist. We could bail out directly here,
  2999  			// but we prefer to actually get an authentic MySQL error. We know MySQL will fail running
  3000  			// this statement.
  3001  			_, err := e.executeDirectly(ctx, onlineDDL)
  3002  			return failMigration(err)
  3003  		}
  3004  		// OK, view exists
  3005  		if err := e.executeAlterViewOnline(ctx, onlineDDL); err != nil {
  3006  			return failMigration(err)
  3007  		}
  3008  		return nil
  3009  	}
  3010  	// This is a real TABLE and not a VIEW
  3011  
  3012  	// Before we jump on to strategies... Some ALTERs can be optimized without having to run through
  3013  	// a full online schema change process. Let's find out if this is the case!
  3014  	specialMigrationExecuted, err := e.executeSpecialAlterDDLActionMigrationIfApplicable(ctx, onlineDDL)
  3015  	if err != nil {
  3016  		return failMigration(err)
  3017  	}
  3018  	if specialMigrationExecuted {
  3019  		return nil
  3020  	}
  3021  
  3022  	// OK, nothing special about this ALTER. Let's go ahead and execute it.
  3023  	switch onlineDDL.Strategy {
  3024  	case schema.DDLStrategyOnline, schema.DDLStrategyVitess:
  3025  		go func() {
  3026  			e.migrationMutex.Lock()
  3027  			defer e.migrationMutex.Unlock()
  3028  
  3029  			if err := e.ExecuteWithVReplication(ctx, onlineDDL, nil); err != nil {
  3030  				failMigration(err)
  3031  			}
  3032  		}()
  3033  	case schema.DDLStrategyGhost:
  3034  		go func() {
  3035  			e.migrationMutex.Lock()
  3036  			defer e.migrationMutex.Unlock()
  3037  
  3038  			if err := e.ExecuteWithGhost(ctx, onlineDDL); err != nil {
  3039  				failMigration(err)
  3040  			}
  3041  		}()
  3042  	case schema.DDLStrategyPTOSC:
  3043  		go func() {
  3044  			e.migrationMutex.Lock()
  3045  			defer e.migrationMutex.Unlock()
  3046  
  3047  			if err := e.ExecuteWithPTOSC(ctx, onlineDDL); err != nil {
  3048  				failMigration(err)
  3049  			}
  3050  		}()
  3051  	case schema.DDLStrategyMySQL:
  3052  		go func() {
  3053  			e.migrationMutex.Lock()
  3054  			defer e.migrationMutex.Unlock()
  3055  
  3056  			if _, err := e.executeDirectly(ctx, onlineDDL); err != nil {
  3057  				failMigration(err)
  3058  			}
  3059  		}()
  3060  	default:
  3061  		{
  3062  			return failMigration(fmt.Errorf("Unsupported strategy: %+v", onlineDDL.Strategy))
  3063  		}
  3064  	}
  3065  	return nil
  3066  }
  3067  
// executeMigration executes a single migration. It analyzes the migration type:
// - is it declarative?
// - is it CREATE / DROP / ALTER?
// - is it a Revert request?
// - what's the migration strategy?
// The function invokes the appropriate handlers for each of those cases.
//
// DROP, CREATE and REVERT handlers are started in goroutines, so their outcome is not reflected
// in the value returned here; the ALTER handler is called synchronously and its error is returned.
// e.triggerNextCheckInterval() is invoked when this function returns, whatever the outcome.
//
// NOTE(review): runNextMigration calls this function while holding e.migrationMutex, and
// executeCreateDDLActionMigration as well as the REVERT goroutine lock that same mutex. That is
// presumably why those handlers run in goroutines -- confirm before making any of them synchronous.
func (e *Executor) executeMigration(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
	defer e.triggerNextCheckInterval()
	// failMigration hands the error over to e.failMigration for this migration, and returns
	// whatever that returns.
	failMigration := func(err error) error {
		return e.failMigration(ctx, onlineDDL, err)
	}

	ddlAction, err := onlineDDL.GetAction()
	if err != nil {
		return failMigration(err)
	}

	// See if this is a duplicate submission. A submission is considered duplicate if it has the exact same
	// migration context and DDL as a previous one. We are only interested in our scenario in a duplicate
	// whose predecessor is "complete". If this is the case, then we can mark our own migration as
	// implicitly "complete", too.
	{
		completedUUID, err := e.getCompletedMigrationByContextAndSQL(ctx, onlineDDL)
		if err != nil {
			// Note: unlike other errors in this function, this one is returned as is, without
			// going through failMigration.
			return err
		}
		if completedUUID != "" {
			// Yep. We mark this migration as implicitly complete, and we're done with it!
			_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
			_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, fmt.Sprintf("duplicate DDL as %s for migration context %s", completedUUID, onlineDDL.MigrationContext))
			return nil
		}
	}

	if onlineDDL.StrategySetting().IsDeclarative() {
		switch ddlAction {
		case sqlparser.RevertDDLAction:
			// No special action. Declarative Revert migrations are handled like any normal Revert migration.
		case sqlparser.AlterDDLAction:
			return failMigration(vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "strategy is declarative. ALTER cannot run in declarative mode for migration %v", onlineDDL.UUID))
		case sqlparser.DropDDLAction:
			// This DROP is declarative, meaning it may:
			// - actually DROP a table, if that table exists, or
			// - Implicitly do nothing, if the table does not exist
			{
				// Sanity: reject IF EXISTS statements, because they don't make sense (or are ambiguous) in declarative mode
				ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
				if err != nil {
					return failMigration(err)
				}
				if ddlStmt.GetIfExists() {
					return failMigration(vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "strategy is declarative. IF EXISTS does not work in declarative mode for migration %v", onlineDDL.UUID))
				}
			}
			exists, err := e.tableExists(ctx, onlineDDL.Table)
			if err != nil {
				return failMigration(err)
			}
			if exists {
				// table does exist, so this declarative DROP turns out to really be an actual DROP. No further action is needed here
			} else {
				// table does not exist. We mark this DROP as implicitly successful
				_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
				_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "no change")
				return nil
			}
		case sqlparser.CreateDDLAction:
			// This CREATE is declarative, meaning it may:
			// - actually CREATE a table, if that table does not exist, or
			// - ALTER the table, if it exists and is different, or
			// - Implicitly do nothing, if the table exists and is identical to CREATE statement

			// Sanity: reject IF NOT EXISTS statements, because they don't make sense (or are ambiguous) in declarative mode
			ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
			if err != nil {
				return failMigration(err)
			}
			if ddlStmt.GetIfNotExists() {
				return failMigration(vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "strategy is declarative. IF NOT EXISTS does not work in declarative mode for migration %v", onlineDDL.UUID))
			}
			// Same goes for OR REPLACE
			if ddlStmt.GetIsReplace() {
				return failMigration(vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "strategy is declarative. OR REPLACE does not work in declarative mode for migration %v", onlineDDL.UUID))
			}

			exists, err := e.tableExists(ctx, onlineDDL.Table)
			if err != nil {
				return failMigration(err)
			}
			if exists {
				diff, err := e.evaluateDeclarativeDiff(ctx, onlineDDL)
				if err != nil {
					return failMigration(err)
				}
				if diff == nil || diff.IsEmpty() {
					// No diff! We mark this CREATE as implicitly successful
					_ = e.onSchemaMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusComplete, false, progressPctFull, etaSecondsNow, rowsCopiedUnknown, emptyHint)
					_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, "no change")
					return nil
				}
				// The diff is non empty. We convert this migration into an ALTER
				if err := e.updateDDLAction(ctx, onlineDDL.UUID, sqlparser.AlterStr); err != nil {
					return failMigration(err)
				}
				if createViewStmt, isCreateView := ddlStmt.(*sqlparser.CreateView); isCreateView {
					// Rewrite as CREATE OR REPLACE
					// this will be handled later on.
					// Note that ddlAction is left as is for a view: the statement still goes
					// through the CREATE handler, below.
					createViewStmt.IsReplace = true
					onlineDDL.SQL = sqlparser.String(createViewStmt)
				} else {
					// a TABLE: from here on the migration runs as an ALTER, using the diff's statement
					ddlAction = sqlparser.AlterDDLAction
					onlineDDL.SQL = diff.CanonicalStatementString()
				}
				_ = e.updateMigrationMessage(ctx, onlineDDL.UUID, diff.CanonicalStatementString())
			} else {
				{
					// table does not exist, so this declarative CREATE turns out to really be an actual CREATE. No further action is needed here.
					// the statement is empty, but I want to keep the 'else' clause here just for sake of this comment.
				}
			}
		}
	} // endif onlineDDL.IsDeclarative()
	// Noting that if the migration is declarative, then it may have been modified in the above block, to meet the next operations.

	switch ddlAction {
	case sqlparser.DropDDLAction:
		// The value returned by a function started with `go` is discarded.
		go func() error {
			return e.executeDropDDLActionMigration(ctx, onlineDDL)
		}()
	case sqlparser.CreateDDLAction:
		// Likewise, the returned value is discarded.
		go func() error {
			return e.executeCreateDDLActionMigration(ctx, onlineDDL)
		}()
	case sqlparser.AlterDDLAction:
		return e.executeAlterDDLActionMigration(ctx, onlineDDL)
	case sqlparser.RevertDDLAction:
		go func() {
			e.migrationMutex.Lock()
			defer e.migrationMutex.Unlock()

			if err := e.executeRevert(ctx, onlineDDL); err != nil {
				failMigration(err)
			}
		}()
	}
	return nil
}
  3215  
  3216  // runNextMigration picks up to one 'ready' migration that is able to run, and executes it.
  3217  // Possible scenarios:
  3218  // - no migration is in 'ready' state -- nothing to be done
  3219  // - a migration is 'ready', but conflicts with other running migrations -- try another 'ready' migration
  3220  // - multiple migrations are 'ready' -- we just handle one here
  3221  // Note that per the above breakdown, and due to potential conflicts, it is possible to have one or
  3222  // more 'ready' migration, and still none is executed.
  3223  func (e *Executor) runNextMigration(ctx context.Context) error {
  3224  	e.migrationMutex.Lock()
  3225  	defer e.migrationMutex.Unlock()
  3226  
  3227  	if !e.reviewedRunningMigrationsFlag {
  3228  		// Since Open(), we havent's once executed reviewRunningMigrations() successfully.
  3229  		// This means we may not have a good picture of what is actually running. Perhaps there's
  3230  		// a vreplication migration from a pre-PRS/ERS that we still need to learn about?
  3231  		// We're going to be careful here, and avoid running new migrations until we have
  3232  		// a better picture. It will likely take a couple seconds till next iteration.
  3233  		// This delay only takes place shortly after Open().
  3234  		return nil
  3235  	}
  3236  
  3237  	// getNonConflictingMigration finds a single 'ready' migration which does not conflict with running migrations.
  3238  	// Conflicts are:
  3239  	// - a migration is 'ready' but is not set to run _concurrently_, and there's a running migration that is also non-concurrent
  3240  	// - a migration is 'ready' but there's another migration 'running' on the exact same table
  3241  	getNonConflictingMigration := func() (*schema.OnlineDDL, error) {
  3242  		pendingMigrationsUUIDs, err := e.readPendingMigrationsUUIDs(ctx)
  3243  		if err != nil {
  3244  			return nil, err
  3245  		}
  3246  		r, err := e.execQuery(ctx, sqlSelectReadyMigrations)
  3247  		if err != nil {
  3248  			return nil, err
  3249  		}
  3250  		for _, row := range r.Named().Rows {
  3251  			uuid := row["migration_uuid"].ToString()
  3252  			onlineDDL, migrationRow, err := e.readMigration(ctx, uuid)
  3253  			if err != nil {
  3254  				return nil, err
  3255  			}
  3256  			isImmediateOperation := migrationRow.AsBool("is_immediate_operation", false)
  3257  
  3258  			if conflictFound, _ := e.isAnyConflictingMigrationRunning(onlineDDL); conflictFound {
  3259  				continue // this migration conflicts with a running one
  3260  			}
  3261  			if e.countOwnedRunningMigrations() >= maxConcurrentOnlineDDLs {
  3262  				continue // too many running migrations
  3263  			}
  3264  			if isImmediateOperation && onlineDDL.StrategySetting().IsInOrderCompletion() {
  3265  				// This migration is immediate: if we run it now, it will complete within a second or two at most.
  3266  				if len(pendingMigrationsUUIDs) > 0 && pendingMigrationsUUIDs[0] != onlineDDL.UUID {
  3267  					continue
  3268  				}
  3269  			}
  3270  			// This migration seems good to go
  3271  			return onlineDDL, err
  3272  		}
  3273  		// no non-conflicting migration found...
  3274  		// Either all ready migrations are conflicting, or there are no ready migrations...
  3275  		return nil, nil
  3276  	}
  3277  	onlineDDL, err := getNonConflictingMigration()
  3278  	if err != nil {
  3279  		return err
  3280  	}
  3281  	if onlineDDL == nil {
  3282  		// nothing to do
  3283  		return nil
  3284  	}
  3285  	{
  3286  		// We strip out any VT query comments because our simplified parser doesn't work well with comments
  3287  		ddlStmt, _, err := schema.ParseOnlineDDLStatement(onlineDDL.SQL)
  3288  		if err == nil {
  3289  			ddlStmt.SetComments(sqlparser.Comments{})
  3290  			onlineDDL.SQL = sqlparser.String(ddlStmt)
  3291  		}
  3292  	}
  3293  	log.Infof("Executor.runNextMigration: migration %s is non conflicting and will be executed next", onlineDDL.UUID)
  3294  	e.executeMigration(ctx, onlineDDL)
  3295  	return nil
  3296  }
  3297  
  3298  // isPTOSCMigrationRunning sees if pt-online-schema-change is running a specific migration,
  3299  // by examining its PID file
  3300  func (e *Executor) isPTOSCMigrationRunning(ctx context.Context, uuid string) (isRunning bool, pid int, err error) {
  3301  	// Try and read its PID file:
  3302  	content, err := os.ReadFile(e.ptPidFileName(uuid))
  3303  	if err != nil {
  3304  		// file probably does not exist (migration not running)
  3305  		// or any other issue --> we can't confirm that the migration is actually running
  3306  		return false, pid, err
  3307  	}
  3308  	contentString := strings.TrimSpace(string(content))
  3309  	//
  3310  	pid, err = strconv.Atoi(contentString)
  3311  	if err != nil {
  3312  		// can't get the PID right. Can't confirm migration is running.
  3313  		return false, pid, err
  3314  	}
  3315  	p, err := os.FindProcess(pid)
  3316  	if err != nil {
  3317  		// can't find the process. Can't confirm migration is running.
  3318  		return false, pid, err
  3319  	}
  3320  	err = p.Signal(syscall.Signal(0))
  3321  	if err != nil {
  3322  		// can't verify process is running. Can't confirm migration is running.
  3323  		return false, pid, err
  3324  	}
  3325  	// AHA! We are able to confirm this pt-osc migration is actually running!
  3326  	return true, pid, nil
  3327  }
  3328  
  3329  // dropOnlineDDLUser drops the given ddl user account at the end of migration
  3330  func (e *Executor) dropPTOSCMigrationTriggers(ctx context.Context, onlineDDL *schema.OnlineDDL) error {
  3331  	conn, err := dbconnpool.NewDBConnection(ctx, e.env.Config().DB.DbaConnector())
  3332  	if err != nil {
  3333  		return err
  3334  	}
  3335  	defer conn.Close()
  3336  
  3337  	parsed := sqlparser.BuildParsedQuery(sqlSelectPTOSCMigrationTriggers, ":mysql_schema", ":mysql_table")
  3338  	bindVars := map[string]*querypb.BindVariable{
  3339  		"mysql_schema": sqltypes.StringBindVariable(onlineDDL.Schema),
  3340  		"mysql_table":  sqltypes.StringBindVariable(onlineDDL.Table),
  3341  	}
  3342  	bound, err := parsed.GenerateQuery(bindVars, nil)
  3343  	if err != nil {
  3344  		return err
  3345  	}
  3346  	r, err := e.execQuery(ctx, bound)
  3347  	if err != nil {
  3348  		return err
  3349  	}
  3350  	for _, row := range r.Named().Rows {
  3351  		// iterate pt-osc triggers and drop them
  3352  		triggerSchema := row.AsString("trigger_schema", "")
  3353  		triggerName := row.AsString("trigger_name", "")
  3354  
  3355  		dropParsed := sqlparser.BuildParsedQuery(sqlDropTrigger, triggerSchema, triggerName)
  3356  		if _, err := conn.ExecuteFetch(dropParsed.Query, 0, false); err != nil {
  3357  			return err
  3358  		}
  3359  	}
  3360  
  3361  	return err
  3362  }
  3363  
  3364  // readVReplStream reads _vt.vreplication entries for given workflow
  3365  func (e *Executor) readVReplStream(ctx context.Context, uuid string, okIfMissing bool) (*VReplStream, error) {
  3366  	query, err := sqlparser.ParseAndBind(sqlReadVReplStream,
  3367  		sqltypes.StringBindVariable(uuid),
  3368  	)
  3369  	if err != nil {
  3370  		return nil, err
  3371  	}
  3372  	r, err := e.execQuery(ctx, query)
  3373  	if err != nil {
  3374  		return nil, err
  3375  	}
  3376  	if len(r.Rows) == 0 && okIfMissing {
  3377  		return nil, nil
  3378  	}
  3379  	row := r.Named().Row()
  3380  	if row == nil {
  3381  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Cannot find unique workflow for UUID: %+v", uuid)
  3382  	}
  3383  	s := &VReplStream{
  3384  		id:                   row.AsInt64("id", 0),
  3385  		workflow:             row.AsString("workflow", ""),
  3386  		source:               row.AsString("source", ""),
  3387  		pos:                  row.AsString("pos", ""),
  3388  		timeUpdated:          row.AsInt64("time_updated", 0),
  3389  		timeHeartbeat:        row.AsInt64("time_heartbeat", 0),
  3390  		timeThrottled:        row.AsInt64("time_throttled", 0),
  3391  		componentThrottled:   row.AsString("component_throttled", ""),
  3392  		transactionTimestamp: row.AsInt64("transaction_timestamp", 0),
  3393  		state:                row.AsString("state", ""),
  3394  		message:              row.AsString("message", ""),
  3395  		rowsCopied:           row.AsInt64("rows_copied", 0),
  3396  		bls:                  &binlogdatapb.BinlogSource{},
  3397  	}
  3398  	if err := prototext.Unmarshal([]byte(s.source), s.bls); err != nil {
  3399  		return nil, err
  3400  	}
  3401  	return s, nil
  3402  }
  3403  
  3404  // isVReplMigrationReadyToCutOver sees if the vreplication migration has completed the row copy
  3405  // and is up to date with the binlogs.
  3406  func (e *Executor) isVReplMigrationReadyToCutOver(ctx context.Context, s *VReplStream) (isReady bool, err error) {
  3407  	// Check all the cases where migration is still running:
  3408  	{
  3409  		// when ready to cut-over, pos must have some value
  3410  		if s.pos == "" {
  3411  			return false, nil
  3412  		}
  3413  	}
  3414  	{
  3415  		// Both time_updated and transaction_timestamp must be in close priximity to each
  3416  		// other and to the time now, otherwise that means we're lagging and it's not a good time
  3417  		// to cut-over
  3418  		durationDiff := func(t1, t2 time.Time) time.Duration {
  3419  			diff := t1.Sub(t2)
  3420  			if diff < 0 {
  3421  				diff = -diff
  3422  			}
  3423  			return diff
  3424  		}
  3425  		timeNow := time.Now()
  3426  		timeUpdated := time.Unix(s.timeUpdated, 0)
  3427  		if durationDiff(timeNow, timeUpdated) > vreplicationCutOverThreshold {
  3428  			return false, nil
  3429  		}
  3430  		// Let's look at transaction timestamp. This gets written by any ongoing
  3431  		// writes on the server (whether on this table or any other table)
  3432  		transactionTimestamp := time.Unix(s.transactionTimestamp, 0)
  3433  		if durationDiff(timeNow, transactionTimestamp) > vreplicationCutOverThreshold {
  3434  			return false, nil
  3435  		}
  3436  	}
  3437  	{
  3438  		// copy_state must have no entries for this vreplication id: if entries are
  3439  		// present that means copy is still in progress
  3440  		query, err := sqlparser.ParseAndBind(sqlReadCountCopyState,
  3441  			sqltypes.Int64BindVariable(s.id),
  3442  		)
  3443  		if err != nil {
  3444  			return false, err
  3445  		}
  3446  		r, err := e.execQuery(ctx, query)
  3447  		if err != nil {
  3448  			return false, err
  3449  		}
  3450  		csRow := r.Named().Row()
  3451  		if csRow == nil {
  3452  			return false, err
  3453  		}
  3454  		count := csRow.AsInt64("cnt", 0)
  3455  		if count > 0 {
  3456  			// Still copying
  3457  			return false, nil
  3458  		}
  3459  	}
  3460  
  3461  	return true, nil
  3462  }
  3463  
  3464  // isVReplMigrationRunning sees if there is a VReplication migration actively running
  3465  func (e *Executor) isVReplMigrationRunning(ctx context.Context, uuid string) (isRunning bool, s *VReplStream, err error) {
  3466  	s, err = e.readVReplStream(ctx, uuid, true)
  3467  	if err != nil {
  3468  		return false, s, err
  3469  	}
  3470  	if s == nil {
  3471  		return false, s, nil
  3472  	}
  3473  	switch s.state {
  3474  	case binlogplayer.BlpError:
  3475  		return false, s, nil
  3476  	case binlogplayer.VReplicationInit, binlogplayer.VReplicationCopying, binlogplayer.BlpRunning:
  3477  		return true, s, nil
  3478  	}
  3479  	if strings.Contains(strings.ToLower(s.message), "error") {
  3480  		return false, s, nil
  3481  	}
  3482  	return false, s, nil
  3483  }
  3484  
// reviewRunningMigrations iterates migrations in 'running' state. Normally there's only one running, which was
// spawned by this tablet; but vreplication migrations could also resume from failure.
//
// The function holds migrationMutex for its entire duration and returns immediately (with zero
// values) if the executor is not open.
//
// Returns:
//   - countRunnning: the number of reviewed migrations that reached the end of the per-migration
//     review. Note that a vreplication-based migration for which no stream was found is skipped
//     via `continue` and is therefore not counted.
//   - cancellable: migrations which this review concluded should be cancelled. This function does
//     not cancel them itself; it only collects them for the caller.
//   - err: the first error that aborted the review. Entries collected up to that point are still
//     returned.
func (e *Executor) reviewRunningMigrations(ctx context.Context) (countRunnning int, cancellable []*cancellableMigration, err error) {
	e.migrationMutex.Lock()
	defer e.migrationMutex.Unlock()

	if atomic.LoadInt64(&e.isOpen) == 0 {
		return countRunnning, cancellable, nil
	}

	// Find the throttle ratio currently applied to the Online DDL app, if any. It stays at zero
	// when the throttler is not ready or when the app is not listed as throttled.
	var currentUserThrottleRatio float64
	if err := e.lagThrottler.CheckIsReady(); err == nil {
		// No point in reviewing throttler info if it's not enabled&open
		for _, app := range e.lagThrottler.ThrottledApps() {
			if app.AppName == throttlerOnlineDDLApp {
				currentUserThrottleRatio = app.Ratio
				break
			}
		}
	}

	// throttlerOnce ensures the self-healing throttler check further below is issued at most
	// once per invocation of this function, no matter how many migrations are running.
	var throttlerOnce sync.Once
	r, err := e.execQuery(ctx, sqlSelectRunningMigrations)
	if err != nil {
		return countRunnning, cancellable, err
	}
	pendingMigrationsUUIDs, err := e.readPendingMigrationsUUIDs(ctx)
	if err != nil {
		return countRunnning, cancellable, err
	}
	uuidsFoundRunning := map[string]bool{}
	for _, row := range r.Named().Rows {
		uuid := row["migration_uuid"].ToString()
		onlineDDL, migrationRow, err := e.readMigration(ctx, uuid)
		if err != nil {
			return countRunnning, cancellable, err
		}
		postponeCompletion := row.AsBool("postpone_completion", false)
		elapsedSeconds := row.AsInt64("elapsed_seconds", 0)

		if stowawayTable := row.AsString("stowaway_table", ""); stowawayTable != "" {
			// whoa
			// stowawayTable is an original table stowed away while cutting over a vrepl migration, see call to cutOverVReplMigration() down below in this function.
			// In a normal operation, the table should not exist outside the scope of cutOverVReplMigration
			// If it exists, that means a tablet crashed while running a cut-over, and left the database in a bad state, where the migrated table does not exist.
			// thankfully, we have tracked this situation and just realized what happened. Now, first thing to do is to restore the original table.
			log.Infof("found stowaway table %s journal in migration %s for table %s", stowawayTable, uuid, onlineDDL.Table)
			attemptMade, err := e.renameTableIfApplicable(ctx, stowawayTable, onlineDDL.Table)
			if err != nil {
				// unable to restore table; we bail out, and we will try again next round.
				return countRunnning, cancellable, err
			}
			// success
			if attemptMade {
				log.Infof("stowaway table %s restored back into %s", stowawayTable, onlineDDL.Table)
			} else {
				log.Infof("stowaway table %s did not exist and there was no need to restore it", stowawayTable)
			}
			// OK good, table restored. We can remove the record.
			if err := e.updateMigrationStowawayTable(ctx, uuid, ""); err != nil {
				return countRunnning, cancellable, err
			}
		}

		uuidsFoundRunning[uuid] = true

		// Best effort: failure to record the throttle ratio does not abort the review.
		_ = e.updateMigrationUserThrottleRatio(ctx, uuid, currentUserThrottleRatio)
		switch onlineDDL.StrategySetting().Strategy {
		case schema.DDLStrategyOnline, schema.DDLStrategyVitess:
			{
				// We check the _vt.vreplication table
				s, err := e.readVReplStream(ctx, uuid, true)
				if err != nil {
					return countRunnning, cancellable, err
				}
				isVreplicationTestSuite := onlineDDL.StrategySetting().IsVreplicationTestSuite()
				if isVreplicationTestSuite {
					e.triggerNextCheckInterval()
				}
				if s == nil {
					// No stream found for this migration. Note that this skips the
					// countRunnning++ at the bottom of the loop.
					continue
				}
				// Let's see if vreplication indicates an error. Many errors are recoverable, and
				// we do not wish to fail on first sight. We will use LastError to repeatedly
				// check if this error persists, until finally, after some timeout, we give up.
				if _, ok := e.vreplicationLastError[uuid]; !ok {
					e.vreplicationLastError[uuid] = vterrors.NewLastError(
						fmt.Sprintf("Online DDL migration %v", uuid),
						staleMigrationMinutes*time.Minute,
					)
				}
				lastError := e.vreplicationLastError[uuid]
				isTerminal, vreplError := s.hasError()
				lastError.Record(vreplError)
				if isTerminal || !lastError.ShouldRetry() {
					cancellable = append(cancellable, newCancellableMigration(uuid, s.message))
				}
				if s.isRunning() {
					// This VRepl migration may have started from outside this tablet, so
					// this executor may not own the migration _yet_. We make sure to own it.
					// VReplication migrations are unique in this respect: we are able to complete
					// a vreplication migration started by another tablet.
					e.ownedRunningMigrations.Store(uuid, onlineDDL)
					if lastVitessLivenessIndicator := migrationRow.AsInt64("vitess_liveness_indicator", 0); lastVitessLivenessIndicator < s.livenessTimeIndicator() {
						_ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid)
						_ = e.updateVitessLivenessIndicator(ctx, uuid, s.livenessTimeIndicator())
					}
					if onlineDDL.TabletAlias != e.TabletAliasString() {
						_ = e.updateMigrationTablet(ctx, uuid)
						log.Infof("migration %s adopted by tablet %s", uuid, e.TabletAliasString())
					}
					// The following progress/bookkeeping updates are best effort: errors are
					// deliberately ignored so that they do not abort the review.
					_ = e.updateRowsCopied(ctx, uuid, s.rowsCopied)
					_ = e.updateMigrationProgressByRowsCopied(ctx, uuid, s.rowsCopied)
					_ = e.updateMigrationETASecondsByProgress(ctx, uuid)
					_ = e.updateMigrationLastThrottled(ctx, uuid, s.timeThrottled, s.componentThrottled)

					isReady, err := e.isVReplMigrationReadyToCutOver(ctx, s)
					if err != nil {
						_ = e.updateMigrationMessage(ctx, uuid, err.Error())
						return countRunnning, cancellable, err
					}
					if isReady && isVreplicationTestSuite {
						// This is an endtoend test suite execution. We intentionally delay it by at least
						// vreplicationTestSuiteWaitSeconds
						if elapsedSeconds < vreplicationTestSuiteWaitSeconds {
							isReady = false
						}
					}
					// Indicate to outside observers whether the migration is generally ready to complete.
					// In the case of a postponed migration, we will not complete it, but the user will
					// understand whether "now is a good time" or "not there yet"
					_ = e.updateMigrationReadyToComplete(ctx, uuid, isReady)
					if postponeCompletion {
						// override. Even if migration is ready, we do not complete it.
						isReady = false
					}
					if isReady && onlineDDL.StrategySetting().IsInOrderCompletion() {
						if len(pendingMigrationsUUIDs) > 0 && pendingMigrationsUUIDs[0] != onlineDDL.UUID {
							// wait for earlier pending migrations to complete
							isReady = false
						}
					}
					if isReady {
						if err := e.cutOverVReplMigration(ctx, s); err != nil {
							_ = e.updateMigrationMessage(ctx, uuid, err.Error())
							log.Errorf("cutOverVReplMigration failed: err=%v", err)
							if merr, ok := err.(*mysql.SQLError); ok {
								switch merr.Num {
								case mysql.ERTooLongIdent:
									// The cancellation is launched in its own goroutine rather than
									// called inline. NOTE(review): presumably because CancelMigration
									// needs migrationMutex, which this function holds -- confirm.
									go e.CancelMigration(ctx, uuid, err.Error(), false)
								}
							}
							return countRunnning, cancellable, err
						}
					}
					go throttlerOnce.Do(func() {
						if e.lagThrottler.CheckIsReady() != nil {
							return
						}
						// Self healing: in the following scenario:
						// - a vitess migration
						// - with on demand heartbeats
						// - the streamer running on a replica
						// - the streamer was throttled for long enough
						// - then vplayer and vcopier are locked, waiting for the streamer to do something
						// - since they are blocked, they're not running throttler checks
						// - since streamer runs on replica, it only checks that replica
						// - therefore no one asking for on-demand heartbeats
						// - then, if the conditions for the streamer's throttling are done, the streamer then thinks there's replication lag, with nothing to remediate it.
						// - it's a deadlock.
						// And so, once per reviewRunningMigrations(), and assuming there _are_ running migrations, we ensure to hit a throttler check. This will kick
						// on-demand heartbeats, unlocking the deadlock.
						e.lagThrottler.CheckByType(ctx, throttlerOnlineDDLApp, "", throttleCheckFlags, throttle.ThrottleCheckPrimaryWrite)
					})
				}
			}
		case schema.DDLStrategyPTOSC:
			{
				// Since pt-osc doesn't have a "liveness" plugin entry point, we do it externally:
				// if the process is alive, we update the `liveness_timestamp` for this migration.
				running, _, err := e.isPTOSCMigrationRunning(ctx, uuid)
				if err != nil {
					return countRunnning, cancellable, err
				}
				if running {
					_ = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid)
				}
				if _, ok := e.ownedRunningMigrations.Load(uuid); !ok {
					// Ummm, the migration is running but we don't own it. This means the migration
					// is rogue. Maybe executed by another tablet. Anyway, if we don't own it, we can't
					// complete the migration. Even if it runs, the logic around announcing it as complete
					// is missing. So we may as well cancel it.
					message := fmt.Sprintf("cancelling a pt-osc running migration %s which is not owned (not started, or is assumed to be terminated) by this executor", uuid)
					cancellable = append(cancellable, newCancellableMigration(uuid, message))
				}
			}
		case schema.DDLStrategyGhost:
			{
				if _, ok := e.ownedRunningMigrations.Load(uuid); !ok {
					// Ummm, the migration is running but we don't own it. This means the migration
					// is rogue. Maybe executed by another tablet. Anyway, if we don't own it, we can't
					// complete the migration. Even if it runs, the logic around announcing it as complete
					// is missing. So we may as well cancel it.
					message := fmt.Sprintf("cancelling a gh-ost running migration %s which is not owned (not started, or is assumed to be terminated) by this executor", uuid)
					cancellable = append(cancellable, newCancellableMigration(uuid, message))
				}
			}
		}
		countRunnning++
	}
	{
		// now, let's look at UUIDs we own and _think_ should be running, and see which of them _isn't_ actually running or pending...
		uuidsFoundPending := map[string]bool{}
		for _, uuid := range pendingMigrationsUUIDs {
			uuidsFoundPending[uuid] = true
		}

		e.ownedRunningMigrations.Range(func(k, _ any) bool {
			uuid, ok := k.(string)
			if !ok {
				// Not a string key; skip it and keep iterating.
				return true
			}
			// due to race condition, it's possible that ownedRunningMigrations will list a migration
			// that is _just about to run_ but is still, in fact, in `ready` state. This is fine.
			// If we find such a migration, we do nothing. We're only looking for migrations we really
			// don't have any information of.
			if !uuidsFoundRunning[uuid] && !uuidsFoundPending[uuid] {
				log.Infof("removing migration %s from ownedRunningMigrations because it's not running and not pending", uuid)
				e.ownedRunningMigrations.Delete(uuid)
			}
			return true
		})
	}

	// Flag that at least one full review has completed successfully.
	e.reviewedRunningMigrationsFlag = true
	return countRunnning, cancellable, nil
}
  3722  
  3723  // reviewStaleMigrations marks as 'failed' migrations whose status is 'running' but which have
  3724  // shown no liveness in past X minutes. It also attempts to terminate them
  3725  func (e *Executor) reviewStaleMigrations(ctx context.Context) error {
  3726  	e.migrationMutex.Lock()
  3727  	defer e.migrationMutex.Unlock()
  3728  
  3729  	query, err := sqlparser.ParseAndBind(sqlSelectStaleMigrations,
  3730  		sqltypes.Int64BindVariable(staleMigrationMinutes),
  3731  	)
  3732  	if err != nil {
  3733  		return err
  3734  	}
  3735  	r, err := e.execQuery(ctx, query)
  3736  	if err != nil {
  3737  		return err
  3738  	}
  3739  	for _, row := range r.Named().Rows {
  3740  		uuid := row["migration_uuid"].ToString()
  3741  
  3742  		onlineDDL, _, err := e.readMigration(ctx, uuid)
  3743  		if err != nil {
  3744  			return err
  3745  		}
  3746  		log.Infof("reviewStaleMigrations: stale migration found: %s", onlineDDL.UUID)
  3747  		message := fmt.Sprintf("stale migration %s: found running but indicates no liveness in the past %v minutes", onlineDDL.UUID, staleMigrationMinutes)
  3748  		if onlineDDL.TabletAlias != e.TabletAliasString() {
  3749  			// This means another tablet started the migration, and the migration has failed due to the tablet failure (e.g. primary failover)
  3750  			if err := e.updateTabletFailure(ctx, onlineDDL.UUID); err != nil {
  3751  				return err
  3752  			}
  3753  			message = fmt.Sprintf("%s; executed by different tablet %s", message, onlineDDL.TabletAlias)
  3754  		}
  3755  		if _, err := e.terminateMigration(ctx, onlineDDL); err != nil {
  3756  			message = fmt.Sprintf("error terminating migration (%v): %v", message, err)
  3757  			e.updateMigrationMessage(ctx, onlineDDL.UUID, message)
  3758  			continue // we still want to handle rest of migrations
  3759  		}
  3760  		if err := e.updateMigrationMessage(ctx, onlineDDL.UUID, message); err != nil {
  3761  			return err
  3762  		}
  3763  		if err := e.updateMigrationStatus(ctx, onlineDDL.UUID, schema.OnlineDDLStatusFailed); err != nil {
  3764  			return err
  3765  		}
  3766  		defer e.triggerNextCheckInterval()
  3767  		_ = e.updateMigrationStartedTimestamp(ctx, uuid)
  3768  		// Because the migration is stale, it may not update completed_timestamp. It is essential to set completed_timestamp
  3769  		// as this is then used when cleaning artifacts
  3770  		if err := e.updateMigrationTimestamp(ctx, "completed_timestamp", onlineDDL.UUID); err != nil {
  3771  			return err
  3772  		}
  3773  	}
  3774  
  3775  	return nil
  3776  }
  3777  
  3778  // retryTabletFailureMigrations looks for migrations failed by tablet failure (e.g. by failover)
  3779  // and retry them (put them back in the queue)
  3780  func (e *Executor) retryTabletFailureMigrations(ctx context.Context) error {
  3781  	_, err := e.retryMigrationWhere(ctx, sqlWhereTabletFailure)
  3782  	return err
  3783  }
  3784  
  3785  func (e *Executor) tabletManagerClient() tmclient.TabletManagerClient {
  3786  	return tmclient.NewTabletManagerClient()
  3787  }
  3788  
  3789  // vreplicationExec runs a vreplication query, and makes sure to initialize vreplication
  3790  func (e *Executor) vreplicationExec(ctx context.Context, tablet *topodatapb.Tablet, query string) (*querypb.QueryResult, error) {
  3791  	tmClient := e.tabletManagerClient()
  3792  	defer tmClient.Close()
  3793  
  3794  	return tmClient.VReplicationExec(ctx, tablet, query)
  3795  }
  3796  
  3797  // reloadSchema issues a ReloadSchema on this tablet
  3798  func (e *Executor) reloadSchema(ctx context.Context) error {
  3799  	tmClient := e.tabletManagerClient()
  3800  	defer tmClient.Close()
  3801  
  3802  	tablet, err := e.ts.GetTablet(ctx, e.tabletAlias)
  3803  	if err != nil {
  3804  		return err
  3805  	}
  3806  	return tmClient.ReloadSchema(ctx, tablet.Tablet, "")
  3807  }
  3808  
  3809  // deleteVReplicationEntry cleans up a _vt.vreplication entry; this function is called as part of
  3810  // migration termination and as part of artifact cleanup
  3811  func (e *Executor) deleteVReplicationEntry(ctx context.Context, uuid string) error {
  3812  	query, err := sqlparser.ParseAndBind(sqlDeleteVReplStream,
  3813  		sqltypes.StringBindVariable(e.dbName),
  3814  		sqltypes.StringBindVariable(uuid),
  3815  	)
  3816  	if err != nil {
  3817  		return err
  3818  	}
  3819  	tablet, err := e.ts.GetTablet(ctx, e.tabletAlias)
  3820  	if err != nil {
  3821  		return err
  3822  	}
  3823  
  3824  	if _, err := e.vreplicationExec(ctx, tablet.Tablet, query); err != nil {
  3825  		return err
  3826  	}
  3827  	return nil
  3828  }
  3829  
  3830  // gcArtifactTable garbage-collects a single table
  3831  func (e *Executor) gcArtifactTable(ctx context.Context, artifactTable, uuid string, t time.Time) (string, error) {
  3832  	tableExists, err := e.tableExists(ctx, artifactTable)
  3833  	if err != nil {
  3834  		return "", err
  3835  	}
  3836  	if !tableExists {
  3837  		return "", nil
  3838  	}
  3839  	// We've already concluded in gcArtifacts() that this table was held for long enough.
  3840  	// We therefore move it into PURGE state.
  3841  	renameStatement, toTableName, err := schema.GenerateRenameStatementWithUUID(artifactTable, schema.PurgeTableGCState, schema.OnlineDDLToGCUUID(uuid), t)
  3842  	if err != nil {
  3843  		return toTableName, err
  3844  	}
  3845  	_, err = e.execQuery(ctx, renameStatement)
  3846  	return toTableName, err
  3847  }
  3848  
  3849  // gcArtifacts garbage-collects migration artifacts from completed/failed migrations
  3850  func (e *Executor) gcArtifacts(ctx context.Context) error {
  3851  	e.migrationMutex.Lock()
  3852  	defer e.migrationMutex.Unlock()
  3853  
  3854  	if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
  3855  		// This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp'
  3856  		// see https://github.com/vitessio/vitess/issues/8499
  3857  		// Running this query retroactively sets completed_timestamp
  3858  		// This 'if' clause can be removed in version v13
  3859  		return err
  3860  	}
  3861  	query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
  3862  		sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())),
  3863  	)
  3864  	if err != nil {
  3865  		return err
  3866  	}
  3867  	r, err := e.execQuery(ctx, query)
  3868  	if err != nil {
  3869  		return err
  3870  	}
  3871  	for _, row := range r.Named().Rows {
  3872  		uuid := row["migration_uuid"].ToString()
  3873  		artifacts := row["artifacts"].ToString()
  3874  		logPath := row["log_path"].ToString()
  3875  
  3876  		log.Infof("Executor.gcArtifacts: will GC artifacts for migration %s", uuid)
  3877  		// Remove tables:
  3878  		artifactTables := textutil.SplitDelimitedList(artifacts)
  3879  
  3880  		timeNow := time.Now()
  3881  		for i, artifactTable := range artifactTables {
  3882  			// We wish to generate distinct timestamp values for each table in this UUID,
  3883  			// because all tables will be renamed as _something_UUID_timestamp. Since UUID
  3884  			// is shared for all artifacts in this loop, we differentiate via timestamp
  3885  			log.Infof("Executor.gcArtifacts: will GC artifact %s for migration %s", artifactTable, uuid)
  3886  			t := timeNow.Add(time.Duration(i) * time.Second).UTC()
  3887  			toTableName, err := e.gcArtifactTable(ctx, artifactTable, uuid, t)
  3888  			if err != nil {
  3889  				return vterrors.Wrapf(err, "in gcArtifacts() for %s", artifactTable)
  3890  			}
  3891  			log.Infof("Executor.gcArtifacts: renamed away artifact %s to %s", artifactTable, toTableName)
  3892  		}
  3893  
  3894  		// Remove logs:
  3895  		{
  3896  			// logPath is in 'hostname:/path/to/logs' format
  3897  			tokens := strings.SplitN(logPath, ":", 2)
  3898  			logPath = tokens[len(tokens)-1]
  3899  			if err := os.RemoveAll(logPath); err != nil {
  3900  				return err
  3901  			}
  3902  		}
  3903  
  3904  		// while the next function only applies to 'online' strategy ALTER and REVERT, there is no
  3905  		// harm in invoking it for other migrations.
  3906  		if err := e.deleteVReplicationEntry(ctx, uuid); err != nil {
  3907  			return err
  3908  		}
  3909  
  3910  		if err := e.updateMigrationTimestamp(ctx, "cleanup_timestamp", uuid); err != nil {
  3911  			return err
  3912  		}
  3913  		log.Infof("Executor.gcArtifacts: done migration %s", uuid)
  3914  	}
  3915  
  3916  	return nil
  3917  }
  3918  
  3919  // onMigrationCheckTick runs all migrations life cycle
  3920  func (e *Executor) onMigrationCheckTick() {
  3921  	// This function can be called by multiple triggers. First, there's the normal ticker.
  3922  	// Then, any time a migration completes, we set a timer to trigger this function.
  3923  	// also, any time a new INSERT arrives, we set a timer to trigger this function.
  3924  	// Some of these may be correlated. To avoid spamming of this function we:
  3925  	// - ensure the function is non-reentrant, using tickReentranceFlag
  3926  	// - clean up tickReentranceFlag 1 second after function completes; this throttles calls to
  3927  	//   this function at no more than 1/sec rate.
  3928  	if atomic.CompareAndSwapInt64(&e.tickReentranceFlag, 0, 1) {
  3929  		defer time.AfterFunc(time.Second, func() { atomic.StoreInt64(&e.tickReentranceFlag, 0) })
  3930  	} else {
  3931  		// An instance of this function is already running
  3932  		return
  3933  	}
  3934  
  3935  	if e.tabletTypeFunc() != topodatapb.TabletType_PRIMARY {
  3936  		return
  3937  	}
  3938  	if e.keyspace == "" {
  3939  		log.Errorf("Executor.onMigrationCheckTick(): empty keyspace")
  3940  		return
  3941  	}
  3942  
  3943  	ctx := context.Background()
  3944  	if err := e.retryTabletFailureMigrations(ctx); err != nil {
  3945  		log.Error(err)
  3946  	}
  3947  	if err := e.reviewQueuedMigrations(ctx); err != nil {
  3948  		log.Error(err)
  3949  	}
  3950  	if err := e.scheduleNextMigration(ctx); err != nil {
  3951  		log.Error(err)
  3952  	}
  3953  	if err := e.runNextMigration(ctx); err != nil {
  3954  		log.Error(err)
  3955  	}
  3956  	if _, cancellable, err := e.reviewRunningMigrations(ctx); err != nil {
  3957  		log.Error(err)
  3958  	} else if err := e.cancelMigrations(ctx, cancellable, false); err != nil {
  3959  		log.Error(err)
  3960  	}
  3961  	if err := e.reviewStaleMigrations(ctx); err != nil {
  3962  		log.Error(err)
  3963  	}
  3964  	if err := e.gcArtifacts(ctx); err != nil {
  3965  		log.Error(err)
  3966  	}
  3967  }
  3968  
  3969  func (e *Executor) updateMigrationStartedTimestamp(ctx context.Context, uuid string) error {
  3970  	parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationStartedTimestamp,
  3971  		":migration_uuid",
  3972  	)
  3973  	bindVars := map[string]*querypb.BindVariable{
  3974  		"migration_uuid": sqltypes.StringBindVariable(uuid),
  3975  	}
  3976  	bound, err := parsed.GenerateQuery(bindVars, nil)
  3977  	if err != nil {
  3978  		return err
  3979  	}
  3980  	_, err = e.execQuery(ctx, bound)
  3981  	if err != nil {
  3982  		log.Errorf("FAIL updateMigrationStartedTimestamp: uuid=%s, error=%v", uuid, err)
  3983  	}
  3984  	return err
  3985  }
  3986  
  3987  func (e *Executor) updateMigrationTimestamp(ctx context.Context, timestampColumn string, uuid string) error {
  3988  	parsed := sqlparser.BuildParsedQuery(sqlUpdateMigrationTimestamp, timestampColumn,
  3989  		":migration_uuid",
  3990  	)
  3991  	bindVars := map[string]*querypb.BindVariable{
  3992  		"migration_uuid": sqltypes.StringBindVariable(uuid),
  3993  	}
  3994  	bound, err := parsed.GenerateQuery(bindVars, nil)
  3995  	if err != nil {
  3996  		return err
  3997  	}
  3998  	_, err = e.execQuery(ctx, bound)
  3999  	if err != nil {
  4000  		log.Errorf("FAIL updateMigrationStartedTimestamp: uuid=%s, timestampColumn=%v, error=%v", uuid, timestampColumn, err)
  4001  	}
  4002  	return err
  4003  }
  4004  
  4005  func (e *Executor) updateMigrationLogPath(ctx context.Context, uuid string, hostname, logPath string) error {
  4006  	logFile := path.Join(logPath, migrationLogFileName)
  4007  	hostLogPath := fmt.Sprintf("%s:%s", hostname, logPath)
  4008  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationLogPath,
  4009  		sqltypes.StringBindVariable(hostLogPath),
  4010  		sqltypes.StringBindVariable(logFile),
  4011  		sqltypes.StringBindVariable(uuid),
  4012  	)
  4013  	if err != nil {
  4014  		return err
  4015  	}
  4016  	_, err = e.execQuery(ctx, query)
  4017  	return err
  4018  }
  4019  
  4020  func (e *Executor) updateArtifacts(ctx context.Context, uuid string, artifacts ...string) error {
  4021  	bindArtifacts := strings.Join(artifacts, ",")
  4022  	query, err := sqlparser.ParseAndBind(sqlUpdateArtifacts,
  4023  		sqltypes.StringBindVariable(bindArtifacts),
  4024  		sqltypes.StringBindVariable(uuid),
  4025  	)
  4026  	if err != nil {
  4027  		return err
  4028  	}
  4029  	_, err = e.execQuery(ctx, query)
  4030  	return err
  4031  }
  4032  
  4033  func (e *Executor) clearArtifacts(ctx context.Context, uuid string) error {
  4034  	query, err := sqlparser.ParseAndBind(sqlClearArtifacts,
  4035  		sqltypes.StringBindVariable(uuid),
  4036  	)
  4037  	if err != nil {
  4038  		return err
  4039  	}
  4040  	_, err = e.execQuery(ctx, query)
  4041  	return err
  4042  }
  4043  
  4044  func (e *Executor) updateMigrationSpecialPlan(ctx context.Context, uuid string, specialPlan string) error {
  4045  	query, err := sqlparser.ParseAndBind(sqlUpdateSpecialPlan,
  4046  		sqltypes.StringBindVariable(specialPlan),
  4047  		sqltypes.StringBindVariable(uuid),
  4048  	)
  4049  	if err != nil {
  4050  		return err
  4051  	}
  4052  	_, err = e.execQuery(ctx, query)
  4053  	return err
  4054  }
  4055  
  4056  func (e *Executor) updateMigrationStage(ctx context.Context, uuid string, stage string, args ...interface{}) error {
  4057  	msg := fmt.Sprintf(stage, args...)
  4058  	log.Infof("updateMigrationStage: uuid=%s, stage=%s", uuid, msg)
  4059  	query, err := sqlparser.ParseAndBind(sqlUpdateStage,
  4060  		sqltypes.StringBindVariable(msg),
  4061  		sqltypes.StringBindVariable(uuid),
  4062  	)
  4063  	if err != nil {
  4064  		return err
  4065  	}
  4066  	_, err = e.execQuery(ctx, query)
  4067  	return err
  4068  }
  4069  
  4070  func (e *Executor) incrementCutoverAttempts(ctx context.Context, uuid string) error {
  4071  	query, err := sqlparser.ParseAndBind(sqlIncrementCutoverAttempts,
  4072  		sqltypes.StringBindVariable(uuid),
  4073  	)
  4074  	if err != nil {
  4075  		return err
  4076  	}
  4077  	_, err = e.execQuery(ctx, query)
  4078  	return err
  4079  }
  4080  
  4081  // updateMigrationTablet sets 'tablet' column to be this executor's tablet alias for given migration
  4082  func (e *Executor) updateMigrationTablet(ctx context.Context, uuid string) error {
  4083  	query, err := sqlparser.ParseAndBind(sqlUpdateTablet,
  4084  		sqltypes.StringBindVariable(e.TabletAliasString()),
  4085  		sqltypes.StringBindVariable(uuid),
  4086  	)
  4087  	if err != nil {
  4088  		return err
  4089  	}
  4090  	_, err = e.execQuery(ctx, query)
  4091  	return err
  4092  }
  4093  
  4094  // updateTabletFailure marks a given migration as "tablet_failed"
  4095  func (e *Executor) updateTabletFailure(ctx context.Context, uuid string) error {
  4096  	parsed := sqlparser.BuildParsedQuery(sqlUpdateTabletFailure,
  4097  		":migration_uuid",
  4098  	)
  4099  	bindVars := map[string]*querypb.BindVariable{
  4100  		"migration_uuid": sqltypes.StringBindVariable(uuid),
  4101  	}
  4102  	bound, err := parsed.GenerateQuery(bindVars, nil)
  4103  	if err != nil {
  4104  		return err
  4105  	}
  4106  	_, err = e.execQuery(ctx, bound)
  4107  	return err
  4108  }
  4109  
  4110  func (e *Executor) updateMigrationStatusFailedOrCancelled(ctx context.Context, uuid string) error {
  4111  	log.Infof("updateMigrationStatus: transitioning migration: %s into status failed or cancelled", uuid)
  4112  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationStatusFailedOrCancelled,
  4113  		sqltypes.StringBindVariable(uuid),
  4114  	)
  4115  	if err != nil {
  4116  		return err
  4117  	}
  4118  	_, err = e.execQuery(ctx, query)
  4119  	return err
  4120  }
  4121  
  4122  func (e *Executor) updateMigrationStatus(ctx context.Context, uuid string, status schema.OnlineDDLStatus) error {
  4123  	log.Infof("updateMigrationStatus: transitioning migration: %s into status: %s", uuid, string(status))
  4124  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationStatus,
  4125  		sqltypes.StringBindVariable(string(status)),
  4126  		sqltypes.StringBindVariable(uuid),
  4127  	)
  4128  	if err != nil {
  4129  		return err
  4130  	}
  4131  	_, err = e.execQuery(ctx, query)
  4132  	if err != nil {
  4133  		log.Errorf("FAIL updateMigrationStatus: uuid=%s, query=%v, error=%v", uuid, query, err)
  4134  	}
  4135  	return err
  4136  }
  4137  
  4138  func (e *Executor) updateDDLAction(ctx context.Context, uuid string, actionStr string) error {
  4139  	query, err := sqlparser.ParseAndBind(sqlUpdateDDLAction,
  4140  		sqltypes.StringBindVariable(actionStr),
  4141  		sqltypes.StringBindVariable(uuid),
  4142  	)
  4143  	if err != nil {
  4144  		return err
  4145  	}
  4146  	_, err = e.execQuery(ctx, query)
  4147  	return err
  4148  }
  4149  
  4150  func (e *Executor) updateMigrationMessage(ctx context.Context, uuid string, message string) error {
  4151  	log.Infof("updateMigrationMessage: uuid=%s, message=%s", uuid, message)
  4152  
  4153  	maxlen := 16383
  4154  	update := func(message string) error {
  4155  		if len(message) > maxlen {
  4156  			message = message[0:maxlen]
  4157  		}
  4158  		message = strings.ToValidUTF8(message, "�")
  4159  		query, err := sqlparser.ParseAndBind(sqlUpdateMessage,
  4160  			sqltypes.StringBindVariable(message),
  4161  			sqltypes.StringBindVariable(uuid),
  4162  		)
  4163  		if err != nil {
  4164  			return err
  4165  		}
  4166  		_, err = e.execQuery(ctx, query)
  4167  		return err
  4168  	}
  4169  	err := update(message)
  4170  	if err != nil {
  4171  		// If, for some reason, we're unable to update the error message, let's write a generic message
  4172  		err = update("unable to update with original migration error message")
  4173  	}
  4174  	return err
  4175  }
  4176  
  4177  func (e *Executor) updateSchemaAnalysis(ctx context.Context, uuid string,
  4178  	addedUniqueKeys, removedUnqiueKeys int, removedUniqueKeyNames string,
  4179  	droppedNoDefaultColumnNames string, expandedColumnNames string,
  4180  	revertibleNotes string) error {
  4181  	query, err := sqlparser.ParseAndBind(sqlUpdateSchemaAnalysis,
  4182  		sqltypes.Int64BindVariable(int64(addedUniqueKeys)),
  4183  		sqltypes.Int64BindVariable(int64(removedUnqiueKeys)),
  4184  		sqltypes.StringBindVariable(removedUniqueKeyNames),
  4185  		sqltypes.StringBindVariable(droppedNoDefaultColumnNames),
  4186  		sqltypes.StringBindVariable(expandedColumnNames),
  4187  		sqltypes.StringBindVariable(revertibleNotes),
  4188  		sqltypes.StringBindVariable(uuid),
  4189  	)
  4190  	if err != nil {
  4191  		return err
  4192  	}
  4193  	_, err = e.execQuery(ctx, query)
  4194  	return err
  4195  }
  4196  
  4197  func (e *Executor) updateMySQLTable(ctx context.Context, uuid string, tableName string) error {
  4198  	query, err := sqlparser.ParseAndBind(sqlUpdateMySQLTable,
  4199  		sqltypes.StringBindVariable(tableName),
  4200  		sqltypes.StringBindVariable(uuid),
  4201  	)
  4202  	if err != nil {
  4203  		return err
  4204  	}
  4205  	_, err = e.execQuery(ctx, query)
  4206  	return err
  4207  }
  4208  
  4209  func (e *Executor) updateMigrationETASeconds(ctx context.Context, uuid string, etaSeconds int64) error {
  4210  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationETASeconds,
  4211  		sqltypes.Int64BindVariable(etaSeconds),
  4212  		sqltypes.StringBindVariable(uuid),
  4213  	)
  4214  	if err != nil {
  4215  		return err
  4216  	}
  4217  	_, err = e.execQuery(ctx, query)
  4218  	return err
  4219  }
  4220  
  4221  func (e *Executor) updateMigrationProgress(ctx context.Context, uuid string, progress float64) error {
  4222  	if progress <= 0 {
  4223  		// progress starts at 0, and can only increase.
  4224  		// A value of "0" either means "This is the actual current progress" or "No information"
  4225  		// In both cases there's nothing to update
  4226  		return nil
  4227  	}
  4228  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationProgress,
  4229  		sqltypes.Float64BindVariable(progress),
  4230  		sqltypes.StringBindVariable(uuid),
  4231  	)
  4232  	if err != nil {
  4233  		return err
  4234  	}
  4235  	_, err = e.execQuery(ctx, query)
  4236  	return err
  4237  }
  4238  
  4239  func (e *Executor) updateMigrationProgressByRowsCopied(ctx context.Context, uuid string, rowsCopied int64) error {
  4240  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationProgressByRowsCopied,
  4241  		sqltypes.Int64BindVariable(rowsCopied),
  4242  		sqltypes.StringBindVariable(uuid),
  4243  	)
  4244  	if err != nil {
  4245  		return err
  4246  	}
  4247  	_, err = e.execQuery(ctx, query)
  4248  	return err
  4249  }
  4250  
  4251  func (e *Executor) updateMigrationETASecondsByProgress(ctx context.Context, uuid string) error {
  4252  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationETASecondsByProgress,
  4253  		sqltypes.StringBindVariable(uuid),
  4254  	)
  4255  	if err != nil {
  4256  		return err
  4257  	}
  4258  	_, err = e.execQuery(ctx, query)
  4259  	return err
  4260  }
  4261  
  4262  func (e *Executor) updateMigrationLastThrottled(ctx context.Context, uuid string, lastThrottledUnixTime int64, throttledCompnent string) error {
  4263  	query, err := sqlparser.ParseAndBind(sqlUpdateLastThrottled,
  4264  		sqltypes.Int64BindVariable(lastThrottledUnixTime),
  4265  		sqltypes.StringBindVariable(throttledCompnent),
  4266  		sqltypes.StringBindVariable(uuid),
  4267  	)
  4268  	if err != nil {
  4269  		return err
  4270  	}
  4271  	_, err = e.execQuery(ctx, query)
  4272  	return err
  4273  }
  4274  
  4275  func (e *Executor) updateMigrationTableRows(ctx context.Context, uuid string, tableRows int64) error {
  4276  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationTableRows,
  4277  		sqltypes.Int64BindVariable(tableRows),
  4278  		sqltypes.StringBindVariable(uuid),
  4279  	)
  4280  	if err != nil {
  4281  		return err
  4282  	}
  4283  	_, err = e.execQuery(ctx, query)
  4284  	return err
  4285  }
  4286  
  4287  func (e *Executor) updateRowsCopied(ctx context.Context, uuid string, rowsCopied int64) error {
  4288  	if rowsCopied <= 0 {
  4289  		// Number of rows can only be positive. Zero or negative must mean "no information" and
  4290  		// we don't update the table value.
  4291  		return nil
  4292  	}
  4293  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationRowsCopied,
  4294  		sqltypes.Int64BindVariable(rowsCopied),
  4295  		sqltypes.StringBindVariable(uuid),
  4296  	)
  4297  	if err != nil {
  4298  		return err
  4299  	}
  4300  	_, err = e.execQuery(ctx, query)
  4301  	return err
  4302  }
  4303  
  4304  func (e *Executor) updateVitessLivenessIndicator(ctx context.Context, uuid string, livenessIndicator int64) error {
  4305  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationVitessLivenessIndicator,
  4306  		sqltypes.Int64BindVariable(livenessIndicator),
  4307  		sqltypes.StringBindVariable(uuid),
  4308  	)
  4309  	if err != nil {
  4310  		return err
  4311  	}
  4312  	_, err = e.execQuery(ctx, query)
  4313  	return err
  4314  }
  4315  
  4316  func (e *Executor) updateMigrationIsView(ctx context.Context, uuid string, isView bool) error {
  4317  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationIsView,
  4318  		sqltypes.BoolBindVariable(isView),
  4319  		sqltypes.StringBindVariable(uuid),
  4320  	)
  4321  	if err != nil {
  4322  		return err
  4323  	}
  4324  	_, err = e.execQuery(ctx, query)
  4325  	return err
  4326  }
  4327  
  4328  func (e *Executor) updateMigrationSetImmediateOperation(ctx context.Context, uuid string) error {
  4329  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationSetImmediateOperation,
  4330  		sqltypes.StringBindVariable(uuid),
  4331  	)
  4332  	if err != nil {
  4333  		return err
  4334  	}
  4335  	_, err = e.execQuery(ctx, query)
  4336  	return err
  4337  }
  4338  
  4339  func (e *Executor) updateMigrationReadyToComplete(ctx context.Context, uuid string, isReady bool) error {
  4340  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationReadyToComplete,
  4341  		sqltypes.BoolBindVariable(isReady),
  4342  		sqltypes.StringBindVariable(uuid),
  4343  	)
  4344  	if err != nil {
  4345  		return err
  4346  	}
  4347  	if _, err := e.execQuery(ctx, query); err != nil {
  4348  		return err
  4349  	}
  4350  	if val, ok := e.ownedRunningMigrations.Load(uuid); ok {
  4351  		if runningMigration, ok := val.(*schema.OnlineDDL); ok {
  4352  			var storeValue int64
  4353  			if isReady {
  4354  				storeValue = 1
  4355  			}
  4356  			atomic.StoreInt64(&runningMigration.ReadyToComplete, storeValue)
  4357  		}
  4358  	}
  4359  	return nil
  4360  }
  4361  
  4362  func (e *Executor) updateMigrationStowawayTable(ctx context.Context, uuid string, tableName string) error {
  4363  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationStowawayTable,
  4364  		sqltypes.StringBindVariable(tableName),
  4365  		sqltypes.StringBindVariable(uuid),
  4366  	)
  4367  	if err != nil {
  4368  		return err
  4369  	}
  4370  	_, err = e.execQuery(ctx, query)
  4371  	return err
  4372  }
  4373  
  4374  func (e *Executor) updateMigrationUserThrottleRatio(ctx context.Context, uuid string, ratio float64) error {
  4375  	query, err := sqlparser.ParseAndBind(sqlUpdateMigrationUserThrottleRatio,
  4376  		sqltypes.Float64BindVariable(ratio),
  4377  		sqltypes.StringBindVariable(uuid),
  4378  	)
  4379  	if err != nil {
  4380  		return err
  4381  	}
  4382  	_, err = e.execQuery(ctx, query)
  4383  	return err
  4384  }
  4385  
  4386  // retryMigrationWhere retries a migration based on a given WHERE clause
  4387  func (e *Executor) retryMigrationWhere(ctx context.Context, whereExpr string) (result *sqltypes.Result, err error) {
  4388  	e.migrationMutex.Lock()
  4389  	defer e.migrationMutex.Unlock()
  4390  	parsed := sqlparser.BuildParsedQuery(sqlRetryMigrationWhere, ":tablet", whereExpr)
  4391  	bindVars := map[string]*querypb.BindVariable{
  4392  		"tablet": sqltypes.StringBindVariable(e.TabletAliasString()),
  4393  	}
  4394  	bound, err := parsed.GenerateQuery(bindVars, nil)
  4395  	if err != nil {
  4396  		return nil, err
  4397  	}
  4398  	result, err = e.execQuery(ctx, bound)
  4399  	return result, err
  4400  }
  4401  
  4402  // RetryMigration marks given migration for retry
  4403  func (e *Executor) RetryMigration(ctx context.Context, uuid string) (result *sqltypes.Result, err error) {
  4404  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4405  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4406  	}
  4407  	if !schema.IsOnlineDDLUUID(uuid) {
  4408  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Not a valid migration ID in RETRY: %s", uuid)
  4409  	}
  4410  	e.migrationMutex.Lock()
  4411  	defer e.migrationMutex.Unlock()
  4412  
  4413  	query, err := sqlparser.ParseAndBind(sqlRetryMigration,
  4414  		sqltypes.StringBindVariable(e.TabletAliasString()),
  4415  		sqltypes.StringBindVariable(uuid),
  4416  	)
  4417  	if err != nil {
  4418  		return nil, err
  4419  	}
  4420  	defer e.triggerNextCheckInterval()
  4421  	return e.execQuery(ctx, query)
  4422  }
  4423  
  4424  // CleanupMigration sets migration is ready for artifact cleanup. Artifacts are not immediately deleted:
  4425  // all we do is set retain_artifacts_seconds to a very small number (it's actually a negative) so that the
  4426  // next iteration of gcArtifacts() picks up the migration's artifacts and schedules them for deletion
  4427  func (e *Executor) CleanupMigration(ctx context.Context, uuid string) (result *sqltypes.Result, err error) {
  4428  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4429  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4430  	}
  4431  	if !schema.IsOnlineDDLUUID(uuid) {
  4432  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Not a valid migration ID in CLEANUP: %s", uuid)
  4433  	}
  4434  	log.Infof("CleanupMigration: request to cleanup migration %s", uuid)
  4435  	e.migrationMutex.Lock()
  4436  	defer e.migrationMutex.Unlock()
  4437  
  4438  	query, err := sqlparser.ParseAndBind(sqlUpdateReadyForCleanup,
  4439  		sqltypes.StringBindVariable(uuid),
  4440  	)
  4441  	if err != nil {
  4442  		return nil, err
  4443  	}
  4444  	rs, err := e.execQuery(ctx, query)
  4445  	if err != nil {
  4446  		return nil, err
  4447  	}
  4448  	log.Infof("CleanupMigration: migration %s marked as ready to clean up", uuid)
  4449  	return rs, nil
  4450  }
  4451  
  4452  // CompleteMigration clears the postpone_completion flag for a given migration, assuming it was set in the first place
  4453  func (e *Executor) CompleteMigration(ctx context.Context, uuid string) (result *sqltypes.Result, err error) {
  4454  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4455  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4456  	}
  4457  	if !schema.IsOnlineDDLUUID(uuid) {
  4458  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Not a valid migration ID in COMPLETE: %s", uuid)
  4459  	}
  4460  	log.Infof("CompleteMigration: request to complete migration %s", uuid)
  4461  
  4462  	e.migrationMutex.Lock()
  4463  	defer e.migrationMutex.Unlock()
  4464  
  4465  	query, err := sqlparser.ParseAndBind(sqlUpdateCompleteMigration,
  4466  		sqltypes.StringBindVariable(uuid),
  4467  	)
  4468  	if err != nil {
  4469  		return nil, err
  4470  	}
  4471  	defer e.triggerNextCheckInterval()
  4472  	if err := e.deleteGhostPostponeFlagFile(uuid); err != nil {
  4473  		// This should work without error even if the migration is not a gh-ost migration, and even
  4474  		// if the file does not exist. An error here indicates a general system error of sorts.
  4475  		return nil, err
  4476  	}
  4477  	rs, err := e.execQuery(ctx, query)
  4478  	if err != nil {
  4479  		return nil, err
  4480  	}
  4481  	log.Infof("CompleteMigration: migration %s marked as unpostponed", uuid)
  4482  	return rs, nil
  4483  }
  4484  
  4485  // CompletePendingMigrations completes all pending migrations (that are expected to run or are running)
  4486  // for this keyspace
  4487  func (e *Executor) CompletePendingMigrations(ctx context.Context) (result *sqltypes.Result, err error) {
  4488  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4489  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4490  	}
  4491  
  4492  	uuids, err := e.readPendingMigrationsUUIDs(ctx)
  4493  	if err != nil {
  4494  		return result, err
  4495  	}
  4496  	log.Infof("CompletePendingMigrations: iterating %v migrations %s", len(uuids))
  4497  
  4498  	result = &sqltypes.Result{}
  4499  	for _, uuid := range uuids {
  4500  		log.Infof("CompletePendingMigrations: completing %s", uuid)
  4501  		res, err := e.CompleteMigration(ctx, uuid)
  4502  		if err != nil {
  4503  			return result, err
  4504  		}
  4505  		result.AppendResult(res)
  4506  	}
  4507  	log.Infof("CompletePendingMigrations: done iterating %v migrations %s", len(uuids))
  4508  	return result, nil
  4509  }
  4510  
  4511  // LaunchMigration clears the postpone_launch flag for a given migration, assuming it was set in the first place
  4512  func (e *Executor) LaunchMigration(ctx context.Context, uuid string, shardsArg string) (result *sqltypes.Result, err error) {
  4513  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4514  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4515  	}
  4516  	if !schema.IsOnlineDDLUUID(uuid) {
  4517  		return nil, vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "Not a valid migration ID in EXECUTE: %s", uuid)
  4518  	}
  4519  	if !e.matchesShards(shardsArg) {
  4520  		// Does not apply  to this shard!
  4521  		return &sqltypes.Result{}, nil
  4522  	}
  4523  	log.Infof("LaunchMigration: request to execute migration %s", uuid)
  4524  
  4525  	e.migrationMutex.Lock()
  4526  	defer e.migrationMutex.Unlock()
  4527  
  4528  	query, err := sqlparser.ParseAndBind(sqlUpdateLaunchMigration,
  4529  		sqltypes.StringBindVariable(uuid),
  4530  	)
  4531  	if err != nil {
  4532  		return nil, err
  4533  	}
  4534  	defer e.triggerNextCheckInterval()
  4535  	rs, err := e.execQuery(ctx, query)
  4536  	if err != nil {
  4537  		return nil, err
  4538  	}
  4539  	log.Infof("LaunchMigration: migration %s marked as unpostponed", uuid)
  4540  	return rs, nil
  4541  }
  4542  
  4543  // LaunchMigrations launches all launch-postponed queued migrations for this keyspace
  4544  func (e *Executor) LaunchMigrations(ctx context.Context) (result *sqltypes.Result, err error) {
  4545  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4546  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4547  	}
  4548  
  4549  	uuids, err := e.readPendingMigrationsUUIDs(ctx)
  4550  	if err != nil {
  4551  		return result, err
  4552  	}
  4553  	r, err := e.execQuery(ctx, sqlSelectQueuedMigrations)
  4554  	if err != nil {
  4555  		return result, err
  4556  	}
  4557  	rows := r.Named().Rows
  4558  	log.Infof("LaunchMigrations: iterating %v migrations %s", len(rows))
  4559  	result = &sqltypes.Result{}
  4560  	for _, row := range rows {
  4561  		uuid := row["migration_uuid"].ToString()
  4562  		log.Infof("LaunchMigrations: unpostponing %s", uuid)
  4563  		res, err := e.LaunchMigration(ctx, uuid, "")
  4564  		if err != nil {
  4565  			return result, err
  4566  		}
  4567  		result.AppendResult(res)
  4568  	}
  4569  	log.Infof("LaunchMigrations: done iterating %v migrations %s", len(uuids))
  4570  	return result, nil
  4571  }
  4572  
  4573  func (e *Executor) submittedMigrationConflictsWithPendingMigrationInSingletonContext(
  4574  	ctx context.Context, submittedMigration, pendingOnlineDDL *schema.OnlineDDL,
  4575  ) bool {
  4576  	if pendingOnlineDDL.MigrationContext == submittedMigration.MigrationContext {
  4577  		// same migration context. this is obviously allowed
  4578  		return false
  4579  	}
  4580  	// Let's see if the pending migration is a revert:
  4581  	if _, err := pendingOnlineDDL.GetRevertUUID(); err != nil {
  4582  		// Not a revert. So the pending migration definitely conflicts with our migration.
  4583  		return true
  4584  	}
  4585  
  4586  	// The pending migration is a revert
  4587  	if !pendingOnlineDDL.StrategySetting().IsSingletonContext() {
  4588  		// Aha! So, our "conflict" is with a REVERT migration, which does _not_ have a -singleton-context
  4589  		// flag. Because we want to allow REVERT migrations to run as concurrently as possible, we allow this scenario.
  4590  		return false
  4591  	}
  4592  	return true
  4593  }
  4594  
  4595  // submitCallbackIfNonConflicting is called internally by SubmitMigration, and is given a callack to execute
  4596  // if the given migration does not conflict any terms. Specifically, this function looks for singleton or
  4597  // singleton-context conflicts.
  4598  // The call back can be an insertion of a new migration, or a retry of an existing migration, or whatnot.
  4599  func (e *Executor) submitCallbackIfNonConflicting(
  4600  	ctx context.Context,
  4601  	onlineDDL *schema.OnlineDDL,
  4602  	callback func() (*sqltypes.Result, error),
  4603  ) (
  4604  	result *sqltypes.Result, err error,
  4605  ) {
  4606  	if !onlineDDL.StrategySetting().IsSingleton() && !onlineDDL.StrategySetting().IsSingletonContext() {
  4607  		// not a singleton. No conflict
  4608  		return callback()
  4609  	}
  4610  	// This is either singleton or singleton-context
  4611  
  4612  	// This entire next logic is wrapped in an anonymous func just to get the migrationMutex released
  4613  	// before calling the callback function. Reason is: the callback function itself may need to acquire
  4614  	// the mutex. And specifically, one of the callback functions used is e.RetryMigration(), which does
  4615  	// lock the mutex...
  4616  	err = func() error {
  4617  		e.migrationMutex.Lock()
  4618  		defer e.migrationMutex.Unlock()
  4619  
  4620  		pendingUUIDs, err := e.readPendingMigrationsUUIDs(ctx)
  4621  		if err != nil {
  4622  			return err
  4623  		}
  4624  		switch {
  4625  		case onlineDDL.StrategySetting().IsSingleton():
  4626  			// We will reject this migration if there's any pending migration
  4627  			if len(pendingUUIDs) > 0 {
  4628  				return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "singleton migration rejected: found pending migrations [%s]", strings.Join(pendingUUIDs, ", "))
  4629  			}
  4630  		case onlineDDL.StrategySetting().IsSingletonContext():
  4631  			// We will reject this migration if there's any pending migration within a different context
  4632  			for _, pendingUUID := range pendingUUIDs {
  4633  				pendingOnlineDDL, _, err := e.readMigration(ctx, pendingUUID)
  4634  				if err != nil {
  4635  					return vterrors.Wrapf(err, "validateSingleton() migration: %s", pendingUUID)
  4636  				}
  4637  				if e.submittedMigrationConflictsWithPendingMigrationInSingletonContext(ctx, onlineDDL, pendingOnlineDDL) {
  4638  					return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "singleton-context migration rejected: found pending migration: %s in different context: %s", pendingUUID, pendingOnlineDDL.MigrationContext)
  4639  				}
  4640  				// no conflict? continue looking for other pending migrations
  4641  			}
  4642  		}
  4643  		return nil
  4644  	}()
  4645  	if err != nil {
  4646  		return nil, err
  4647  	}
  4648  	// OK to go!
  4649  	return callback()
  4650  }
  4651  
  4652  // SubmitMigration inserts a new migration request
  4653  func (e *Executor) SubmitMigration(
  4654  	ctx context.Context,
  4655  	stmt sqlparser.Statement,
  4656  ) (*sqltypes.Result, error) {
  4657  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4658  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4659  	}
  4660  
  4661  	log.Infof("SubmitMigration: request to submit migration with statement: %0.50s...", sqlparser.CanonicalString(stmt))
  4662  	if ddlStmt, ok := stmt.(sqlparser.DDLStatement); ok {
  4663  		// This validation should have taken place on submission. However, the query may have mutated
  4664  		// during transfer, and this validation is here to catch any malformed mutation.
  4665  		if !ddlStmt.IsFullyParsed() {
  4666  			return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "error parsing statement")
  4667  		}
  4668  	}
  4669  
  4670  	onlineDDL, err := schema.OnlineDDLFromCommentedStatement(stmt)
  4671  	if err != nil {
  4672  		return nil, vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "Error submitting migration %s: %v", sqlparser.String(stmt), err)
  4673  	}
  4674  
  4675  	// The logic below has multiple steps. We hence protect the rest of the code with a mutex, only used by this function.
  4676  	e.submitMutex.Lock()
  4677  	defer e.submitMutex.Unlock()
  4678  
  4679  	// Is there already a migration by this same UUID?
  4680  	storedMigration, _, err := e.readMigration(ctx, onlineDDL.UUID)
  4681  	if err != nil && err != ErrMigrationNotFound {
  4682  		return nil, vterrors.Wrapf(err, "while checking whether migration %s exists", onlineDDL.UUID)
  4683  	}
  4684  	if storedMigration != nil {
  4685  		log.Infof("SubmitMigration: migration %s already exists with migration_context=%s, table=%s", onlineDDL.UUID, storedMigration.MigrationContext, onlineDDL.Table)
  4686  		// A migration already exists with the same UUID. This is fine, we allow re-submitting migrations
  4687  		// with the same UUID, as we provide idempotency.
  4688  		// So we will _mostly_ ignore the request: we will not submit a new migration. However, we will do
  4689  		// these things:
  4690  
  4691  		// 1. Check that the requested submmited migration macthes the existing one's migration-context, otherwise
  4692  		//    this doesn't seem right, not the idempotency we were looking for
  4693  		if storedMigration.MigrationContext != onlineDDL.MigrationContext {
  4694  			return nil, vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "migration rejected: found migration %s with different context: %s than submmitted migration's context: %s", onlineDDL.UUID, storedMigration.MigrationContext, onlineDDL.MigrationContext)
  4695  		}
  4696  		// 2. Possibly, the existing migration is in 'failed' or 'cancelled' state, in which case this
  4697  		//    resubmission should retry the migration.
  4698  		return e.submitCallbackIfNonConflicting(
  4699  			ctx, onlineDDL,
  4700  			func() (*sqltypes.Result, error) { return e.RetryMigration(ctx, onlineDDL.UUID) },
  4701  		)
  4702  	}
  4703  
  4704  	// OK, this is a new UUID
  4705  
  4706  	_, actionStr, err := onlineDDL.GetActionStr()
  4707  	if err != nil {
  4708  		return nil, err
  4709  	}
  4710  	log.Infof("SubmitMigration: request to submit migration %s; action=%s, table=%s", onlineDDL.UUID, actionStr, onlineDDL.Table)
  4711  
  4712  	revertedUUID, _ := onlineDDL.GetRevertUUID() // Empty value if the migration is not actually a REVERT. Safe to ignore error.
  4713  	retainArtifactsSeconds := int64((retainOnlineDDLTables).Seconds())
  4714  	_, allowConcurrentMigration := e.allowConcurrentMigration(onlineDDL)
  4715  	submitQuery, err := sqlparser.ParseAndBind(sqlInsertMigration,
  4716  		sqltypes.StringBindVariable(onlineDDL.UUID),
  4717  		sqltypes.StringBindVariable(e.keyspace),
  4718  		sqltypes.StringBindVariable(e.shard),
  4719  		sqltypes.StringBindVariable(e.dbName),
  4720  		sqltypes.StringBindVariable(onlineDDL.Table),
  4721  		sqltypes.StringBindVariable(onlineDDL.SQL),
  4722  		sqltypes.StringBindVariable(string(onlineDDL.Strategy)),
  4723  		sqltypes.StringBindVariable(onlineDDL.Options),
  4724  		sqltypes.StringBindVariable(actionStr),
  4725  		sqltypes.StringBindVariable(onlineDDL.MigrationContext),
  4726  		sqltypes.StringBindVariable(string(schema.OnlineDDLStatusQueued)),
  4727  		sqltypes.StringBindVariable(e.TabletAliasString()),
  4728  		sqltypes.Int64BindVariable(retainArtifactsSeconds),
  4729  		sqltypes.BoolBindVariable(onlineDDL.StrategySetting().IsPostponeLaunch()),
  4730  		sqltypes.BoolBindVariable(onlineDDL.StrategySetting().IsPostponeCompletion()),
  4731  		sqltypes.BoolBindVariable(allowConcurrentMigration),
  4732  		sqltypes.StringBindVariable(revertedUUID),
  4733  		sqltypes.BoolBindVariable(onlineDDL.IsView()),
  4734  	)
  4735  	if err != nil {
  4736  		return nil, err
  4737  	}
  4738  	result, err := e.submitCallbackIfNonConflicting(
  4739  		ctx, onlineDDL,
  4740  		func() (*sqltypes.Result, error) { return e.execQuery(ctx, submitQuery) },
  4741  	)
  4742  	if err != nil {
  4743  		return nil, vterrors.Wrapf(err, "submitting migration %v", onlineDDL.UUID)
  4744  
  4745  	}
  4746  	log.Infof("SubmitMigration: migration %s submitted", onlineDDL.UUID)
  4747  
  4748  	defer e.triggerNextCheckInterval()
  4749  
  4750  	return result, nil
  4751  }
  4752  
  4753  // ShowMigrationLogs reads the migration log for a given migration
  4754  func (e *Executor) ShowMigrationLogs(ctx context.Context, stmt *sqlparser.ShowMigrationLogs) (result *sqltypes.Result, err error) {
  4755  	if atomic.LoadInt64(&e.isOpen) == 0 {
  4756  		return nil, vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "online ddl is disabled")
  4757  	}
  4758  	_, row, err := e.readMigration(ctx, stmt.UUID)
  4759  	if err != nil {
  4760  		return nil, err
  4761  	}
  4762  	logFile := row["log_file"].ToString()
  4763  	if logFile == "" {
  4764  		return nil, vterrors.Errorf(vtrpcpb.Code_NOT_FOUND, "No log file for migration %v", stmt.UUID)
  4765  	}
  4766  	content, err := os.ReadFile(logFile)
  4767  	if err != nil {
  4768  		return nil, err
  4769  	}
  4770  
  4771  	result = &sqltypes.Result{
  4772  		Fields: []*querypb.Field{
  4773  			{
  4774  				Name: "migration_log",
  4775  				Type: sqltypes.VarChar,
  4776  			},
  4777  		},
  4778  		Rows: [][]sqltypes.Value{},
  4779  	}
  4780  	result.Rows = append(result.Rows, []sqltypes.Value{
  4781  		sqltypes.NewVarChar(string(content)),
  4782  	})
  4783  	return result, nil
  4784  }
  4785  
  4786  // onSchemaMigrationStatus is called when a status is set/changed for a running migration
  4787  func (e *Executor) onSchemaMigrationStatus(ctx context.Context,
  4788  	uuid string, status schema.OnlineDDLStatus, dryRun bool, progressPct float64, etaSeconds int64, rowsCopied int64, hint string) (err error) {
  4789  	if dryRun && status != schema.OnlineDDLStatusFailed {
  4790  		// We don't consider dry-run reports unless there's a failure
  4791  		return nil
  4792  	}
  4793  	switch status {
  4794  	case schema.OnlineDDLStatusReady:
  4795  		{
  4796  			err = e.updateMigrationTimestamp(ctx, "ready_timestamp", uuid)
  4797  		}
  4798  	case schema.OnlineDDLStatusRunning:
  4799  		{
  4800  			_ = e.updateMigrationStartedTimestamp(ctx, uuid)
  4801  			err = e.updateMigrationTimestamp(ctx, "liveness_timestamp", uuid)
  4802  		}
  4803  	case schema.OnlineDDLStatusComplete:
  4804  		{
  4805  			progressPct = progressPctFull
  4806  			_ = e.updateMigrationStartedTimestamp(ctx, uuid)
  4807  			err = e.updateMigrationTimestamp(ctx, "completed_timestamp", uuid)
  4808  		}
  4809  	case schema.OnlineDDLStatusFailed:
  4810  		{
  4811  			_ = e.updateMigrationStartedTimestamp(ctx, uuid)
  4812  			err = e.updateMigrationTimestamp(ctx, "completed_timestamp", uuid)
  4813  		}
  4814  	}
  4815  	if err != nil {
  4816  		return err
  4817  	}
  4818  	if err = e.updateMigrationStatus(ctx, uuid, status); err != nil {
  4819  		return err
  4820  	}
  4821  	if err = e.updateMigrationProgress(ctx, uuid, progressPct); err != nil {
  4822  		return err
  4823  	}
  4824  	if err = e.updateMigrationETASeconds(ctx, uuid, etaSeconds); err != nil {
  4825  		return err
  4826  	}
  4827  	if err := e.updateRowsCopied(ctx, uuid, rowsCopied); err != nil {
  4828  		return err
  4829  	}
  4830  	if hint == readyToCompleteHint {
  4831  		if err := e.updateMigrationReadyToComplete(ctx, uuid, true); err != nil {
  4832  			return err
  4833  		}
  4834  	}
  4835  	if !dryRun {
  4836  		switch status {
  4837  		case schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed:
  4838  			e.triggerNextCheckInterval()
  4839  		}
  4840  	}
  4841  
  4842  	return nil
  4843  }
  4844  
  4845  // OnSchemaMigrationStatus is called by TabletServer's API, which is invoked by a running gh-ost migration's hooks.
  4846  func (e *Executor) OnSchemaMigrationStatus(ctx context.Context,
  4847  	uuidParam, statusParam, dryrunParam, progressParam, etaParam, rowsCopiedParam, hint string) (err error) {
  4848  	status := schema.OnlineDDLStatus(statusParam)
  4849  	dryRun := (dryrunParam == "true")
  4850  	var progressPct float64
  4851  	if pct, err := strconv.ParseFloat(progressParam, 64); err == nil {
  4852  		progressPct = pct
  4853  	}
  4854  	var etaSeconds int64 = etaSecondsUnknown
  4855  	if eta, err := strconv.ParseInt(etaParam, 10, 64); err == nil {
  4856  		etaSeconds = eta
  4857  	}
  4858  	var rowsCopied int64
  4859  	if rows, err := strconv.ParseInt(rowsCopiedParam, 10, 64); err == nil {
  4860  		rowsCopied = rows
  4861  	}
  4862  
  4863  	return e.onSchemaMigrationStatus(ctx, uuidParam, status, dryRun, progressPct, etaSeconds, rowsCopied, hint)
  4864  }
  4865  
  4866  // VExec is called by a VExec invocation
  4867  // Implements vitess.io/vitess/go/vt/vttablet/vexec.Executor interface
  4868  func (e *Executor) VExec(ctx context.Context, vx *vexec.TabletVExec) (qr *querypb.QueryResult, err error) {
  4869  	response := func(result *sqltypes.Result, err error) (*querypb.QueryResult, error) {
  4870  		if err != nil {
  4871  			return nil, err
  4872  		}
  4873  		return sqltypes.ResultToProto3(result), nil
  4874  	}
  4875  
  4876  	switch stmt := vx.Stmt.(type) {
  4877  	case *sqlparser.Delete:
  4878  		return nil, fmt.Errorf("DELETE statements not supported for this table. query=%s", vx.Query)
  4879  	case *sqlparser.Select:
  4880  		return response(e.execQuery(ctx, vx.Query))
  4881  	case *sqlparser.Insert:
  4882  		match, err := sqlparser.QueryMatchesTemplates(vx.Query, vexecInsertTemplates)
  4883  		if err != nil {
  4884  			return nil, err
  4885  		}
  4886  		if !match {
  4887  			return nil, fmt.Errorf("Query must match one of these templates: %s", strings.Join(vexecInsertTemplates, "; "))
  4888  		}
  4889  		// Vexec naturally runs outside shard/schema context. It does not supply values for those columns.
  4890  		// We can fill them in.
  4891  		vx.ReplaceInsertColumnVal("shard", vx.ToStringVal(e.shard))
  4892  		vx.ReplaceInsertColumnVal("mysql_schema", vx.ToStringVal(e.dbName))
  4893  		vx.AddOrReplaceInsertColumnVal("tablet", vx.ToStringVal(e.TabletAliasString()))
  4894  		e.triggerNextCheckInterval()
  4895  		return response(e.execQuery(ctx, vx.Query))
  4896  	case *sqlparser.Update:
  4897  		match, err := sqlparser.QueryMatchesTemplates(vx.Query, vexecUpdateTemplates)
  4898  		if err != nil {
  4899  			return nil, err
  4900  		}
  4901  		if !match {
  4902  			return nil, fmt.Errorf("Query must match one of these templates: %s; query=%s", strings.Join(vexecUpdateTemplates, "; "), vx.Query)
  4903  		}
  4904  		if shard, _ := vx.ColumnStringVal(vx.WhereCols, "shard"); shard != "" {
  4905  			// shard is specified.
  4906  			if shard != e.shard {
  4907  				// specified shard is not _this_ shard. So we're skipping this UPDATE
  4908  				return sqltypes.ResultToProto3(emptyResult), nil
  4909  			}
  4910  		}
  4911  		statusVal, err := vx.ColumnStringVal(vx.UpdateCols, "migration_status")
  4912  		if err != nil {
  4913  			return nil, err
  4914  		}
  4915  		switch statusVal {
  4916  		case retryMigrationHint:
  4917  			return response(e.retryMigrationWhere(ctx, sqlparser.String(stmt.Where.Expr)))
  4918  		case completeMigrationHint:
  4919  			uuid, err := vx.ColumnStringVal(vx.WhereCols, "migration_uuid")
  4920  			if err != nil {
  4921  				return nil, err
  4922  			}
  4923  			if !schema.IsOnlineDDLUUID(uuid) {
  4924  				return nil, fmt.Errorf("Not an Online DDL UUID: %s", uuid)
  4925  			}
  4926  			return response(e.CompleteMigration(ctx, uuid))
  4927  		case cancelMigrationHint:
  4928  			uuid, err := vx.ColumnStringVal(vx.WhereCols, "migration_uuid")
  4929  			if err != nil {
  4930  				return nil, err
  4931  			}
  4932  			if !schema.IsOnlineDDLUUID(uuid) {
  4933  				return nil, fmt.Errorf("Not an Online DDL UUID: %s", uuid)
  4934  			}
  4935  			return response(e.CancelMigration(ctx, uuid, "cancel by user", true))
  4936  		case cancelAllMigrationHint:
  4937  			uuid, _ := vx.ColumnStringVal(vx.WhereCols, "migration_uuid")
  4938  			if uuid != "" {
  4939  				return nil, fmt.Errorf("Unexpetced UUID: %s", uuid)
  4940  			}
  4941  			return response(e.CancelPendingMigrations(ctx, "cancel-all by user", true))
  4942  		default:
  4943  			return nil, fmt.Errorf("Unexpected value for migration_status: %v. Supported values are: %s, %s",
  4944  				statusVal, retryMigrationHint, cancelMigrationHint)
  4945  		}
  4946  	default:
  4947  		return nil, fmt.Errorf("No handler for this query: %s", vx.Query)
  4948  	}
  4949  }