vitess.io/vitess@v0.16.2/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vrepl
    18  
    19  import (
    20  	"flag"
    21  	"fmt"
    22  	"io"
    23  	"os"
    24  	"path"
    25  	"strings"
    26  	"sync"
    27  	"testing"
    28  	"time"
    29  
    30  	"vitess.io/vitess/go/mysql"
    31  	"vitess.io/vitess/go/test/endtoend/cluster"
    32  	"vitess.io/vitess/go/test/endtoend/onlineddl"
    33  	"vitess.io/vitess/go/test/endtoend/throttler"
    34  	"vitess.io/vitess/go/vt/schema"
    35  	"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
    36  	throttlebase "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base"
    37  
    38  	"github.com/stretchr/testify/assert"
    39  	"github.com/stretchr/testify/require"
    40  )
    41  
// Package-level state shared by the tests in this file.
var (
	// Cluster handles and throttler app names. clusterInstance and vtParams
	// are populated by TestMain; shards is populated by TestSchemaChange.
	clusterInstance           *cluster.LocalProcessCluster
	shards                    []cluster.Shard
	vtParams                  mysql.ConnParams
	httpClient                = throttlebase.SetupHTTPClient(time.Second)
	onlineDDLThrottlerAppName = "online-ddl"
	vstreamerThrottlerAppName = "vstreamer"

	// Timeouts handed to the onlineddl wait helpers.
	normalMigrationWait   = 45 * time.Second
	extendedMigrationWait = 60 * time.Second

	// Cluster naming, followed by statement templates. The templates carry
	// fmt verbs (%s, %d) that are filled in before execution; presumably %s
	// is a table name and %d a row id -- confirm against the helpers that
	// format them.
	hostname              = "localhost"
	keyspaceName          = "ks"
	cell                  = "zone1"
	schemaChangeDirectory = ""
	totalTableCount       = 4
	createTable           = `
		CREATE TABLE %s (
			id bigint(20) NOT NULL,
			test_val bigint unsigned NOT NULL DEFAULT 0,
			msg varchar(64),
			PRIMARY KEY (id)
		) ENGINE=InnoDB;`
	// To verify non online-DDL behavior
	alterTableNormalStatement = `
		ALTER TABLE %s
			ADD COLUMN non_online int UNSIGNED NOT NULL DEFAULT 0`
	// A trivial statement which must succeed and does not change the schema
	alterTableTrivialStatement = `
		ALTER TABLE %s
			ENGINE=InnoDB`
	// The following statement is valid
	alterTableSuccessfulStatement = `
		ALTER TABLE %s
			MODIFY id bigint UNSIGNED NOT NULL,
			ADD COLUMN vrepl_col int NOT NULL DEFAULT 0,
			ADD INDEX idx_msg(msg)`
	// The following statement will fail because vreplication requires shared PRIMARY KEY columns
	alterTableFailedStatement = `
		ALTER TABLE %s
			DROP PRIMARY KEY,
			DROP COLUMN vrepl_col`
	// We will run this query while throttling vreplication
	alterTableThrottlingStatement = `
		ALTER TABLE %s
			DROP COLUMN vrepl_col`
	onlineDDLCreateTableStatement = `
		CREATE TABLE %s (
			id bigint NOT NULL,
			test_val bigint unsigned NOT NULL DEFAULT 0,
			online_ddl_create_col INT NOT NULL DEFAULT 0,
			PRIMARY KEY (id)
		) ENGINE=InnoDB;`
	onlineDDLDropTableStatement = `
		DROP TABLE %s`
	onlineDDLDropTableIfExistsStatement = `
		DROP TABLE IF EXISTS %s`
	insertRowStatement = `
		INSERT INTO %s (id, test_val) VALUES (%d, 1)
	`
	selectCountRowsStatement = `
		SELECT COUNT(*) AS c FROM %s
	`
	// NOTE(review): presumably insertMutex guards countInserts; their use is
	// outside this part of the file -- confirm.
	countInserts int64
	insertMutex  sync.Mutex

	// vSchema is the keyspace VSchema passed to the cluster in TestMain. It
	// declares a sharded keyspace in which each of the four
	// vt_onlineddl_test_NN tables uses a hash vindex on column id.
	vSchema = `
	{
		"sharded": true,
		"vindexes": {
			"hash_index": {
				"type": "hash"
			}
		},
		"tables": {
			"vt_onlineddl_test_00": {
				"column_vindexes": [
					{
						"column": "id",
						"name": "hash_index"
					}
				]
			},
			"vt_onlineddl_test_01": {
				"column_vindexes": [
					{
						"column": "id",
						"name": "hash_index"
					}
				]
			},
			"vt_onlineddl_test_02": {
				"column_vindexes": [
					{
						"column": "id",
						"name": "hash_index"
					}
				]
			},
			"vt_onlineddl_test_03": {
				"column_vindexes": [
					{
						"column": "id",
						"name": "hash_index"
					}
				]
			}
		}
	}
	`
)
   153  
   154  func TestMain(m *testing.M) {
   155  	defer cluster.PanicHandler(nil)
   156  	flag.Parse()
   157  
   158  	exitcode, err := func() (int, error) {
   159  		clusterInstance = cluster.NewCluster(cell, hostname)
   160  		schemaChangeDirectory = path.Join("/tmp", fmt.Sprintf("schema_change_dir_%d", clusterInstance.GetAndReserveTabletUID()))
   161  		defer os.RemoveAll(schemaChangeDirectory)
   162  		defer clusterInstance.Teardown()
   163  
   164  		if _, err := os.Stat(schemaChangeDirectory); os.IsNotExist(err) {
   165  			_ = os.Mkdir(schemaChangeDirectory, 0700)
   166  		}
   167  
   168  		clusterInstance.VtctldExtraArgs = []string{
   169  			"--schema_change_dir", schemaChangeDirectory,
   170  			"--schema_change_controller", "local",
   171  			"--schema_change_check_interval", "1",
   172  		}
   173  
   174  		clusterInstance.VtTabletExtraArgs = []string{
   175  			"--throttler-config-via-topo",
   176  			"--heartbeat_enable",
   177  			"--heartbeat_interval", "250ms",
   178  			"--heartbeat_on_demand_duration", "5s",
   179  			"--migration_check_interval", "5s",
   180  			"--watch_replication_stream",
   181  		}
   182  		clusterInstance.VtGateExtraArgs = []string{
   183  			"--ddl_strategy", "online",
   184  		}
   185  
   186  		if err := clusterInstance.StartTopo(); err != nil {
   187  			return 1, err
   188  		}
   189  
   190  		keyspace := &cluster.Keyspace{
   191  			Name:    keyspaceName,
   192  			VSchema: vSchema,
   193  		}
   194  
   195  		if err := clusterInstance.StartKeyspace(*keyspace, []string{"-80", "80-"}, 1, false); err != nil {
   196  			return 1, err
   197  		}
   198  
   199  		vtgateInstance := clusterInstance.NewVtgateInstance()
   200  		// Start vtgate
   201  		if err := vtgateInstance.Setup(); err != nil {
   202  			return 1, err
   203  		}
   204  		// ensure it is torn down during cluster TearDown
   205  		clusterInstance.VtgateProcess = *vtgateInstance
   206  		vtParams = mysql.ConnParams{
   207  			Host: clusterInstance.Hostname,
   208  			Port: clusterInstance.VtgateMySQLPort,
   209  		}
   210  
   211  		return m.Run(), nil
   212  	}()
   213  	if err != nil {
   214  		fmt.Printf("%v\n", err)
   215  		os.Exit(1)
   216  	} else {
   217  		os.Exit(exitcode)
   218  	}
   219  
   220  }
   221  
   222  // direct per-tablet throttler API instruction
   223  func throttleResponse(tablet *cluster.Vttablet, path string) (respBody string, err error) {
   224  	apiURL := fmt.Sprintf("http://%s:%d/%s", tablet.VttabletProcess.TabletHostname, tablet.HTTPPort, path)
   225  	resp, err := httpClient.Get(apiURL)
   226  	if err != nil {
   227  		return "", err
   228  	}
   229  	defer resp.Body.Close()
   230  	b, err := io.ReadAll(resp.Body)
   231  	respBody = string(b)
   232  	return respBody, err
   233  }
   234  
   235  // direct per-tablet throttler API instruction
   236  func throttleApp(tablet *cluster.Vttablet, app string) (string, error) {
   237  	return throttleResponse(tablet, fmt.Sprintf("throttler/throttle-app?app=%s&duration=1h", app))
   238  }
   239  
   240  // direct per-tablet throttler API instruction
   241  func unthrottleApp(tablet *cluster.Vttablet, app string) (string, error) {
   242  	return throttleResponse(tablet, fmt.Sprintf("throttler/unthrottle-app?app=%s", app))
   243  }
   244  
   245  func TestSchemaChange(t *testing.T) {
   246  	defer cluster.PanicHandler(t)
   247  
   248  	shards = clusterInstance.Keyspaces[0].Shards
   249  	require.Equal(t, 2, len(shards))
   250  	for _, shard := range shards {
   251  		require.Equal(t, 2, len(shard.Vttablets))
   252  	}
   253  
   254  	providedUUID := ""
   255  	providedMigrationContext := ""
   256  
   257  	// We execute the throttler commands via vtgate, which in turn
   258  	// executes them via vttablet. So let's wait until vtgate's view
   259  	// is updated.
   260  	err := clusterInstance.WaitForTabletsToHealthyInVtgate()
   261  	require.NoError(t, err)
   262  
   263  	_, err = throttler.UpdateThrottlerTopoConfig(clusterInstance, true, false, 0, "", false)
   264  	require.NoError(t, err)
   265  
   266  	for _, ks := range clusterInstance.Keyspaces {
   267  		for _, shard := range ks.Shards {
   268  			for _, tablet := range shard.Vttablets {
   269  				throttler.WaitForThrottlerStatusEnabled(t, tablet, true, nil, extendedMigrationWait)
   270  			}
   271  		}
   272  	}
   273  
   274  	testWithInitialSchema(t)
   275  	t.Run("alter non_online", func(t *testing.T) {
   276  		_ = testOnlineDDLStatement(t, alterTableNormalStatement, string(schema.DDLStrategyDirect), providedUUID, providedMigrationContext, "vtctl", "non_online", "", false)
   277  		insertRows(t, 2)
   278  		testRows(t)
   279  	})
   280  	t.Run("successful online alter, vtgate", func(t *testing.T) {
   281  		insertRows(t, 2)
   282  		uuid := testOnlineDDLStatement(t, alterTableSuccessfulStatement, "online", providedUUID, providedMigrationContext, "vtgate", "vrepl_col", "", false)
   283  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   284  		testRows(t)
   285  		testMigrationRowCount(t, uuid)
   286  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   287  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   288  		onlineddl.CheckMigrationArtifacts(t, &vtParams, shards, uuid, true)
   289  
   290  		rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   291  		require.NotNil(t, rs)
   292  		for _, row := range rs.Named().Rows {
   293  			retainArtifactSeconds := row.AsInt64("retain_artifacts_seconds", 0)
   294  			assert.Equal(t, int64(86400), retainArtifactSeconds)
   295  		}
   296  
   297  		onlineddl.CheckCleanupMigration(t, &vtParams, shards, uuid)
   298  
   299  		rs = onlineddl.ReadMigrations(t, &vtParams, uuid)
   300  		require.NotNil(t, rs)
   301  		for _, row := range rs.Named().Rows {
   302  			retainArtifactSeconds := row.AsInt64("retain_artifacts_seconds", 0)
   303  			assert.Equal(t, int64(-1), retainArtifactSeconds)
   304  		}
   305  	})
   306  	t.Run("successful online alter, vtctl", func(t *testing.T) {
   307  		insertRows(t, 2)
   308  		uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "online", providedUUID, providedMigrationContext, "vtctl", "vrepl_col", "", false)
   309  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   310  		testRows(t)
   311  		testMigrationRowCount(t, uuid)
   312  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   313  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   314  		onlineddl.CheckMigrationArtifacts(t, &vtParams, shards, uuid, true)
   315  	})
   316  	t.Run("successful online alter, vtctl, explicit UUID", func(t *testing.T) {
   317  		insertRows(t, 2)
   318  		providedUUID = "00000000_51c9_11ec_9cf2_0a43f95f28a3"
   319  		providedMigrationContext = "endtoend:0000-1111"
   320  		uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess", providedUUID, providedMigrationContext, "vtctl", "vrepl_col", "", false)
   321  		assert.Equal(t, providedUUID, uuid)
   322  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   323  		testRows(t)
   324  		testMigrationRowCount(t, uuid)
   325  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   326  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   327  		onlineddl.CheckMigrationArtifacts(t, &vtParams, shards, uuid, true)
   328  	})
   329  	t.Run("duplicate migration, implicitly ignored", func(t *testing.T) {
   330  		uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "online", providedUUID, providedMigrationContext, "vtctl", "vrepl_col", "", true)
   331  		assert.Equal(t, providedUUID, uuid)
   332  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   333  	})
   334  	t.Run("fail duplicate migration with different context", func(t *testing.T) {
   335  		_ = testOnlineDDLStatement(t, alterTableTrivialStatement, "online", providedUUID, "endtoend:different-context-0000", "vtctl", "vrepl_col", "rejected", true)
   336  	})
   337  	providedUUID = ""
   338  	providedMigrationContext = ""
   339  
   340  	t.Run("successful online alter, postponed, vtgate", func(t *testing.T) {
   341  		insertRows(t, 2)
   342  		uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess -postpone-completion", providedUUID, providedMigrationContext, "vtgate", "test_val", "", false)
   343  		// Should be still running!
   344  		_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusRunning)
   345  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   346  		// Issue a complete and wait for successful completion
   347  		onlineddl.CheckCompleteMigration(t, &vtParams, shards, uuid, true)
   348  		// This part may take a while, because we depend on vreplicatoin polling
   349  		status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   350  		fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   351  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   352  
   353  		testRows(t)
   354  		testMigrationRowCount(t, uuid)
   355  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   356  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   357  	})
   358  	// Notes about throttling:
   359  	// In this endtoend test we test both direct tablet API for throttling, as well as VTGate queries.
   360  	// - VTGate queries (`ALTER VITESS_MIGRATION THROTTLE ALL ...`) are sent to all relevant shards/tablets via QueryExecutor
   361  	// - tablet API calls have to be sent per-shard to the primary tablet of that shard
   362  	t.Run("throttled migration", func(t *testing.T) {
   363  		// Use VTGate for throttling, issue a `ALTER VITESS_MIGRATION THROTTLE ALL ...`
   364  		insertRows(t, 2)
   365  		onlineddl.ThrottleAllMigrations(t, &vtParams)
   366  		defer onlineddl.UnthrottleAllMigrations(t, &vtParams)
   367  
   368  		uuid := testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", providedUUID, providedMigrationContext, "vtgate", "vrepl_col", "", true)
   369  		_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusRunning)
   370  		testRows(t)
   371  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, true)
   372  		status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusFailed, schema.OnlineDDLStatusCancelled)
   373  		fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   374  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusCancelled)
   375  	})
   376  
   377  	t.Run("throttled and unthrottled migration", func(t *testing.T) {
   378  		insertRows(t, 2)
   379  
   380  		// Use VTGate for throttling, issue a `ALTER VITESS_MIGRATION THROTTLE ALL ...`
   381  		// begin throttling:
   382  		onlineddl.ThrottleAllMigrations(t, &vtParams)
   383  		defer onlineddl.UnthrottleAllMigrations(t, &vtParams)
   384  		onlineddl.CheckThrottledApps(t, &vtParams, onlineDDLThrottlerAppName, true)
   385  
   386  		uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess", providedUUID, providedMigrationContext, "vtgate", "test_val", "", true)
   387  		_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusRunning)
   388  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   389  		testRows(t)
   390  
   391  		// gotta give the migration a few seconds to read throttling info from _vt.vreplication and write
   392  		// to _vt.schema_migrations
   393  		row, startedTimestamp, lastThrottledTimestamp := onlineddl.WaitForThrottledTimestamp(t, &vtParams, uuid, normalMigrationWait)
   394  		require.NotNil(t, row)
   395  		// vplayer and vcopier update throttle timestamp every second, so we expect the value
   396  		// to be strictly higher than started_timestamp
   397  		assert.GreaterOrEqual(t, lastThrottledTimestamp, startedTimestamp)
   398  		component := row.AsString("component_throttled", "")
   399  		assert.Contains(t, []string{string(vreplication.VCopierComponentName), string(vreplication.VPlayerComponentName)}, component)
   400  
   401  		// unthrottle
   402  		onlineddl.UnthrottleAllMigrations(t, &vtParams)
   403  		onlineddl.CheckThrottledApps(t, &vtParams, onlineDDLThrottlerAppName, false)
   404  
   405  		status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   406  		fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   407  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   408  	})
   409  
   410  	t.Run("throttled and unthrottled migration via vstreamer", func(t *testing.T) {
   411  		insertRows(t, 2)
   412  		var uuid string
   413  
   414  		func() {
   415  			for _, shard := range shards {
   416  				// technically we only need to throttle on a REPLICA, because that's the
   417  				// vstreamer source; but it's OK to be on the safe side and throttle on all tablets. Doesn't
   418  				// change the essence of this test.
   419  				for _, tablet := range shard.Vttablets {
   420  					body, err := throttleApp(tablet, vstreamerThrottlerAppName)
   421  					defer unthrottleApp(tablet, vstreamerThrottlerAppName)
   422  
   423  					assert.NoError(t, err)
   424  					assert.Contains(t, body, vstreamerThrottlerAppName)
   425  				}
   426  			}
   427  
   428  			uuid = testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess", providedUUID, providedMigrationContext, "vtgate", "test_val", "", true)
   429  			_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusRunning)
   430  			onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   431  			testRows(t)
   432  
   433  			// gotta give the migration a few seconds to read throttling info from _vt.vreplication and write
   434  			// to _vt.schema_migrations
   435  			row, startedTimestamp, lastThrottledTimestamp := onlineddl.WaitForThrottledTimestamp(t, &vtParams, uuid, normalMigrationWait)
   436  			require.NotNil(t, row)
   437  			// rowstreamer throttle timestamp only updates once in 10 seconds, so greater or equals" is good enough here.
   438  			assert.GreaterOrEqual(t, lastThrottledTimestamp, startedTimestamp)
   439  			component := row.AsString("component_throttled", "")
   440  			assert.Contains(t, []string{string(vreplication.VStreamerComponentName), string(vreplication.RowStreamerComponentName)}, component)
   441  		}()
   442  		// now unthrottled
   443  		status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   444  		fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   445  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   446  	})
   447  
   448  	t.Run("failed migration", func(t *testing.T) {
   449  		insertRows(t, 2)
   450  		uuid := testOnlineDDLStatement(t, alterTableFailedStatement, "online", providedUUID, providedMigrationContext, "vtgate", "vrepl_col", "", false)
   451  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusFailed)
   452  		testRows(t)
   453  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   454  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, true)
   455  		onlineddl.CheckMigrationArtifacts(t, &vtParams, shards, uuid, true)
   456  		// migration will fail again
   457  	})
   458  	t.Run("cancel all migrations: nothing to cancel", func(t *testing.T) {
   459  		// no migrations pending at this time
   460  		time.Sleep(10 * time.Second)
   461  		onlineddl.CheckCancelAllMigrations(t, &vtParams, 0)
   462  		// Validate that invoking CANCEL ALL via vtctl works
   463  		onlineddl.CheckCancelAllMigrationsViaVtctl(t, &clusterInstance.VtctlclientProcess, keyspaceName)
   464  	})
   465  	t.Run("cancel all migrations: some migrations to cancel", func(t *testing.T) {
   466  		// Use VTGate for throttling, issue a `ALTER VITESS_MIGRATION THROTTLE ALL ...`
   467  		onlineddl.ThrottleAllMigrations(t, &vtParams)
   468  		defer onlineddl.UnthrottleAllMigrations(t, &vtParams)
   469  		onlineddl.CheckThrottledApps(t, &vtParams, onlineDDLThrottlerAppName, true)
   470  
   471  		// spawn n migrations; cancel them via cancel-all
   472  		var wg sync.WaitGroup
   473  		count := 4
   474  		for i := 0; i < count; i++ {
   475  			wg.Add(1)
   476  			go func() {
   477  				defer wg.Done()
   478  				_ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "vitess", providedUUID, providedMigrationContext, "vtgate", "vrepl_col", "", false)
   479  			}()
   480  		}
   481  		wg.Wait()
   482  		onlineddl.CheckCancelAllMigrations(t, &vtParams, len(shards)*count)
   483  	})
   484  	t.Run("cancel all migrations: some migrations to cancel via vtctl", func(t *testing.T) {
   485  		// Use VTGate for throttling, issue a `ALTER VITESS_MIGRATION THROTTLE ALL ...`
   486  		onlineddl.ThrottleAllMigrations(t, &vtParams)
   487  		defer onlineddl.UnthrottleAllMigrations(t, &vtParams)
   488  		onlineddl.CheckThrottledApps(t, &vtParams, onlineDDLThrottlerAppName, true)
   489  
   490  		// spawn n migrations; cancel them via cancel-all
   491  		var wg sync.WaitGroup
   492  		count := 4
   493  		for i := 0; i < count; i++ {
   494  			wg.Add(1)
   495  			go func() {
   496  				defer wg.Done()
   497  				_ = testOnlineDDLStatement(t, alterTableThrottlingStatement, "online", providedUUID, providedMigrationContext, "vtgate", "vrepl_col", "", false)
   498  			}()
   499  		}
   500  		wg.Wait()
   501  		// cancelling via vtctl does not return values. We CANCEL ALL via vtctl, then validate via VTGate that nothing remains to be cancelled.
   502  		onlineddl.CheckCancelAllMigrationsViaVtctl(t, &clusterInstance.VtctlclientProcess, keyspaceName)
   503  		onlineddl.CheckCancelAllMigrations(t, &vtParams, 0)
   504  	})
   505  
   506  	// reparent shard -80 to replica
   507  	// and then reparent it back to original state
   508  	// (two pretty much identical tests, the point is to end up with original state)
   509  	for _, currentPrimaryTabletIndex := range []int{0, 1} {
   510  		currentPrimaryTablet := shards[0].Vttablets[currentPrimaryTabletIndex]
   511  		reparentTablet := shards[0].Vttablets[1-currentPrimaryTabletIndex]
   512  		t.Run(fmt.Sprintf("PlannedReparentShard via throttling %d/2", (currentPrimaryTabletIndex+1)), func(t *testing.T) {
   513  
   514  			insertRows(t, 2)
   515  			for i := range shards {
   516  				var body string
   517  				var err error
   518  				switch i {
   519  				case 0:
   520  					// this is the shard where we run PRS
   521  					// Use per-tablet throttling API
   522  					body, err = throttleApp(currentPrimaryTablet, onlineDDLThrottlerAppName)
   523  					defer unthrottleApp(currentPrimaryTablet, onlineDDLThrottlerAppName)
   524  				case 1:
   525  					// no PRS on this shard
   526  					// Use per-tablet throttling API
   527  					body, err = throttleApp(shards[i].Vttablets[0], onlineDDLThrottlerAppName)
   528  					defer unthrottleApp(shards[i].Vttablets[0], onlineDDLThrottlerAppName)
   529  				}
   530  				assert.NoError(t, err)
   531  				assert.Contains(t, body, onlineDDLThrottlerAppName)
   532  			}
   533  			uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess", providedUUID, providedMigrationContext, "vtgate", "test_val", "", true)
   534  
   535  			t.Run("wait for migration to run", func(t *testing.T) {
   536  				_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusRunning)
   537  				onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   538  			})
   539  			t.Run("wait for vreplication to run on shard -80", func(t *testing.T) {
   540  				vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
   541  				require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
   542  			})
   543  			t.Run("wait for vreplication to run on shard 80-", func(t *testing.T) {
   544  				vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
   545  				require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
   546  			})
   547  			t.Run("check status again", func(t *testing.T) {
   548  				// again see that we're still 'running'
   549  				onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   550  				testRows(t)
   551  			})
   552  
   553  			t.Run("Check tablet", func(t *testing.T) {
   554  				// onlineddl.Executor marks this migration with its tablet alias
   555  				// reminder that onlineddl.Executor runs on the primary tablet.
   556  				rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   557  				require.NotNil(t, rs)
   558  				for _, row := range rs.Named().Rows {
   559  					shard := row["shard"].ToString()
   560  					tablet := row["tablet"].ToString()
   561  
   562  					switch shard {
   563  					case "-80":
   564  						require.Equal(t, currentPrimaryTablet.Alias, tablet)
   565  					case "80-":
   566  						require.Equal(t, shards[1].Vttablets[0].Alias, tablet)
   567  					default:
   568  						require.NoError(t, fmt.Errorf("unexpected shard name: %s", shard))
   569  					}
   570  				}
   571  			})
   572  			t.Run("PRS shard -80", func(t *testing.T) {
   573  				// migration has started and is throttled. We now run PRS
   574  				err := clusterInstance.VtctlclientProcess.ExecuteCommand("PlannedReparentShard", "--", "--keyspace_shard", keyspaceName+"/-80", "--new_primary", reparentTablet.Alias)
   575  				require.NoError(t, err, "failed PRS: %v", err)
   576  				rs := onlineddl.VtgateExecQuery(t, &vtParams, "show vitess_tablets", "")
   577  				onlineddl.PrintQueryResult(os.Stdout, rs)
   578  			})
   579  			t.Run("unthrottle", func(t *testing.T) {
   580  				for i := range shards {
   581  					var body string
   582  					var err error
   583  					switch i {
   584  					case 0:
   585  						// this is the shard where we run PRS
   586  						// Use per-tablet throttling API
   587  						body, err = unthrottleApp(currentPrimaryTablet, onlineDDLThrottlerAppName)
   588  					case 1:
   589  						// no PRS on this shard
   590  						// Use per-tablet throttling API
   591  						body, err = unthrottleApp(shards[i].Vttablets[0], onlineDDLThrottlerAppName)
   592  					}
   593  					assert.NoError(t, err)
   594  					assert.Contains(t, body, onlineDDLThrottlerAppName)
   595  				}
   596  			})
   597  			t.Run("expect completion", func(t *testing.T) {
   598  				_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   599  				onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   600  			})
   601  
   602  			t.Run("Check tablet post PRS", func(t *testing.T) {
   603  				// onlineddl.Executor will find that a vrepl migration started in a different tablet.
   604  				// it will own the tablet and will update 'tablet' column in _vt.schema_migrations with its own
   605  				// (promoted primary) tablet alias.
   606  				rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   607  				require.NotNil(t, rs)
   608  				for _, row := range rs.Named().Rows {
   609  					shard := row["shard"].ToString()
   610  					tablet := row["tablet"].ToString()
   611  
   612  					switch shard {
   613  					case "-80":
   614  						// PRS for this tablet, we promoted tablet[1]
   615  						require.Equal(t, reparentTablet.Alias, tablet)
   616  					case "80-":
   617  						// No PRS for this tablet
   618  						require.Equal(t, shards[1].Vttablets[0].Alias, tablet)
   619  					default:
   620  						require.NoError(t, fmt.Errorf("unexpected shard name: %s", shard))
   621  					}
   622  				}
   623  
   624  				onlineddl.CheckRetryPartialMigration(t, &vtParams, uuid, 1)
   625  				// Now it should complete on the failed shard
   626  				_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusComplete)
   627  			})
   628  		})
   629  	}
   630  
   631  	// reparent shard -80 to replica
   632  	// and then reparent it back to original state
   633  	// (two pretty much identical tests, the point is to end up with original state)
   634  	for _, currentPrimaryTabletIndex := range []int{0, 1} {
   635  		currentPrimaryTablet := shards[0].Vttablets[currentPrimaryTabletIndex]
   636  		reparentTablet := shards[0].Vttablets[1-currentPrimaryTabletIndex]
   637  
   638  		t.Run(fmt.Sprintf("PlannedReparentShard via postponed %d/2", (currentPrimaryTabletIndex+1)), func(t *testing.T) {
   639  
   640  			insertRows(t, 2)
   641  
   642  			uuid := testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess --postpone-completion", providedUUID, providedMigrationContext, "vtgate", "test_val", "", true)
   643  
   644  			t.Run("wait for migration to run", func(t *testing.T) {
   645  				_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusRunning)
   646  				onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   647  			})
   648  			t.Run("wait for vreplication to run on shard -80", func(t *testing.T) {
   649  				vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, currentPrimaryTablet, uuid, normalMigrationWait, "Copying", "Running")
   650  				require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
   651  			})
   652  			t.Run("wait for vreplication to run on shard 80-", func(t *testing.T) {
   653  				vreplStatus := onlineddl.WaitForVReplicationStatus(t, &vtParams, shards[1].Vttablets[0], uuid, normalMigrationWait, "Copying", "Running")
   654  				require.Contains(t, []string{"Copying", "Running"}, vreplStatus)
   655  			})
   656  			t.Run("check status again", func(t *testing.T) {
   657  				// again see that we're still 'running'
   658  				onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusRunning)
   659  				testRows(t)
   660  			})
   661  
   662  			t.Run("Check tablet", func(t *testing.T) {
   663  				// onlineddl.Executor marks this migration with its tablet alias
   664  				// reminder that onlineddl.Executor runs on the primary tablet.
   665  				rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   666  				require.NotNil(t, rs)
   667  				for _, row := range rs.Named().Rows {
   668  					shard := row["shard"].ToString()
   669  					tablet := row["tablet"].ToString()
   670  
   671  					switch shard {
   672  					case "-80":
   673  						require.Equal(t, currentPrimaryTablet.Alias, tablet)
   674  					case "80-":
   675  						require.Equal(t, shards[1].Vttablets[0].Alias, tablet)
   676  					default:
   677  						require.NoError(t, fmt.Errorf("unexpected shard name: %s", shard))
   678  					}
   679  				}
   680  			})
   681  			t.Run("PRS shard -80", func(t *testing.T) {
   682  				// migration has started and completion is postponed. We now PRS
   683  				err := clusterInstance.VtctlclientProcess.ExecuteCommand("PlannedReparentShard", "--", "--keyspace_shard", keyspaceName+"/-80", "--new_primary", reparentTablet.Alias)
   684  				require.NoError(t, err, "failed PRS: %v", err)
   685  				rs := onlineddl.VtgateExecQuery(t, &vtParams, "show vitess_tablets", "")
   686  				onlineddl.PrintQueryResult(os.Stdout, rs)
   687  			})
   688  			t.Run("complete and expect completion", func(t *testing.T) {
   689  				query := fmt.Sprintf("select * from _vt.vreplication where workflow ='%s'", uuid)
   690  				rs, err := reparentTablet.VttabletProcess.QueryTablet(query, "", true)
   691  				assert.NoError(t, err)
   692  				onlineddl.PrintQueryResult(os.Stdout, rs)
   693  
   694  				onlineddl.CheckCompleteAllMigrations(t, &vtParams, len(shards))
   695  
   696  				_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   697  				onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   698  			})
   699  
   700  			t.Run("Check tablet post PRS", func(t *testing.T) {
   701  				// onlineddl.Executor will find that a vrepl migration started in a different tablet.
   702  				// it will own the tablet and will update 'tablet' column in _vt.schema_migrations with its own
   703  				// (promoted primary) tablet alias.
   704  				rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   705  				require.NotNil(t, rs)
   706  				for _, row := range rs.Named().Rows {
   707  					shard := row["shard"].ToString()
   708  					tablet := row["tablet"].ToString()
   709  
   710  					switch shard {
   711  					case "-80":
   712  						// PRS for this tablet
   713  						require.Equal(t, reparentTablet.Alias, tablet)
   714  					case "80-":
   715  						// No PRS for this tablet
   716  						require.Equal(t, shards[1].Vttablets[0].Alias, tablet)
   717  					default:
   718  						require.NoError(t, fmt.Errorf("unexpected shard name: %s", shard))
   719  					}
   720  				}
   721  
   722  				onlineddl.CheckRetryPartialMigration(t, &vtParams, uuid, 1)
   723  				// Now it should complete on the failed shard
   724  				_ = onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusComplete)
   725  			})
   726  		})
   727  	}
   728  
   729  	t.Run("Online DROP, vtctl", func(t *testing.T) {
   730  		uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", providedUUID, providedMigrationContext, "vtctl", "", "", false)
   731  		t.Run("test ready to complete", func(t *testing.T) {
   732  			rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   733  			require.NotNil(t, rs)
   734  			for _, row := range rs.Named().Rows {
   735  				readyToComplete := row.AsInt64("ready_to_complete", 0)
   736  				assert.Equal(t, int64(1), readyToComplete)
   737  			}
   738  		})
   739  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   740  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   741  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   742  	})
   743  	t.Run("Online CREATE, vtctl", func(t *testing.T) {
   744  		uuid := testOnlineDDLStatement(t, onlineDDLCreateTableStatement, "vitess", providedUUID, providedMigrationContext, "vtctl", "online_ddl_create_col", "", false)
   745  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   746  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   747  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   748  	})
   749  	t.Run("Online DROP TABLE IF EXISTS, vtgate", func(t *testing.T) {
   750  		uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online ", providedUUID, providedMigrationContext, "vtgate", "", "", false)
   751  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   752  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   753  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   754  		// this table existed
   755  		checkTables(t, schema.OnlineDDLToGCUUID(uuid), 1)
   756  	})
   757  	t.Run("Online CREATE, vtctl, extra flags", func(t *testing.T) {
   758  		// the flags are meaningless to this migration. The test just validates that they don't get in the way.
   759  		uuid := testOnlineDDLStatement(t, onlineDDLCreateTableStatement, "vitess --prefer-instant-ddl --allow-zero-in-date", providedUUID, providedMigrationContext, "vtctl", "online_ddl_create_col", "", false)
   760  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   761  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   762  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   763  	})
   764  	t.Run("Online DROP TABLE IF EXISTS, vtgate, extra flags", func(t *testing.T) {
   765  		// the flags are meaningless to this migration. The test just validates that they don't get in the way.
   766  		uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "vitess --prefer-instant-ddl --allow-zero-in-date", providedUUID, providedMigrationContext, "vtgate", "", "", false)
   767  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   768  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   769  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   770  		// this table existed
   771  		checkTables(t, schema.OnlineDDLToGCUUID(uuid), 1)
   772  	})
   773  	t.Run("Online DROP TABLE IF EXISTS for nonexistent table, vtgate", func(t *testing.T) {
   774  		uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "online", providedUUID, providedMigrationContext, "vtgate", "", "", false)
   775  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   776  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   777  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   778  		// this table did not exist
   779  		checkTables(t, schema.OnlineDDLToGCUUID(uuid), 0)
   780  	})
   781  	t.Run("Online DROP TABLE IF EXISTS for nonexistent table, postponed", func(t *testing.T) {
   782  		uuid := testOnlineDDLStatement(t, onlineDDLDropTableIfExistsStatement, "vitess -postpone-completion", providedUUID, providedMigrationContext, "vtgate", "", "", false)
   783  		// Should be still queued, never promoted to 'ready'!
   784  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusQueued)
   785  		// Issue a complete and wait for successful completion
   786  		onlineddl.CheckCompleteMigration(t, &vtParams, shards, uuid, true)
   787  		// This part may take a while, because we depend on vreplication polling
   788  		status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, extendedMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   789  		fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   790  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   791  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   792  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   793  		// this table did not exist
   794  		checkTables(t, schema.OnlineDDLToGCUUID(uuid), 0)
   795  	})
   796  	t.Run("Online DROP TABLE for nonexistent table, expect error, vtgate", func(t *testing.T) {
   797  		uuid := testOnlineDDLStatement(t, onlineDDLDropTableStatement, "online", providedUUID, providedMigrationContext, "vtgate", "", "", false)
   798  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusFailed)
   799  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   800  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, true)
   801  	})
   802  	t.Run("Online CREATE, vtctl", func(t *testing.T) {
   803  		uuid := testOnlineDDLStatement(t, onlineDDLCreateTableStatement, "vitess", providedUUID, providedMigrationContext, "vtctl", "online_ddl_create_col", "", false)
   804  		onlineddl.CheckMigrationStatus(t, &vtParams, shards, uuid, schema.OnlineDDLStatusComplete)
   805  		onlineddl.CheckCancelMigration(t, &vtParams, shards, uuid, false)
   806  		onlineddl.CheckRetryMigration(t, &vtParams, shards, uuid, false)
   807  	})
   808  
   809  	// Technically the next test should belong in onlineddl_revert suite. But we're taking advantage of setup and functionality existing in this test:
   810  	// - two shards as opposed to one
   811  	// - tablet throttling
   812  	t.Run("Revert a migration completed on one shard and cancelled on another", func(t *testing.T) {
   813  		// shard 0 will run normally, shard 1 will be throttled
   814  		defer unthrottleApp(shards[1].Vttablets[0], onlineDDLThrottlerAppName)
   815  		t.Run("throttle shard 1", func(t *testing.T) {
   816  			body, err := throttleApp(shards[1].Vttablets[0], onlineDDLThrottlerAppName)
   817  			assert.NoError(t, err)
   818  			assert.Contains(t, body, onlineDDLThrottlerAppName)
   819  		})
   820  
   821  		var uuid string
   822  		t.Run("run migrations, expect 1st to complete, 2nd to be running", func(t *testing.T) {
   823  			uuid = testOnlineDDLStatement(t, alterTableTrivialStatement, "vitess", providedUUID, providedMigrationContext, "vtgate", "test_val", "", true)
   824  			{
   825  				status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards[:1], uuid, normalMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   826  				fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   827  				onlineddl.CheckMigrationStatus(t, &vtParams, shards[:1], uuid, schema.OnlineDDLStatusComplete)
   828  			}
   829  			{
   830  				// shard 1 is throttled
   831  				status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards[1:], uuid, normalMigrationWait, schema.OnlineDDLStatusRunning)
   832  				fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   833  				onlineddl.CheckMigrationStatus(t, &vtParams, shards[1:], uuid, schema.OnlineDDLStatusRunning)
   834  			}
   835  		})
   836  		t.Run("check cancel migration", func(t *testing.T) {
   837  			onlineddl.CheckCancelAllMigrations(t, &vtParams, 1)
   838  		})
   839  		t.Run("unthrottle shard 1", func(t *testing.T) {
   840  			body, err := unthrottleApp(shards[1].Vttablets[0], onlineDDLThrottlerAppName)
   841  			assert.NoError(t, err)
   842  			assert.Contains(t, body, onlineDDLThrottlerAppName)
   843  		})
   844  		var revertUUID string
   845  		t.Run("issue revert migration", func(t *testing.T) {
   846  			revertQuery := fmt.Sprintf("revert vitess_migration '%s'", uuid)
   847  			rs := onlineddl.VtgateExecQuery(t, &vtParams, revertQuery, "")
   848  			require.NotNil(t, rs)
   849  			row := rs.Named().Row()
   850  			require.NotNil(t, row)
   851  			revertUUID = row.AsString("uuid", "")
   852  			assert.NotEmpty(t, revertUUID)
   853  		})
   854  		t.Run("expect one revert successful, another failed", func(t *testing.T) {
   855  			{
   856  				// shard 0 migration was complete. Revert should be successful
   857  				status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards[:1], revertUUID, normalMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   858  				fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   859  				onlineddl.CheckMigrationStatus(t, &vtParams, shards[:1], revertUUID, schema.OnlineDDLStatusComplete)
   860  			}
   861  			{
   862  				// shard 1 migration was cancelled. Revert should not be possible
   863  				status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards[1:], revertUUID, normalMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   864  				fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   865  				onlineddl.CheckMigrationStatus(t, &vtParams, shards[1:], revertUUID, schema.OnlineDDLStatusFailed)
   866  			}
   867  		})
   868  		t.Run("expect two rows in SHOW VITESS_MIGRATIONS", func(t *testing.T) {
   869  			// This validates that the shards are reflected correctly in output of SHOW VITESS_MIGRATIONS
   870  			rs := onlineddl.ReadMigrations(t, &vtParams, revertUUID)
   871  			require.NotNil(t, rs)
   872  			require.Equal(t, 2, len(rs.Rows))
   873  			for _, row := range rs.Named().Rows {
   874  				shard := row["shard"].ToString()
   875  				status := row["migration_status"].ToString()
   876  
   877  				switch shard {
   878  				case "-80":
   879  					require.Equal(t, string(schema.OnlineDDLStatusComplete), status)
   880  				case "80-":
   881  					require.Equal(t, string(schema.OnlineDDLStatusFailed), status)
   882  				default:
   883  					require.NoError(t, fmt.Errorf("unexpected shard name: %s", shard))
   884  				}
   885  			}
   886  		})
   887  	})
   888  	t.Run("summary: validate sequential migration IDs", func(t *testing.T) {
   889  		onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards)
   890  	})
   891  }
   892  
   893  func insertRow(t *testing.T) {
   894  	insertMutex.Lock()
   895  	defer insertMutex.Unlock()
   896  
   897  	tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3)
   898  	sqlQuery := fmt.Sprintf(insertRowStatement, tableName, countInserts)
   899  	r := onlineddl.VtgateExecQuery(t, &vtParams, sqlQuery, "")
   900  	require.NotNil(t, r)
   901  	countInserts++
   902  }
   903  
   904  func insertRows(t *testing.T, count int) {
   905  	for i := 0; i < count; i++ {
   906  		insertRow(t)
   907  	}
   908  }
   909  
   910  func testRows(t *testing.T) {
   911  	insertMutex.Lock()
   912  	defer insertMutex.Unlock()
   913  
   914  	tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3)
   915  	sqlQuery := fmt.Sprintf(selectCountRowsStatement, tableName)
   916  	r := onlineddl.VtgateExecQuery(t, &vtParams, sqlQuery, "")
   917  	require.NotNil(t, r)
   918  	row := r.Named().Row()
   919  	require.NotNil(t, row)
   920  	require.Equal(t, countInserts, row.AsInt64("c", 0))
   921  }
   922  
   923  func testMigrationRowCount(t *testing.T, uuid string) {
   924  	insertMutex.Lock()
   925  	defer insertMutex.Unlock()
   926  
   927  	var totalRowsCopied uint64
   928  	// count sum of rows copied in all shards, that should be the total number of rows inserted to the table
   929  	rs := onlineddl.ReadMigrations(t, &vtParams, uuid)
   930  	require.NotNil(t, rs)
   931  	for _, row := range rs.Named().Rows {
   932  		rowsCopied := row.AsUint64("rows_copied", 0)
   933  		totalRowsCopied += rowsCopied
   934  	}
   935  	require.Equal(t, uint64(countInserts), totalRowsCopied)
   936  }
   937  
   938  func testWithInitialSchema(t *testing.T) {
   939  	// Create 4 tables
   940  	var sqlQuery = "" //nolint
   941  	for i := 0; i < totalTableCount; i++ {
   942  		sqlQuery = fmt.Sprintf(createTable, fmt.Sprintf("vt_onlineddl_test_%02d", i))
   943  		err := clusterInstance.VtctlclientProcess.ApplySchema(keyspaceName, sqlQuery)
   944  		require.Nil(t, err)
   945  	}
   946  
   947  	// Check if 4 tables are created
   948  	checkTables(t, "", totalTableCount)
   949  }
   950  
   951  // testOnlineDDLStatement runs an online DDL, ALTER statement
   952  func testOnlineDDLStatement(t *testing.T, alterStatement string, ddlStrategy string, providedUUIDList string, providedMigrationContext string, executeStrategy string, expectHint string, expectError string, skipWait bool) (uuid string) {
   953  	tableName := fmt.Sprintf("vt_onlineddl_test_%02d", 3)
   954  	sqlQuery := fmt.Sprintf(alterStatement, tableName)
   955  	if executeStrategy == "vtgate" {
   956  		row := onlineddl.VtgateExecDDL(t, &vtParams, ddlStrategy, sqlQuery, "").Named().Row()
   957  		if row != nil {
   958  			uuid = row.AsString("uuid", "")
   959  		}
   960  	} else {
   961  		params := cluster.VtctlClientParams{DDLStrategy: ddlStrategy, UUIDList: providedUUIDList, MigrationContext: providedMigrationContext}
   962  		output, err := clusterInstance.VtctlclientProcess.ApplySchemaWithOutput(keyspaceName, sqlQuery, params)
   963  		if expectError == "" {
   964  			assert.NoError(t, err)
   965  			uuid = output
   966  		} else {
   967  			assert.Error(t, err)
   968  			assert.Contains(t, output, expectError)
   969  		}
   970  	}
   971  	uuid = strings.TrimSpace(uuid)
   972  	fmt.Println("# Generated UUID (for debug purposes):")
   973  	fmt.Printf("<%s>\n", uuid)
   974  
   975  	strategySetting, err := schema.ParseDDLStrategy(ddlStrategy)
   976  	assert.NoError(t, err)
   977  
   978  	if strategySetting.Strategy.IsDirect() {
   979  		skipWait = true
   980  	}
   981  	if !skipWait {
   982  		status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, uuid, normalMigrationWait, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
   983  		fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
   984  	}
   985  
   986  	if expectError == "" && expectHint != "" {
   987  		checkMigratedTable(t, tableName, expectHint)
   988  	}
   989  	return uuid
   990  }
   991  
   992  // checkTables checks the number of tables in the first two shards.
   993  func checkTables(t *testing.T, showTableName string, expectCount int) {
   994  	for i := range clusterInstance.Keyspaces[0].Shards {
   995  		checkTablesCount(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], showTableName, expectCount)
   996  	}
   997  }
   998  
   999  // checkTablesCount checks the number of tables in the given tablet
  1000  func checkTablesCount(t *testing.T, tablet *cluster.Vttablet, showTableName string, expectCount int) {
  1001  	query := fmt.Sprintf(`show tables like '%%%s%%';`, showTableName)
  1002  	queryResult, err := tablet.VttabletProcess.QueryTablet(query, keyspaceName, true)
  1003  	require.Nil(t, err)
  1004  	assert.Equal(t, expectCount, len(queryResult.Rows))
  1005  }
  1006  
  1007  // checkMigratedTables checks the CREATE STATEMENT of a table after migration
  1008  func checkMigratedTable(t *testing.T, tableName, expectColumn string) {
  1009  	for i := range clusterInstance.Keyspaces[0].Shards {
  1010  		createStatement := getCreateTableStatement(t, clusterInstance.Keyspaces[0].Shards[i].Vttablets[0], tableName)
  1011  		assert.Contains(t, createStatement, expectColumn)
  1012  	}
  1013  }
  1014  
  1015  // getCreateTableStatement returns the CREATE TABLE statement for a given table
  1016  func getCreateTableStatement(t *testing.T, tablet *cluster.Vttablet, tableName string) (statement string) {
  1017  	queryResult, err := tablet.VttabletProcess.QueryTablet(fmt.Sprintf("show create table %s;", tableName), keyspaceName, true)
  1018  	require.Nil(t, err)
  1019  
  1020  	assert.Equal(t, len(queryResult.Rows), 1)
  1021  	assert.Equal(t, len(queryResult.Rows[0]), 2) // table name, create statement
  1022  	statement = queryResult.Rows[0][1].ToString()
  1023  	return statement
  1024  }