vitess.io/vitess@v0.16.2/go/vt/wrangler/workflow_test.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package wrangler
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"regexp"
    23  	"strconv"
    24  	"strings"
    25  	"testing"
    26  	"time"
    27  
    28  	"github.com/stretchr/testify/require"
    29  
    30  	"vitess.io/vitess/go/sqltypes"
    31  	"vitess.io/vitess/go/vt/discovery"
    32  	"vitess.io/vitess/go/vt/log"
    33  	"vitess.io/vitess/go/vt/proto/topodata"
    34  	"vitess.io/vitess/go/vt/topo"
    35  	"vitess.io/vitess/go/vt/vtctl/workflow"
    36  
    37  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    38  	tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
    39  )
    40  
// noResult is an empty query result, registered by the helpers in this file
// for queries that should yield no rows.
var noResult = &sqltypes.Result{}
    42  
    43  func getMoveTablesWorkflow(t *testing.T, cells, tabletTypes string) *VReplicationWorkflow {
    44  	p := &VReplicationWorkflowParams{
    45  		Workflow:                        "wf1",
    46  		SourceKeyspace:                  "sourceks",
    47  		TargetKeyspace:                  "targetks",
    48  		Tables:                          "customer,corder",
    49  		Cells:                           cells,
    50  		TabletTypes:                     tabletTypes,
    51  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
    52  		OnDDL:                           binlogdatapb.OnDDLAction_name[int32(binlogdatapb.OnDDLAction_EXEC)],
    53  	}
    54  	mtwf := &VReplicationWorkflow{
    55  		workflowType: MoveTablesWorkflow,
    56  		ctx:          context.Background(),
    57  		wr:           nil,
    58  		params:       p,
    59  		ts:           nil,
    60  		ws:           nil,
    61  	}
    62  	return mtwf
    63  }
    64  
    65  func testComplete(t *testing.T, vrwf *VReplicationWorkflow) error {
    66  	_, err := vrwf.Complete()
    67  	return err
    68  }
    69  func TestReshardingWorkflowErrorsAndMisc(t *testing.T) {
    70  	mtwf := getMoveTablesWorkflow(t, "cell1,cell2", "replica,rdonly")
    71  	require.False(t, mtwf.Exists())
    72  	mtwf.ws = &workflow.State{}
    73  	require.True(t, mtwf.Exists())
    74  	require.Errorf(t, testComplete(t, mtwf), ErrWorkflowNotFullySwitched)
    75  	mtwf.ws.WritesSwitched = true
    76  	require.Errorf(t, mtwf.Cancel(), ErrWorkflowPartiallySwitched)
    77  
    78  	tabletTypes, _, err := discovery.ParseTabletTypesAndOrder(mtwf.params.TabletTypes)
    79  	require.NoError(t, err)
    80  
    81  	require.ElementsMatch(t, mtwf.getCellsAsArray(), []string{"cell1", "cell2"})
    82  	require.ElementsMatch(t, tabletTypes, []topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY})
    83  	hasReplica, hasRdonly, hasPrimary, err := mtwf.parseTabletTypes()
    84  	require.NoError(t, err)
    85  	require.True(t, hasReplica)
    86  	require.True(t, hasRdonly)
    87  	require.False(t, hasPrimary)
    88  
    89  	mtwf.params.TabletTypes = "replica,rdonly,primary"
    90  	tabletTypes, _, err = discovery.ParseTabletTypesAndOrder(mtwf.params.TabletTypes)
    91  	require.NoError(t, err)
    92  	require.ElementsMatch(t, tabletTypes,
    93  		[]topodata.TabletType{topodata.TabletType_REPLICA, topodata.TabletType_RDONLY, topodata.TabletType_PRIMARY})
    94  
    95  	hasReplica, hasRdonly, hasPrimary, err = mtwf.parseTabletTypes()
    96  	require.NoError(t, err)
    97  	require.True(t, hasReplica)
    98  	require.True(t, hasRdonly)
    99  	require.True(t, hasPrimary)
   100  }
   101  
   102  func expectCanSwitchQueries(t *testing.T, tme *testMigraterEnv, keyspace, state string, currentLag int64) {
   103  	now := time.Now().Unix()
   104  	rowTemplate := "1|||||%s|vt_%s|%d|%d|0|0|||"
   105  	row := fmt.Sprintf(rowTemplate, state, keyspace, now, now-currentLag)
   106  	replicationResult := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   107  		"id|source|pos|stop_pos|max_replication_lag|state|db_name|time_updated|transaction_timestamp|time_heartbeat|time_throttled|component_throttled|message|tags",
   108  		"int64|varchar|int64|int64|int64|varchar|varchar|int64|int64|int64|int64|varchar|varchar|varchar"),
   109  		row)
   110  	copyStateResult := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   111  		"table|lastpk",
   112  		"varchar|varchar"),
   113  		"t1|pk1",
   114  	)
   115  
   116  	for _, db := range tme.dbTargetClients {
   117  		db.addInvariant(streamExtInfoKs2, replicationResult)
   118  
   119  		if state == "Copying" {
   120  			db.addInvariant(fmt.Sprintf(copyStateQuery, 1, 1), copyStateResult)
   121  		} else {
   122  			db.addInvariant(fmt.Sprintf(copyStateQuery, 1, 1), noResult)
   123  		}
   124  	}
   125  }
   126  
   127  // TestCanSwitch validates the logic to determine if traffic can be switched or not
   128  func TestCanSwitch(t *testing.T) {
   129  	var wf *VReplicationWorkflow
   130  	ctx := context.Background()
   131  	workflowName := "test"
   132  	p := &VReplicationWorkflowParams{
   133  		Workflow:       workflowName,
   134  		SourceKeyspace: "ks1",
   135  		TargetKeyspace: "ks2",
   136  		Tables:         "t1,t2",
   137  		Cells:          "cell1,cell2",
   138  		TabletTypes:    "replica,rdonly,primary",
   139  		Timeout:        DefaultActionTimeout,
   140  	}
   141  	tme := newTestTableMigrater(ctx, t)
   142  	defer tme.stopTablets(t)
   143  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
   144  	require.NoError(t, err)
   145  	expectCopyProgressQueries(t, tme)
   146  
   147  	type testCase struct {
   148  		name                  string
   149  		state                 string
   150  		streamLag, allowedLag int64 /* seconds */
   151  		expectedReason        *regexp.Regexp
   152  	}
   153  
   154  	testCases := []testCase{
   155  		{"In Copy Phase", "Copying", 0, 0, regexp.MustCompile(cannotSwitchCopyIncomplete)},
   156  		{"High Lag", "Running", 6, 5, regexp.MustCompile(strings.ReplaceAll(cannotSwitchHighLag, "%d", "(\\d+)"))},
   157  		{"Acceptable Lag", "Running", 4, 5, nil},
   158  	}
   159  	for _, tc := range testCases {
   160  		t.Run(tc.name, func(t *testing.T) {
   161  			expectCanSwitchQueries(t, tme, "ks2", tc.state, tc.streamLag)
   162  			p.MaxAllowedTransactionLagSeconds = tc.allowedLag
   163  			reason, err := wf.canSwitch("ks2", workflowName)
   164  			require.NoError(t, err)
   165  
   166  			if tc.expectedReason != nil {
   167  				require.Regexp(t, tc.expectedReason, reason)
   168  
   169  				m := tc.expectedReason.FindStringSubmatch(reason)
   170  				switch tc.expectedReason.NumSubexp() {
   171  				case 0:
   172  					// cannotSwitchCopyIncomplete, nothing else to do
   173  				case 2:
   174  					// cannotSwitchHighLag, assert streamLag > allowedLag
   175  					curLag, err := strconv.ParseInt(m[1], 10, 64)
   176  					require.NoError(t, err, "could not parse current lag %s as int", m[1])
   177  
   178  					allowedLag, err := strconv.ParseInt(m[2], 10, 64)
   179  					require.NoError(t, err, "could not parse allowed lag %s as int", m[2])
   180  
   181  					require.Greater(t, curLag, allowedLag, "current lag %d should be strictly greater than allowed lag %d (from reason %q)", curLag, allowedLag, reason)
   182  				default:
   183  					// unexpected regexp, fail loudly
   184  					require.Fail(t, "unknown reason regexp %s -- did you add a new test case?", tc.expectedReason)
   185  				}
   186  			} else {
   187  				require.Empty(t, reason, "should be able to switch, but cannot because %s", reason)
   188  			}
   189  		})
   190  	}
   191  }
   192  
   193  func TestCopyProgress(t *testing.T) {
   194  	var err error
   195  	var wf *VReplicationWorkflow
   196  	ctx := context.Background()
   197  	workflowName := "test"
   198  	p := &VReplicationWorkflowParams{
   199  		Workflow:       workflowName,
   200  		SourceKeyspace: "ks1",
   201  		TargetKeyspace: "ks2",
   202  		Tables:         "t1,t2",
   203  		Cells:          "cell1,cell2",
   204  		TabletTypes:    "replica,rdonly,primary",
   205  		Timeout:        DefaultActionTimeout,
   206  	}
   207  	tme := newTestTableMigrater(ctx, t)
   208  	defer tme.stopTablets(t)
   209  	wf, err = tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
   210  	require.NoError(t, err)
   211  	require.NotNil(t, wf)
   212  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   213  
   214  	expectCopyProgressQueries(t, tme)
   215  
   216  	var cp *CopyProgress
   217  	cp, err = wf.GetCopyProgress()
   218  	require.NoError(t, err)
   219  	log.Infof("CopyProgress is %+v,%+v", (*cp)["t1"], (*cp)["t2"])
   220  
   221  	require.Equal(t, int64(800), (*cp)["t1"].SourceRowCount)
   222  	require.Equal(t, int64(200), (*cp)["t1"].TargetRowCount)
   223  	require.Equal(t, int64(4000), (*cp)["t1"].SourceTableSize)
   224  	require.Equal(t, int64(2000), (*cp)["t1"].TargetTableSize)
   225  
   226  	require.Equal(t, int64(2000), (*cp)["t2"].SourceRowCount)
   227  	require.Equal(t, int64(400), (*cp)["t2"].TargetRowCount)
   228  	require.Equal(t, int64(4000), (*cp)["t2"].SourceTableSize)
   229  	require.Equal(t, int64(1000), (*cp)["t2"].TargetTableSize)
   230  }
   231  
   232  func expectCopyProgressQueries(t *testing.T, tme *testMigraterEnv) {
   233  	db := tme.tmeDB
   234  	query := "select distinct table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 1"
   235  	rows := []string{"t1", "t2"}
   236  	result := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   237  		"table_name",
   238  		"varchar"),
   239  		rows...)
   240  	db.AddQuery(query, result)
   241  	query = "select distinct table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 2"
   242  	db.AddQuery(query, result)
   243  
   244  	query = "select table_name, table_rows, data_length from information_schema.tables where table_schema = 'vt_ks2' and table_name in ('t1','t2')"
   245  	result = sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   246  		"table_name|table_rows|data_length",
   247  		"varchar|int64|int64"),
   248  		"t1|100|1000",
   249  		"t2|200|500")
   250  	db.AddQuery(query, result)
   251  
   252  	query = "select table_name, table_rows, data_length from information_schema.tables where table_schema = 'vt_ks1' and table_name in ('t1','t2')"
   253  	result = sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   254  		"table_name|table_rows|data_length",
   255  		"varchar|int64|int64"),
   256  		"t1|400|2000",
   257  		"t2|1000|2000")
   258  	db.AddQuery(query, result)
   259  
   260  	for _, id := range []int{1, 2} {
   261  		query = fmt.Sprintf("select distinct 1 from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = %d", id)
   262  		result = sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   263  			"dummy",
   264  			"int64"),
   265  			"1")
   266  		db.AddQuery(query, result)
   267  	}
   268  }
   269  
// defaultMaxAllowedTransactionLagSeconds is the value the tests in this file
// assign to VReplicationWorkflowParams.MaxAllowedTransactionLagSeconds.
const defaultMaxAllowedTransactionLagSeconds = 30
   271  
   272  func TestMoveTablesV2(t *testing.T) {
   273  	ctx := context.Background()
   274  	p := &VReplicationWorkflowParams{
   275  		Workflow:                        "test",
   276  		SourceKeyspace:                  "ks1",
   277  		TargetKeyspace:                  "ks2",
   278  		Tables:                          "t1,t2",
   279  		Cells:                           "cell1,cell2",
   280  		TabletTypes:                     "REPLICA,RDONLY,PRIMARY",
   281  		Timeout:                         DefaultActionTimeout,
   282  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   283  		OnDDL:                           binlogdatapb.OnDDLAction_name[int32(binlogdatapb.OnDDLAction_STOP)],
   284  	}
   285  	tme := newTestTableMigrater(ctx, t)
   286  	defer tme.stopTablets(t)
   287  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
   288  	require.NoError(t, err)
   289  	require.NotNil(t, wf)
   290  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   291  	tme.expectNoPreviousJournals()
   292  	expectMoveTablesQueries(t, tme, p)
   293  	tme.expectNoPreviousJournals()
   294  	require.NoError(t, testSwitchForward(t, wf))
   295  	require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState())
   296  
   297  	tme.expectNoPreviousJournals()
   298  	tme.expectNoPreviousReverseJournals()
   299  	require.NoError(t, testReverse(t, wf))
   300  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   301  }
   302  
   303  func validateRoutingRuleCount(ctx context.Context, t *testing.T, ts *topo.Server, cnt int) {
   304  	rr, err := ts.GetRoutingRules(ctx)
   305  	require.NoError(t, err)
   306  	require.NotNil(t, rr)
   307  	rules := rr.Rules
   308  	require.Equal(t, cnt, len(rules))
   309  }
   310  
   311  func checkIfTableExistInVSchema(ctx context.Context, t *testing.T, ts *topo.Server, keyspace string, table string) bool {
   312  	vschema, err := ts.GetVSchema(ctx, keyspace)
   313  	require.NoError(t, err)
   314  	require.NotNil(t, vschema)
   315  	_, ok := vschema.Tables[table]
   316  	return ok
   317  }
   318  
   319  func TestMoveTablesV2Complete(t *testing.T) {
   320  	ctx := context.Background()
   321  	p := &VReplicationWorkflowParams{
   322  		Workflow:                        "test",
   323  		SourceKeyspace:                  "ks1",
   324  		TargetKeyspace:                  "ks2",
   325  		Tables:                          "t1,t2",
   326  		Cells:                           "cell1,cell2",
   327  		TabletTypes:                     "replica,rdonly,primary",
   328  		Timeout:                         DefaultActionTimeout,
   329  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   330  	}
   331  	tme := newTestTableMigrater(ctx, t)
   332  	defer tme.stopTablets(t)
   333  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
   334  	require.NoError(t, err)
   335  	require.NotNil(t, wf)
   336  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   337  	tme.expectNoPreviousJournals()
   338  	expectMoveTablesQueries(t, tme, p)
   339  	tme.expectNoPreviousJournals()
   340  	require.NoError(t, testSwitchForward(t, wf))
   341  	require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState())
   342  
   343  	//16 rules, 8 per table t1,t2 eg: t1,t1@replica,t1@rdonly,ks1.t1,ks1.t1@replica,ks1.t1@rdonly,ks2.t1@replica,ks2.t1@rdonly
   344  	validateRoutingRuleCount(ctx, t, wf.wr.ts, 16)
   345  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t1"))
   346  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t2"))
   347  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t1"))
   348  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t2"))
   349  	require.NoError(t, testComplete(t, wf))
   350  	require.False(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t1"))
   351  	require.False(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t2"))
   352  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t1"))
   353  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t2"))
   354  
   355  	validateRoutingRuleCount(ctx, t, wf.wr.ts, 0)
   356  }
   357  
   358  func testSwitchForward(t *testing.T, wf *VReplicationWorkflow) error {
   359  	_, err := wf.SwitchTraffic(workflow.DirectionForward)
   360  	return err
   361  }
   362  
   363  func testReverse(t *testing.T, wf *VReplicationWorkflow) error {
   364  	_, err := wf.ReverseTraffic()
   365  	return err
   366  }
   367  
   368  func TestMoveTablesV2Partial(t *testing.T) {
   369  	ctx := context.Background()
   370  	p := &VReplicationWorkflowParams{
   371  		Workflow:                        "test",
   372  		SourceKeyspace:                  "ks1",
   373  		TargetKeyspace:                  "ks2",
   374  		Tables:                          "t1,t2",
   375  		Cells:                           "cell1,cell2",
   376  		TabletTypes:                     "replica,rdonly,primary",
   377  		Timeout:                         DefaultActionTimeout,
   378  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   379  	}
   380  	tme := newTestTableMigrater(ctx, t)
   381  	defer tme.stopTablets(t)
   382  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
   383  	require.NoError(t, err)
   384  	require.NotNil(t, wf)
   385  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   386  	tme.expectNoPreviousJournals()
   387  	expectMoveTablesQueries(t, tme, p)
   388  
   389  	tme.expectNoPreviousJournals()
   390  	wf.params.TabletTypes = "RDONLY"
   391  	wf.params.Cells = "cell1"
   392  	require.NoError(t, testSwitchForward(t, wf))
   393  	require.Equal(t, "Reads partially switched. Replica not switched. Rdonly switched in cells: cell1. Writes Not Switched", wf.CurrentState())
   394  
   395  	tme.expectNoPreviousJournals()
   396  	wf.params.TabletTypes = "rdonly"
   397  	wf.params.Cells = "cell2"
   398  	require.NoError(t, testSwitchForward(t, wf))
   399  	require.Equal(t, "Reads partially switched. Replica not switched. All Rdonly Reads Switched. Writes Not Switched", wf.CurrentState())
   400  
   401  	tme.expectNoPreviousJournals()
   402  	wf.params.TabletTypes = "REPLICA"
   403  	wf.params.Cells = "cell1,cell2"
   404  	require.NoError(t, testSwitchForward(t, wf))
   405  	require.Equal(t, WorkflowStateReadsSwitched, wf.CurrentState())
   406  
   407  	tme.expectNoPreviousJournals()
   408  	wf.params.TabletTypes = "replica,rdonly"
   409  	require.NoError(t, testReverse(t, wf))
   410  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   411  
   412  	tme.expectNoPreviousJournals()
   413  	wf.params.TabletTypes = "replica"
   414  	wf.params.Cells = "cell1"
   415  	require.NoError(t, testSwitchForward(t, wf))
   416  	require.Equal(t, "Reads partially switched. Replica switched in cells: cell1. Rdonly switched in cells: cell1. Writes Not Switched", wf.CurrentState())
   417  
   418  	tme.expectNoPreviousJournals()
   419  	wf.params.TabletTypes = "replica"
   420  	wf.params.Cells = "cell2"
   421  	require.NoError(t, testSwitchForward(t, wf))
   422  	require.Equal(t, "All Reads Switched. Writes Not Switched", wf.CurrentState())
   423  }
   424  
   425  func TestMoveTablesV2Cancel(t *testing.T) {
   426  	ctx := context.Background()
   427  	p := &VReplicationWorkflowParams{
   428  		Workflow:                        "test",
   429  		SourceKeyspace:                  "ks1",
   430  		TargetKeyspace:                  "ks2",
   431  		Tables:                          "t1,t2",
   432  		Cells:                           "cell1,cell2",
   433  		TabletTypes:                     "replica,rdonly,primary",
   434  		Timeout:                         DefaultActionTimeout,
   435  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   436  	}
   437  	tme := newTestTableMigrater(ctx, t)
   438  	defer tme.stopTablets(t)
   439  	expectMoveTablesQueries(t, tme, p)
   440  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, MoveTablesWorkflow, p)
   441  	require.NoError(t, err)
   442  	require.NotNil(t, wf)
   443  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   444  	expectMoveTablesQueries(t, tme, p)
   445  	validateRoutingRuleCount(ctx, t, wf.wr.ts, 4) // rules set up by test env
   446  
   447  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t1"))
   448  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t2"))
   449  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t1"))
   450  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t2"))
   451  
   452  	require.NoError(t, wf.Cancel())
   453  
   454  	validateRoutingRuleCount(ctx, t, wf.wr.ts, 0)
   455  
   456  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t1"))
   457  	require.True(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks1", "t2"))
   458  	require.False(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t1"))
   459  	require.False(t, checkIfTableExistInVSchema(ctx, t, wf.wr.ts, "ks2", "t2"))
   460  }
   461  
   462  func TestReshardV2(t *testing.T) {
   463  	ctx := context.Background()
   464  	sourceShards := []string{"-40", "40-"}
   465  	targetShards := []string{"-80", "80-"}
   466  	p := &VReplicationWorkflowParams{
   467  		Workflow:                        "test",
   468  		SourceKeyspace:                  "ks",
   469  		TargetKeyspace:                  "ks",
   470  		SourceShards:                    sourceShards,
   471  		TargetShards:                    targetShards,
   472  		Cells:                           "cell1,cell2",
   473  		TabletTypes:                     "replica,rdonly,primary",
   474  		Timeout:                         DefaultActionTimeout,
   475  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   476  		OnDDL:                           binlogdatapb.OnDDLAction_name[int32(binlogdatapb.OnDDLAction_EXEC_IGNORE)],
   477  	}
   478  	tme := newTestShardMigrater(ctx, t, sourceShards, targetShards)
   479  	defer tme.stopTablets(t)
   480  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p)
   481  	require.NoError(t, err)
   482  	require.NotNil(t, wf)
   483  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   484  	tme.expectNoPreviousJournals()
   485  	expectReshardQueries(t, tme, p)
   486  	tme.expectNoPreviousJournals()
   487  	require.NoError(t, testSwitchForward(t, wf))
   488  	require.Equal(t, WorkflowStateAllSwitched, wf.CurrentState())
   489  	require.NoError(t, testComplete(t, wf))
   490  	si, err := wf.wr.ts.GetShard(ctx, "ks", "-40")
   491  	require.Contains(t, err.Error(), "node doesn't exist")
   492  	require.Nil(t, si)
   493  	si, err = wf.wr.ts.GetShard(ctx, "ks", "-80")
   494  	require.NoError(t, err)
   495  	require.NotNil(t, si)
   496  }
   497  
   498  func TestVRWSchemaValidation(t *testing.T) {
   499  	ctx := context.Background()
   500  	sourceShards := []string{"-80", "80-"}
   501  	targetShards := []string{"-40", "40-80", "80-c0", "c0-"}
   502  	p := &VReplicationWorkflowParams{
   503  		Workflow:                        "test",
   504  		SourceKeyspace:                  "ks",
   505  		TargetKeyspace:                  "ks",
   506  		SourceShards:                    sourceShards,
   507  		TargetShards:                    targetShards,
   508  		Cells:                           "cell1,cell2",
   509  		TabletTypes:                     "replica,rdonly,primary",
   510  		Timeout:                         DefaultActionTimeout,
   511  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   512  	}
   513  	schm := &tabletmanagerdatapb.SchemaDefinition{
   514  		TableDefinitions: []*tabletmanagerdatapb.TableDefinition{{
   515  			Name:              "not_in_vschema",
   516  			Columns:           []string{"c1", "c2"},
   517  			PrimaryKeyColumns: []string{"c1"},
   518  			Fields:            sqltypes.MakeTestFields("c1|c2", "int64|int64"),
   519  		}},
   520  	}
   521  	tme := newTestShardMigrater(ctx, t, sourceShards, targetShards)
   522  	for _, primary := range tme.sourcePrimaries {
   523  		primary.FakeMysqlDaemon.Schema = schm
   524  	}
   525  
   526  	defer tme.stopTablets(t)
   527  	vrwf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p)
   528  	vrwf.ws = nil
   529  	require.NoError(t, err)
   530  	require.NotNil(t, vrwf)
   531  	shouldErr := vrwf.Create(ctx)
   532  	require.Contains(t, shouldErr.Error(), "Create ReshardWorkflow failed: ValidateVSchema")
   533  }
   534  
   535  func TestReshardV2Cancel(t *testing.T) {
   536  	ctx := context.Background()
   537  	sourceShards := []string{"-40", "40-"}
   538  	targetShards := []string{"-80", "80-"}
   539  	p := &VReplicationWorkflowParams{
   540  		Workflow:                        "test",
   541  		SourceKeyspace:                  "ks",
   542  		TargetKeyspace:                  "ks",
   543  		SourceShards:                    sourceShards,
   544  		TargetShards:                    targetShards,
   545  		Cells:                           "cell1,cell2",
   546  		TabletTypes:                     "replica,rdonly,primary",
   547  		Timeout:                         DefaultActionTimeout,
   548  		MaxAllowedTransactionLagSeconds: defaultMaxAllowedTransactionLagSeconds,
   549  	}
   550  	tme := newTestShardMigrater(ctx, t, sourceShards, targetShards)
   551  	defer tme.stopTablets(t)
   552  	wf, err := tme.wr.NewVReplicationWorkflow(ctx, ReshardWorkflow, p)
   553  	require.NoError(t, err)
   554  	require.NotNil(t, wf)
   555  	require.Equal(t, WorkflowStateNotSwitched, wf.CurrentState())
   556  	tme.expectNoPreviousJournals()
   557  	expectReshardQueries(t, tme, p)
   558  	require.NoError(t, wf.Cancel())
   559  }
   560  
// expectReshardQueries registers the query/result pairs used by the Reshard
// workflow tests in this file: invariants on every source and target db
// client, plus a few queries on the environment's shared fake database. The
// vdiff cleanup statements are built from params.TargetKeyspace,
// params.SourceKeyspace and params.Workflow; all other queries are
// hard-coded for keyspace "ks" and workflow "test".
func expectReshardQueries(t *testing.T, tme *testShardMigraterEnv, params *VReplicationWorkflowParams) {
	// VDiff cleanup for the forward workflow in the target keyspace.
	vdiffDeleteQuery := fmt.Sprintf(`delete from vd, vdt, vdl using _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
						inner join _vt.vdiff_log as vdl on (vd.id = vdl.vdiff_id)
						where vd.keyspace = '%s' and vd.workflow = '%s'`,
		params.TargetKeyspace, params.Workflow)
	// VDiff cleanup for the "<workflow>_reverse" workflow in the source keyspace.
	vdiffDeleteReverseQuery := fmt.Sprintf(`delete from vd, vdt, vdl using _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
						inner join _vt.vdiff_log as vdl on (vd.id = vdl.vdiff_id)
						where vd.keyspace = '%s' and vd.workflow = '%s_reverse'`,
		params.SourceKeyspace, params.Workflow)

	// Stream lookups on the source side that return no rows.
	sourceQueries := []string{
		"select id, workflow, source, pos, workflow_type, workflow_sub_type, defer_secondary_keys from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse' and state = 'Stopped' and message != 'FROZEN'",
		"select id, workflow, source, pos, workflow_type, workflow_sub_type, defer_secondary_keys from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse'",
	}
	// Local empty result; shadows the package-level noResult inside this function.
	noResult := &sqltypes.Result{}
	for _, dbclient := range tme.dbSourceClients {
		for _, query := range sourceQueries {
			dbclient.addInvariant(query, noResult)
		}
		// Reverse-workflow stream lookup, deletion and re-creation on the source.
		dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks' and workflow = 'test_reverse'", resultid1)
		dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult)
		dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult)
		dbclient.addInvariant("delete from _vt.post_copy_action where vrepl_id in (1)", noResult)
		dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name, workflow_type, workflow_sub_type)", &sqltypes.Result{InsertID: uint64(1)})
		dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1)
		dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2)
		dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1))
		dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2))
		dbclient.addInvariant("insert into _vt.resharding_journal", noResult)
		dbclient.addInvariant("alter table _vt.copy_state auto_increment = 1", noResult)
	}

	// Stream lookup on the target side that returns no rows.
	targetQueries := []string{
		"select id, workflow, source, pos, workflow_type, workflow_sub_type, defer_secondary_keys from _vt.vreplication where db_name='vt_ks' and workflow != 'test_reverse' and state = 'Stopped' and message != 'FROZEN'",
	}

	for _, dbclient := range tme.dbTargetClients {
		for _, query := range targetQueries {
			dbclient.addInvariant(query, noResult)
		}
		dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1)
		dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2)
		// Stopping streams 1 and 2 for cutover.
		dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult)
		dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult)
		dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1))
		dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2))
		// Position/state row returned for both stream ids below.
		// NOTE(review): the row supplies values for pos and state only,
		// while three fields are declared -- confirm this is intentional.
		state := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
			"pos|state|message",
			"varchar|varchar|varchar"),
			"MariaDB/5-456-892|Running")
		dbclient.addInvariant("select pos, state, message from _vt.vreplication where id=2", state)
		dbclient.addInvariant("select pos, state, message from _vt.vreplication where id=1", state)
		// Forward-workflow stream lookup, freezing and deletion on the target.
		dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks' and workflow = 'test'", resultid1)
		dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult)
		dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult)
		dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult)
		dbclient.addInvariant("delete from _vt.post_copy_action where vrepl_id in (1)", noResult)
	}
	// Queries registered on the environment's shared fake database.
	tme.tmeDB.AddQuery("USE `vt_ks`", noResult)
	tme.tmeDB.AddQuery("select distinct table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 1", noResult)
	tme.tmeDB.AddQuery("select distinct table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 2", noResult)
	tme.tmeDB.AddQuery(vdiffDeleteQuery, noResult)
	tme.tmeDB.AddQuery(vdiffDeleteReverseQuery, noResult)
	tme.tmeDB.AddQuery("alter table _vt.copy_state auto_increment = 1", noResult)
	tme.tmeDB.AddQuery("optimize table _vt.copy_state", noResult)
}
   627  
   628  func expectMoveTablesQueries(t *testing.T, tme *testMigraterEnv, params *VReplicationWorkflowParams) {
   629  	vdiffDeleteQuery := fmt.Sprintf(`delete from vd, vdt, vdl using _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
   630  	inner join _vt.vdiff_log as vdl on (vd.id = vdl.vdiff_id)
   631  	where vd.keyspace = '%s' and vd.workflow = '%s'`,
   632  		params.TargetKeyspace, params.Workflow)
   633  	vdiffDeleteReverseQuery := fmt.Sprintf(`delete from vd, vdt, vdl using _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
   634  	inner join _vt.vdiff_log as vdl on (vd.id = vdl.vdiff_id)
   635  	where vd.keyspace = '%s' and vd.workflow = '%s_reverse'`,
   636  		params.SourceKeyspace, params.Workflow)
   637  
   638  	var query string
   639  	noResult := &sqltypes.Result{}
   640  	for _, dbclient := range tme.dbTargetClients {
   641  		query = "update _vt.vreplication set state = 'Running', message = '' where id in (1)"
   642  		dbclient.addInvariant(query, noResult)
   643  		dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks2' and workflow = 'test'", resultid1)
   644  		dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1))
   645  		dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2))
   646  		query = "update _vt.vreplication set message='Picked source tablet: cell:\"cell1\" uid:10 ' where id=1"
   647  		dbclient.addInvariant(query, noResult)
   648  		dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1)
   649  		dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2)
   650  		dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult)
   651  		dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult)
   652  		dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name, workflow_type, workflow_sub_type)", &sqltypes.Result{InsertID: uint64(1)})
   653  		dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult)
   654  		dbclient.addInvariant("select 1 from _vt.vreplication where db_name='vt_ks2' and workflow='test' and message!='FROZEN'", noResult)
   655  		dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult)
   656  		dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult)
   657  		dbclient.addInvariant("delete from _vt.post_copy_action where vrepl_id in (1)", noResult)
   658  		dbclient.addInvariant("insert into _vt.resharding_journal", noResult)
   659  		dbclient.addInvariant("select val from _vt.resharding_journal", noResult)
   660  		dbclient.addInvariant("select id, source, message, cell, tablet_types from _vt.vreplication where workflow='test_reverse' and db_name='vt_ks1'",
   661  			sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   662  				"id|source|message|cell|tablet_types",
   663  				"int64|varchar|varchar|varchar|varchar"),
   664  				""),
   665  		)
   666  	}
   667  
   668  	for _, dbclient := range tme.dbSourceClients {
   669  		dbclient.addInvariant("select val from _vt.resharding_journal", noResult)
   670  		dbclient.addInvariant("update _vt.vreplication set message = 'FROZEN'", noResult)
   671  		dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name, workflow_type, workflow_sub_type)", &sqltypes.Result{InsertID: uint64(1)})
   672  		dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (1)", noResult)
   673  		dbclient.addInvariant("update _vt.vreplication set state = 'Stopped', message = 'stopped for cutover' where id in (2)", noResult)
   674  		dbclient.addInvariant("select id from _vt.vreplication where id = 1", resultid1)
   675  		dbclient.addInvariant("select id from _vt.vreplication where id = 2", resultid2)
   676  		dbclient.addInvariant("select id from _vt.vreplication where db_name = 'vt_ks1' and workflow = 'test_reverse'", resultid1)
   677  		dbclient.addInvariant("delete from _vt.vreplication where id in (1)", noResult)
   678  		dbclient.addInvariant("delete from _vt.copy_state where vrepl_id in (1)", noResult)
   679  		dbclient.addInvariant("delete from _vt.post_copy_action where vrepl_id in (1)", noResult)
   680  		dbclient.addInvariant("insert into _vt.vreplication (workflow, source, pos, max_tps, max_replication_lag, time_updated, transaction_timestamp, state, db_name, workflow_type, workflow_sub_type)", &sqltypes.Result{InsertID: uint64(1)})
   681  		dbclient.addInvariant("select * from _vt.vreplication where id = 1", runningResult(1))
   682  		dbclient.addInvariant("select * from _vt.vreplication where id = 2", runningResult(2))
   683  		dbclient.addInvariant("insert into _vt.resharding_journal", noResult)
   684  		dbclient.addInvariant(reverseStreamExtInfoKs1, noResult)
   685  	}
   686  	state := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   687  		"pos|state|message",
   688  		"varchar|varchar|varchar"),
   689  		"MariaDB/5-456-892|Running",
   690  	)
   691  	tme.dbTargetClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=1", state)
   692  	tme.dbTargetClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=2", state)
   693  	tme.dbTargetClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=1", state)
   694  	tme.dbTargetClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=2", state)
   695  
   696  	state = sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   697  		"pos|state|message",
   698  		"varchar|varchar|varchar"),
   699  		"MariaDB/5-456-893|Running",
   700  	)
   701  	tme.dbSourceClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=1", state)
   702  	tme.dbSourceClients[0].addInvariant("select pos, state, message from _vt.vreplication where id=2", state)
   703  	tme.dbSourceClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=1", state)
   704  	tme.dbSourceClients[1].addInvariant("select pos, state, message from _vt.vreplication where id=2", state)
   705  	tme.tmeDB.AddQuery("USE `vt_ks1`", noResult)
   706  	tme.tmeDB.AddQuery("USE `vt_ks2`", noResult)
   707  	tme.tmeDB.AddQuery("drop table `vt_ks1`.`t1`", noResult)
   708  	tme.tmeDB.AddQuery("drop table `vt_ks1`.`t2`", noResult)
   709  	tme.tmeDB.AddQuery("drop table `vt_ks2`.`t1`", noResult)
   710  	tme.tmeDB.AddQuery("drop table `vt_ks2`.`t2`", noResult)
   711  	tme.tmeDB.AddQuery("update _vt.vreplication set message='Picked source tablet: cell:\"cell1\" uid:10 ' where id=1", noResult)
   712  	tme.tmeDB.AddQuery("lock tables `t1` read,`t2` read", &sqltypes.Result{})
   713  	tme.tmeDB.AddQuery("select distinct table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 1", noResult)
   714  	tme.tmeDB.AddQuery("select distinct table_name from _vt.copy_state cs, _vt.vreplication vr where vr.id = cs.vrepl_id and vr.id = 2", noResult)
   715  	tme.tmeDB.AddQuery(vdiffDeleteQuery, noResult)
   716  	tme.tmeDB.AddQuery(vdiffDeleteReverseQuery, noResult)
   717  	tme.tmeDB.AddQuery("alter table _vt.copy_state auto_increment = 1", noResult)
   718  	tme.tmeDB.AddQuery("optimize table _vt.copy_state", noResult)
   719  }