vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vdiff/engine_test.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vdiff
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"testing"
    23  
    24  	"github.com/google/uuid"
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  
    28  	"vitess.io/vitess/go/mysql"
    29  	"vitess.io/vitess/go/sqltypes"
    30  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    31  	tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
    32  )
    33  
    34  func TestEngineOpen(t *testing.T) {
    35  	vdenv := newTestVDiffEnv(t)
    36  	defer vdenv.close()
    37  	UUID := uuid.New().String()
    38  	tests := []struct {
    39  		name  string
    40  		state VDiffState
    41  	}{
    42  		// This needs to be started, for the first time, on open
    43  		{
    44  			name:  "pending vdiff",
    45  			state: PendingState,
    46  		},
    47  		// This needs to be restarted on open as it was previously started
    48  		// but was unable to terminate normally (e.g. crash) in the previous
    49  		// engine.
    50  		{
    51  			name:  "started vdiff",
    52  			state: StartedState,
    53  		},
    54  	}
    55  
    56  	for _, tt := range tests {
    57  		t.Run(tt.name, func(t *testing.T) {
    58  			vdenv.dbClient = binlogplayer.NewMockDBClient(t)
    59  			vdenv.vde.Close() // ensure we close any open one
    60  			vdenv.vde = nil
    61  			vdenv.vde = NewTestEngine(tstenv.TopoServ, vdenv.tablets[100].tablet, vdiffDBName, vdenv.dbClientFactory, vdenv.tmClientFactory)
    62  			require.False(t, vdenv.vde.IsOpen())
    63  
    64  			initialQR := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
    65  				vdiffTestCols,
    66  				vdiffTestColTypes,
    67  			),
    68  				fmt.Sprintf("1|%s|%s|%s|%s|%s|%s|%s|", UUID, vdenv.workflow, tstenv.KeyspaceName, tstenv.ShardName, vdiffDBName, tt.state, optionsJS),
    69  			)
    70  
    71  			vdenv.dbClient.ExpectRequest("select * from _vt.vdiff where state in ('started','pending')", initialQR, nil)
    72  			vdenv.dbClient.ExpectRequest("select * from _vt.vdiff where id = 1", sqltypes.MakeTestResult(sqltypes.MakeTestFields(
    73  				vdiffTestCols,
    74  				vdiffTestColTypes,
    75  			),
    76  				fmt.Sprintf("1|%s|%s|%s|%s|%s|%s|%s|", UUID, vdiffenv.workflow, tstenv.KeyspaceName, tstenv.ShardName, vdiffDBName, tt.state, optionsJS),
    77  			), nil)
    78  			vdenv.dbClient.ExpectRequest(fmt.Sprintf("select * from _vt.vreplication where workflow = '%s' and db_name = '%s'", vdiffenv.workflow, vdiffDBName), sqltypes.MakeTestResult(sqltypes.MakeTestFields(
    79  				"id|workflow|source|pos|stop_pos|max_tps|max_replication_lag|cell|tablet_types|time_updated|transaction_timestamp|state|message|db_name|rows_copied|tags|time_heartbeat|workflow_type|time_throttled|component_throttled|workflow_sub_type",
    80  				"int64|varbinary|blob|varbinary|varbinary|int64|int64|varbinary|varbinary|int64|int64|varbinary|varbinary|varbinary|int64|varbinary|int64|int64|int64|varchar|int64",
    81  			),
    82  				fmt.Sprintf("1|%s|%s|%s||9223372036854775807|9223372036854775807||PRIMARY,REPLICA|1669511347|0|Running||%s|200||1669511347|1|0||1", vdiffenv.workflow, vreplSource, vdiffSourceGtid, vdiffDBName),
    83  			), nil)
    84  
    85  			// Now let's short circuit the vdiff as we know that the open has worked as expected.
    86  			shortCircuitTestAfterQuery("update _vt.vdiff set state = 'started', last_error = '' , started_at = utc_timestamp() where id = 1", vdiffenv.dbClient)
    87  
    88  			vdenv.vde.Open(context.Background(), vdiffenv.vre)
    89  			defer vdenv.vde.Close()
    90  			assert.True(t, vdenv.vde.IsOpen())
    91  			assert.Equal(t, 1, len(vdenv.vde.controllers))
    92  			vdenv.dbClient.Wait()
    93  		})
    94  	}
    95  }
    96  
    97  // Test the full set of VDiff queries on a tablet.
    98  func TestVDiff(t *testing.T) {
    99  	vdenv := newTestVDiffEnv(t)
   100  	defer vdenv.close()
   101  	UUID := uuid.New().String()
   102  	options := &tabletmanagerdatapb.VDiffOptions{
   103  		CoreOptions: &tabletmanagerdatapb.VDiffCoreOptions{
   104  			Tables:         "t1",
   105  			TimeoutSeconds: 60,
   106  			MaxRows:        100,
   107  		},
   108  		PickerOptions: &tabletmanagerdatapb.VDiffPickerOptions{
   109  			SourceCell:  tstenv.Cells[0],
   110  			TargetCell:  tstenv.Cells[0],
   111  			TabletTypes: "primary",
   112  		},
   113  		ReportOptions: &tabletmanagerdatapb.VDiffReportOptions{
   114  			DebugQuery: false,
   115  			Format:     "json",
   116  		},
   117  	}
   118  
   119  	controllerQR := sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   120  		vdiffTestCols,
   121  		vdiffTestColTypes,
   122  	),
   123  		fmt.Sprintf("1|%s|%s|%s|%s|%s|pending|%s|", UUID, vdenv.workflow, tstenv.KeyspaceName, tstenv.ShardName, vdiffDBName, optionsJS),
   124  	)
   125  
   126  	vdenv.dbClient.ExpectRequest("select * from _vt.vdiff where id = 1", controllerQR, nil)
   127  	vdenv.dbClient.ExpectRequest(fmt.Sprintf("select * from _vt.vreplication where workflow = '%s' and db_name = '%s'", vdiffenv.workflow, vdiffDBName), sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   128  		"id|workflow|source|pos|stop_pos|max_tps|max_replication_lag|cell|tablet_types|time_updated|transaction_timestamp|state|message|db_name|rows_copied|tags|time_heartbeat|workflow_type|time_throttled|component_throttled|workflow_sub_type",
   129  		"int64|varbinary|blob|varbinary|varbinary|int64|int64|varbinary|varbinary|int64|int64|varbinary|varbinary|varbinary|int64|varbinary|int64|int64|int64|varchar|int64",
   130  	),
   131  		fmt.Sprintf("1|%s|%s|%s||9223372036854775807|9223372036854775807||PRIMARY,REPLICA|1669511347|0|Running||%s|200||1669511347|1|0||1", vdiffenv.workflow, vreplSource, vdiffSourceGtid, vdiffDBName),
   132  	), nil)
   133  	vdenv.dbClient.ExpectRequest("update _vt.vdiff set state = 'started', last_error = '' , started_at = utc_timestamp() where id = 1", singleRowAffected, nil)
   134  	vdenv.dbClient.ExpectRequest("insert into _vt.vdiff_log(vdiff_id, message) values (1, 'State changed to: started')", singleRowAffected, nil)
   135  	vdenv.dbClient.ExpectRequest(`select vdt.lastpk as lastpk, vdt.mismatch as mismatch, vdt.report as report
   136  						from _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
   137  						where vdt.vdiff_id = 1 and vdt.table_name = 't1'`, sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   138  		"lastpk|mismatch|report",
   139  		"varbinary|int64|json",
   140  	),
   141  		`fields:{name:"c1" type:INT64 table:"t1" org_table:"t1" database:"vt_customer" org_name:"c1" column_length:20 charset:63 flags:53251} rows:{lengths:1 values:"1"}|0|{}`,
   142  	), nil)
   143  	vdenv.dbClient.ExpectRequest(fmt.Sprintf("select table_name as table_name, table_rows as table_rows from INFORMATION_SCHEMA.TABLES where table_schema = '%s' and table_name in ('t1')", vdiffDBName), sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   144  		"table_name|table_rows",
   145  		"varchar|int64",
   146  	),
   147  		"t1|1",
   148  	), nil)
   149  	vdenv.dbClient.ExpectRequest(`select vdt.lastpk as lastpk, vdt.mismatch as mismatch, vdt.report as report
   150  						from _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
   151  						where vdt.vdiff_id = 1 and vdt.table_name = 't1'`, sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   152  		"lastpk|mismatch|report",
   153  		"varbinary|int64|json",
   154  	),
   155  		`fields:{name:"c1" type:INT64 table:"t1" org_table:"t1" database:"vt_customer" org_name:"c1" column_length:20 charset:63 flags:53251} rows:{lengths:1 values:"1"}|0|{"TableName": "t1", "MatchingRows": 1, "ProcessedRows": 1, "MismatchedRows": 0, "ExtraRowsSource": 0, "ExtraRowsTarget": 0}`,
   156  	), nil)
   157  
   158  	vdenv.dbClient.ExpectRequest("update _vt.vdiff_table set table_rows = 1 where vdiff_id = 1 and table_name = 't1'", singleRowAffected, nil)
   159  	vdenv.dbClient.ExpectRequest(`select vdt.lastpk as lastpk, vdt.mismatch as mismatch, vdt.report as report
   160  						from _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
   161  						where vdt.vdiff_id = 1 and vdt.table_name = 't1'`, sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   162  		"lastpk|mismatch|report",
   163  		"varbinary|int64|json",
   164  	),
   165  		`fields:{name:"c1" type:INT64 table:"t1" org_table:"t1" database:"vt_customer" org_name:"c1" column_length:20 charset:63 flags:53251} rows:{lengths:1 values:"1"}|0|{"TableName": "t1", "MatchingRows": 1, "ProcessedRows": 1, "MismatchedRows": 0, "ExtraRowsSource": 0, "ExtraRowsTarget": 0}`,
   166  	), nil)
   167  	vdenv.dbClient.ExpectRequest("update _vt.vdiff_table set state = 'started' where vdiff_id = 1 and table_name = 't1'", singleRowAffected, nil)
   168  	vdenv.dbClient.ExpectRequest(`insert into _vt.vdiff_log(vdiff_id, message) values (1, 'started: table \'t1\'')`, singleRowAffected, nil)
   169  	vdenv.dbClient.ExpectRequest(fmt.Sprintf("select id, source, pos from _vt.vreplication where workflow = '%s' and db_name = '%s'", vdiffenv.workflow, vdiffDBName), sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   170  		"id|source|pos",
   171  		"int64|varbinary|varbinary",
   172  	),
   173  		fmt.Sprintf("1|%s|%s", vreplSource, vdiffSourceGtid),
   174  	), nil)
   175  	vdenv.dbClient.ExpectRequest(`select vdt.lastpk as lastpk, vdt.mismatch as mismatch, vdt.report as report
   176  						from _vt.vdiff as vd inner join _vt.vdiff_table as vdt on (vd.id = vdt.vdiff_id)
   177  						where vdt.vdiff_id = 1 and vdt.table_name = 't1'`, sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   178  		"lastpk|mismatch|report",
   179  		"varbinary|int64|json",
   180  	),
   181  		`fields:{name:"c1" type:INT64 table:"t1" org_table:"t1" database:"vt_customer" org_name:"c1" column_length:20 charset:63 flags:53251} rows:{lengths:1 values:"1"}|0|{}`,
   182  	), nil)
   183  	vdenv.dbClient.ExpectRequest(`update _vt.vdiff_table set rows_compared = 0, report = '{\"TableName\":\"t1\",\"ProcessedRows\":0,\"MatchingRows\":0,\"MismatchedRows\":0,\"ExtraRowsSource\":0,\"ExtraRowsTarget\":0}' where vdiff_id = 1 and table_name = 't1'`, singleRowAffected, nil)
   184  	vdenv.dbClient.ExpectRequest(`update _vt.vdiff_table set state = 'completed', rows_compared = 0, report = '{\"TableName\":\"t1\",\"ProcessedRows\":0,\"MatchingRows\":0,\"MismatchedRows\":0,\"ExtraRowsSource\":0,\"ExtraRowsTarget\":0}' where vdiff_id = 1 and table_name = 't1'`, singleRowAffected, nil)
   185  	vdenv.dbClient.ExpectRequest(`insert into _vt.vdiff_log(vdiff_id, message) values (1, 'completed: table \'t1\'')`, singleRowAffected, nil)
   186  	vdenv.dbClient.ExpectRequest("update _vt.vdiff_table set state = 'completed' where vdiff_id = 1 and table_name = 't1'", singleRowAffected, nil)
   187  	vdenv.dbClient.ExpectRequest(`insert into _vt.vdiff_log(vdiff_id, message) values (1, 'completed: table \'t1\'')`, singleRowAffected, nil)
   188  	vdenv.dbClient.ExpectRequest("select table_name as table_name from _vt.vdiff_table where vdiff_id = 1 and state != 'completed'", singleRowAffected, nil)
   189  	vdenv.dbClient.ExpectRequest("update _vt.vdiff set state = 'completed', last_error = '' , completed_at = utc_timestamp() where id = 1", singleRowAffected, nil)
   190  	vdenv.dbClient.ExpectRequest("insert into _vt.vdiff_log(vdiff_id, message) values (1, 'State changed to: completed')", singleRowAffected, nil)
   191  
   192  	vdenv.vde.mu.Lock()
   193  	err := vdenv.vde.addController(controllerQR.Named().Row(), options)
   194  	vdenv.vde.mu.Unlock()
   195  	require.NoError(t, err)
   196  
   197  	vdenv.dbClient.Wait()
   198  }
   199  
   200  func TestEngineRetryErroredVDiffs(t *testing.T) {
   201  	vdenv := newTestVDiffEnv(t)
   202  	defer vdenv.close()
   203  	UUID := uuid.New().String()
   204  	expectedControllerCnt := 0
   205  	tests := []struct {
   206  		name              string
   207  		retryQueryResults *sqltypes.Result
   208  		expectRetry       bool
   209  	}{
   210  		{
   211  			name:              "nothing to retry",
   212  			retryQueryResults: noResults,
   213  		},
   214  		{
   215  			name: "non-ephemeral error",
   216  			retryQueryResults: sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   217  				vdiffTestCols,
   218  				vdiffTestColTypes,
   219  			),
   220  				fmt.Sprintf("1|%s|%s|%s|%s|%s|error|%s|%v", UUID, vdiffenv.workflow, tstenv.KeyspaceName, tstenv.ShardName, vdiffDBName, optionsJS,
   221  					mysql.NewSQLError(mysql.ERNoSuchTable, "42S02", "Table 'foo' doesn't exist")),
   222  			),
   223  		},
   224  		{
   225  			name: "ephemeral error",
   226  			retryQueryResults: sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   227  				vdiffTestCols,
   228  				vdiffTestColTypes,
   229  			),
   230  				fmt.Sprintf("1|%s|%s|%s|%s|%s|error|%s|%v", UUID, vdiffenv.workflow, tstenv.KeyspaceName, tstenv.ShardName, vdiffDBName, optionsJS,
   231  					mysql.NewSQLError(mysql.ERLockWaitTimeout, "HY000", "Lock wait timeout exceeded; try restarting transaction")),
   232  			),
   233  			expectRetry: true,
   234  		},
   235  	}
   236  
   237  	for _, tt := range tests {
   238  		t.Run(tt.name, func(t *testing.T) {
   239  			vdiffenv.dbClient.ExpectRequest("select * from _vt.vdiff where state = 'error' and json_unquote(json_extract(options, '$.core_options.auto_retry')) = 'true'", tt.retryQueryResults, nil)
   240  
   241  			// Right now this only supports a single row as with multiple rows we have
   242  			// multiple controllers in separate goroutines and the order is not
   243  			// guaranteed. If we want to support multiple rows here then we'll need to
   244  			// switch to using the queryhistory package. That will also require building
   245  			// out that package to support MockDBClient and its Expect* functions
   246  			// (query+results+err) as right now it only supports a real DBClient and
   247  			// checks for query execution.
   248  			for _, row := range tt.retryQueryResults.Rows {
   249  				id := row[0].ToString()
   250  				if tt.expectRetry {
   251  					vdiffenv.dbClient.ExpectRequestRE("update _vt.vdiff as vd left join _vt.vdiff_table as vdt on \\(vd.id = vdt.vdiff_id\\) set vd.state = 'pending'.*", singleRowAffected, nil)
   252  					vdiffenv.dbClient.ExpectRequest(fmt.Sprintf("select * from _vt.vdiff where id = %s", id), sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   253  						vdiffTestCols,
   254  						vdiffTestColTypes,
   255  					),
   256  						fmt.Sprintf("%s|%s|%s|%s|%s|%s|pending|%s|", id, UUID, vdiffenv.workflow, tstenv.KeyspaceName, tstenv.ShardName, vdiffDBName, optionsJS),
   257  					), nil)
   258  					vdiffenv.dbClient.ExpectRequest(fmt.Sprintf("select * from _vt.vreplication where workflow = '%s' and db_name = '%s'", vdiffenv.workflow, vdiffDBName), sqltypes.MakeTestResult(sqltypes.MakeTestFields(
   259  						"id|workflow|source|pos|stop_pos|max_tps|max_replication_lag|cell|tablet_types|time_updated|transaction_timestamp|state|message|db_name|rows_copied|tags|time_heartbeat|workflow_type|time_throttled|component_throttled|workflow_sub_type",
   260  						"int64|varbinary|blob|varbinary|varbinary|int64|int64|varbinary|varbinary|int64|int64|varbinary|varbinary|varbinary|int64|varbinary|int64|int64|int64|varchar|int64",
   261  					),
   262  						fmt.Sprintf("%s|%s|%s|%s||9223372036854775807|9223372036854775807||PRIMARY,REPLICA|1669511347|0|Running||%s|200||1669511347|1|0||1", id, vdiffenv.workflow, vreplSource, vdiffSourceGtid, vdiffDBName),
   263  					), nil)
   264  
   265  					// At this point we know that we kicked off the expected retry so we can short circit the vdiff.
   266  					shortCircuitTestAfterQuery(fmt.Sprintf("update _vt.vdiff set state = 'started', last_error = '' , started_at = utc_timestamp() where id = %s", id), vdiffenv.dbClient)
   267  
   268  					expectedControllerCnt++
   269  				}
   270  			}
   271  
   272  			err := vdiffenv.vde.retryVDiffs(vdiffenv.vde.ctx)
   273  			assert.NoError(t, err)
   274  			assert.Equal(t, expectedControllerCnt, len(vdiffenv.vde.controllers))
   275  			vdiffenv.dbClient.Wait()
   276  		})
   277  	}
   278  
   279  }