vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vdiff/workflow_differ.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vdiff
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"reflect"
    23  	"strings"
    24  
    25  	"google.golang.org/protobuf/encoding/prototext"
    26  
    27  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    28  
    29  	"vitess.io/vitess/go/vt/schema"
    30  
    31  	"vitess.io/vitess/go/vt/key"
    32  	"vitess.io/vitess/go/vt/log"
    33  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    34  	querypb "vitess.io/vitess/go/vt/proto/query"
    35  	tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata"
    36  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    37  	"vitess.io/vitess/go/vt/sqlparser"
    38  	"vitess.io/vitess/go/vt/vtctl/schematools"
    39  	"vitess.io/vitess/go/vt/vterrors"
    40  	"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
    41  )
    42  
// workflowDiffer has metadata and state for the vdiff of a single workflow on
// this tablet. Only one vdiff can be running for a workflow at any time.
//
// ct is the controller the differ was created for, tableDiffers holds the
// tableDiffer registered for each table by buildPlan, and opts are the VDiff
// options supplied to newWorkflowDiffer.
type workflowDiffer struct {
	ct *controller

	tableDiffers map[string]*tableDiffer // key is table name
	opts         *tabletmanagerdatapb.VDiffOptions
}
    51  
    52  func newWorkflowDiffer(ct *controller, opts *tabletmanagerdatapb.VDiffOptions) (*workflowDiffer, error) {
    53  	wd := &workflowDiffer{
    54  		ct:           ct,
    55  		opts:         opts,
    56  		tableDiffers: make(map[string]*tableDiffer, 1),
    57  	}
    58  	return wd, nil
    59  }
    60  
    61  // If the only difference is the order in which the rows were returned
    62  // by MySQL on each side then we'll have the same number of extras on
    63  // both sides. If that's the case, then let's see if the extra rows on
    64  // both sides are actually different.
    65  func (wd *workflowDiffer) reconcileExtraRows(dr *DiffReport, maxExtraRowsToCompare int64) {
    66  	if (dr.ExtraRowsSource == dr.ExtraRowsTarget) && (dr.ExtraRowsSource <= maxExtraRowsToCompare) {
    67  		for i := range dr.ExtraRowsSourceDiffs {
    68  			foundMatch := false
    69  			for j := range dr.ExtraRowsTargetDiffs {
    70  				if reflect.DeepEqual(dr.ExtraRowsSourceDiffs[i], dr.ExtraRowsTargetDiffs[j]) {
    71  					dr.ExtraRowsSourceDiffs = append(dr.ExtraRowsSourceDiffs[:i], dr.ExtraRowsSourceDiffs[i+1:]...)
    72  					dr.ExtraRowsSource--
    73  					dr.ExtraRowsTargetDiffs = append(dr.ExtraRowsTargetDiffs[:j], dr.ExtraRowsTargetDiffs[j+1:]...)
    74  					dr.ExtraRowsTarget--
    75  					dr.ProcessedRows--
    76  					dr.MatchingRows++
    77  					foundMatch = true
    78  					break
    79  				}
    80  			}
    81  			// If we didn't find a match then the tables are in fact different and we can short circuit the second pass
    82  			if !foundMatch {
    83  				break
    84  			}
    85  		}
    86  	}
    87  	// We can now trim the extra rows diffs on both sides to the maxVDiffReportSampleRows value
    88  	if len(dr.ExtraRowsSourceDiffs) > maxVDiffReportSampleRows {
    89  		dr.ExtraRowsSourceDiffs = dr.ExtraRowsSourceDiffs[:maxVDiffReportSampleRows-1]
    90  	}
    91  	if len(dr.ExtraRowsTargetDiffs) > maxVDiffReportSampleRows {
    92  		dr.ExtraRowsTargetDiffs = dr.ExtraRowsTargetDiffs[:maxVDiffReportSampleRows-1]
    93  	}
    94  }
    95  
    96  func (wd *workflowDiffer) diffTable(ctx context.Context, dbClient binlogplayer.DBClient, td *tableDiffer) error {
    97  	select {
    98  	case <-ctx.Done():
    99  		return vterrors.Errorf(vtrpcpb.Code_CANCELED, "context has expired")
   100  	default:
   101  	}
   102  
   103  	log.Infof("Starting differ on table %s for vdiff %s", td.table.Name, wd.ct.uuid)
   104  	if err := td.updateTableState(ctx, dbClient, StartedState); err != nil {
   105  		return err
   106  	}
   107  	if err := td.initialize(ctx); err != nil {
   108  		return err
   109  	}
   110  	log.Infof("Table initialization done on table %s for vdiff %s", td.table.Name, wd.ct.uuid)
   111  	dr, err := td.diff(ctx, wd.opts.CoreOptions.MaxRows, wd.opts.ReportOptions.DebugQuery, wd.opts.ReportOptions.OnlyPks, wd.opts.CoreOptions.MaxExtraRowsToCompare)
   112  	if err != nil {
   113  		log.Errorf("Encountered an error diffing table %s for vdiff %s: %v", td.table.Name, wd.ct.uuid, err)
   114  		return err
   115  	}
   116  	log.Infof("Table diff done on table %s for vdiff %s with report: %+v", td.table.Name, wd.ct.uuid, dr)
   117  	if dr.ExtraRowsSource > 0 || dr.ExtraRowsTarget > 0 {
   118  		wd.reconcileExtraRows(dr, wd.opts.CoreOptions.MaxExtraRowsToCompare)
   119  	}
   120  
   121  	if dr.MismatchedRows > 0 || dr.ExtraRowsTarget > 0 || dr.ExtraRowsSource > 0 {
   122  		if err := updateTableMismatch(dbClient, wd.ct.id, td.table.Name); err != nil {
   123  			return err
   124  		}
   125  	}
   126  
   127  	log.Infof("Completed reconciliation on table %s for vdiff %s with updated report: %+v", td.table.Name, wd.ct.uuid, dr)
   128  	if err := td.updateTableStateAndReport(ctx, dbClient, CompletedState, dr); err != nil {
   129  		return err
   130  	}
   131  	return nil
   132  }
   133  
   134  func (wd *workflowDiffer) diff(ctx context.Context) error {
   135  	dbClient := wd.ct.dbClientFactory()
   136  	if err := dbClient.Connect(); err != nil {
   137  		return err
   138  	}
   139  	defer dbClient.Close()
   140  
   141  	select {
   142  	case <-ctx.Done():
   143  		return vterrors.Errorf(vtrpcpb.Code_CANCELED, "context has expired")
   144  	default:
   145  	}
   146  
   147  	filter := wd.ct.filter
   148  	req := &tabletmanagerdatapb.GetSchemaRequest{}
   149  	schm, err := schematools.GetSchema(ctx, wd.ct.ts, wd.ct.tmc, wd.ct.vde.thisTablet.Alias, req)
   150  	if err != nil {
   151  		return vterrors.Wrap(err, "GetSchema")
   152  	}
   153  	if err = wd.buildPlan(dbClient, filter, schm); err != nil {
   154  		return vterrors.Wrap(err, "buildPlan")
   155  	}
   156  	if err := wd.initVDiffTables(dbClient); err != nil {
   157  		return err
   158  	}
   159  	for _, td := range wd.tableDiffers {
   160  		select {
   161  		case <-ctx.Done():
   162  			return vterrors.Errorf(vtrpcpb.Code_CANCELED, "context has expired")
   163  		default:
   164  		}
   165  		query := fmt.Sprintf(sqlGetVDiffTable, wd.ct.id, encodeString(td.table.Name))
   166  		qr, err := dbClient.ExecuteFetch(query, 1)
   167  		if err != nil {
   168  			return err
   169  		}
   170  		if len(qr.Rows) == 0 {
   171  			return fmt.Errorf("no vdiff table found for %s on tablet %v",
   172  				td.table.Name, wd.ct.vde.thisTablet.Alias)
   173  		}
   174  
   175  		log.Infof("Starting diff of table %s for vdiff %s", td.table.Name, wd.ct.uuid)
   176  		if err := wd.diffTable(ctx, dbClient, td); err != nil {
   177  			if err := td.updateTableState(ctx, dbClient, ErrorState); err != nil {
   178  				return err
   179  			}
   180  			insertVDiffLog(ctx, dbClient, wd.ct.id, fmt.Sprintf("Table %s Error: %s", td.table.Name, err))
   181  			return err
   182  		}
   183  		if err := td.updateTableState(ctx, dbClient, CompletedState); err != nil {
   184  			return err
   185  		}
   186  		log.Infof("Completed diff of table %s for vdiff %s", td.table.Name, wd.ct.uuid)
   187  	}
   188  	if err := wd.markIfCompleted(ctx, dbClient); err != nil {
   189  		return err
   190  	}
   191  	return nil
   192  }
   193  
   194  func (wd *workflowDiffer) markIfCompleted(ctx context.Context, dbClient binlogplayer.DBClient) error {
   195  	query := fmt.Sprintf(sqlGetIncompleteTables, wd.ct.id)
   196  	qr, err := dbClient.ExecuteFetch(query, -1)
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	// Double check to be sure all of the individual table diffs completed without error
   202  	// before marking the vdiff as completed.
   203  	if len(qr.Rows) == 0 {
   204  		if err := wd.ct.updateState(dbClient, CompletedState, nil); err != nil {
   205  			return err
   206  		}
   207  	}
   208  	return nil
   209  }
   210  
   211  func (wd *workflowDiffer) buildPlan(dbClient binlogplayer.DBClient, filter *binlogdatapb.Filter, schm *tabletmanagerdatapb.SchemaDefinition) error {
   212  	var specifiedTables []string
   213  	optTables := strings.TrimSpace(wd.opts.CoreOptions.Tables)
   214  	if optTables != "" {
   215  		specifiedTables = strings.Split(optTables, ",")
   216  	}
   217  
   218  	for _, table := range schm.TableDefinitions {
   219  		// if user specified tables explicitly only use those, otherwise diff all tables in workflow
   220  		if len(specifiedTables) != 0 && !stringListContains(specifiedTables, table.Name) {
   221  			continue
   222  		}
   223  		if schema.IsInternalOperationTableName(table.Name) {
   224  			continue
   225  		}
   226  		rule, err := vreplication.MatchTable(table.Name, filter)
   227  		if err != nil {
   228  			return err
   229  		}
   230  		if rule == nil || rule.Filter == "exclude" {
   231  			continue
   232  		}
   233  		sourceQuery := rule.Filter
   234  		switch {
   235  		case rule.Filter == "":
   236  			buf := sqlparser.NewTrackedBuffer(nil)
   237  			buf.Myprintf("select * from %v", sqlparser.NewIdentifierCS(table.Name))
   238  			sourceQuery = buf.String()
   239  		case key.IsKeyRange(rule.Filter):
   240  			buf := sqlparser.NewTrackedBuffer(nil)
   241  			buf.Myprintf("select * from %v where in_keyrange(%v)", sqlparser.NewIdentifierCS(table.Name), sqlparser.NewStrLiteral(rule.Filter))
   242  			sourceQuery = buf.String()
   243  		}
   244  
   245  		td := newTableDiffer(wd, table, sourceQuery)
   246  		lastpkpb, err := wd.getTableLastPK(dbClient, table.Name)
   247  		if err != nil {
   248  			return err
   249  		}
   250  		td.lastPK = lastpkpb
   251  		wd.tableDiffers[table.Name] = td
   252  		if _, err := td.buildTablePlan(); err != nil {
   253  			return err
   254  		}
   255  	}
   256  	if len(wd.tableDiffers) == 0 {
   257  		return fmt.Errorf("no tables found to diff, %s:%s, on tablet %v",
   258  			optTables, specifiedTables, wd.ct.vde.thisTablet.Alias)
   259  	}
   260  	return nil
   261  }
   262  
   263  // getTableLastPK gets the lastPK protobuf message for a given vdiff table.
   264  func (wd *workflowDiffer) getTableLastPK(dbClient binlogplayer.DBClient, tableName string) (*querypb.QueryResult, error) {
   265  	query := fmt.Sprintf(sqlGetVDiffTable, wd.ct.id, encodeString(tableName))
   266  	qr, err := dbClient.ExecuteFetch(query, 1)
   267  	if err != nil {
   268  		return nil, err
   269  	}
   270  	if len(qr.Rows) == 1 {
   271  		var lastpk []byte
   272  		if lastpk, err = qr.Named().Row().ToBytes("lastpk"); err != nil {
   273  			return nil, err
   274  		}
   275  		if len(lastpk) != 0 {
   276  			var lastpkpb querypb.QueryResult
   277  			if err := prototext.Unmarshal(lastpk, &lastpkpb); err != nil {
   278  				return nil, err
   279  			}
   280  			return &lastpkpb, nil
   281  		}
   282  	}
   283  	return nil, nil
   284  }
   285  
   286  func (wd *workflowDiffer) initVDiffTables(dbClient binlogplayer.DBClient) error {
   287  	tableIn := strings.Builder{}
   288  	n := 0
   289  	for tableName := range wd.tableDiffers {
   290  		tableIn.WriteString(encodeString(tableName))
   291  		if n++; n < len(wd.tableDiffers) {
   292  			tableIn.WriteByte(',')
   293  		}
   294  	}
   295  	query := fmt.Sprintf(sqlGetAllTableRows, encodeString(wd.ct.vde.dbName), tableIn.String())
   296  	isqr, err := dbClient.ExecuteFetch(query, -1)
   297  	if err != nil {
   298  		return err
   299  	}
   300  	for _, row := range isqr.Named().Rows {
   301  		tableName, _ := row.ToString("table_name")
   302  		tableRows, _ := row.ToInt64("table_rows")
   303  
   304  		query := fmt.Sprintf(sqlGetVDiffTable, wd.ct.id, encodeString(tableName))
   305  		qr, err := dbClient.ExecuteFetch(query, -1)
   306  		if err != nil {
   307  			return err
   308  		}
   309  		if len(qr.Rows) == 0 {
   310  			query = fmt.Sprintf(sqlNewVDiffTable, wd.ct.id, encodeString(tableName), tableRows)
   311  		} else if len(qr.Rows) == 1 {
   312  			query = fmt.Sprintf(sqlUpdateTableRows, tableRows, wd.ct.id, encodeString(tableName))
   313  		} else {
   314  			return fmt.Errorf("invalid state found for vdiff table %s for vdiff_id %d on tablet %s",
   315  				tableName, wd.ct.id, wd.ct.vde.thisTablet.Alias)
   316  		}
   317  		if _, err := dbClient.ExecuteFetch(query, 1); err != nil {
   318  			return err
   319  		}
   320  	}
   321  	return nil
   322  }