vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vdiff/table_differ.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vdiff 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "sync" 24 "time" 25 26 "vitess.io/vitess/go/vt/proto/topodata" 27 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 28 "vitess.io/vitess/go/vt/sqlparser" 29 "vitess.io/vitess/go/vt/topo" 30 31 "google.golang.org/protobuf/encoding/prototext" 32 "google.golang.org/protobuf/proto" 33 34 "vitess.io/vitess/go/mysql" 35 "vitess.io/vitess/go/mysql/collations" 36 "vitess.io/vitess/go/sqltypes" 37 "vitess.io/vitess/go/vt/binlog/binlogplayer" 38 "vitess.io/vitess/go/vt/concurrency" 39 "vitess.io/vitess/go/vt/discovery" 40 "vitess.io/vitess/go/vt/log" 41 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 42 querypb "vitess.io/vitess/go/vt/proto/query" 43 tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" 44 "vitess.io/vitess/go/vt/topo/topoproto" 45 "vitess.io/vitess/go/vt/vterrors" 46 "vitess.io/vitess/go/vt/vtgate/engine" 47 "vitess.io/vitess/go/vt/vtgate/evalengine" 48 "vitess.io/vitess/go/vt/vttablet/tabletconn" 49 ) 50 51 // how long to wait for background operations to complete 52 var BackgroundOperationTimeout = topo.RemoteOperationTimeout * 4 53 54 // compareColInfo contains the metadata for a column of the table being diffed 55 type compareColInfo struct { 56 colIndex int // index of the column in the filter's select 57 collation collations.Collation // is the collation of the column, if any 58 isPK bool // is this column part of the primary key 59 colName string 60 } 61 62 // tableDiffer performs a diff for one table in the workflow. 63 type tableDiffer struct { 64 wd *workflowDiffer 65 tablePlan *tablePlan 66 67 // sourcePrimitive and targetPrimitive are used for streaming 68 sourcePrimitive engine.Primitive 69 targetPrimitive engine.Primitive 70 71 // sourceQuery is computed from the associated query for this table in the vreplication workflow's Rule Filter 72 sourceQuery string 73 table *tabletmanagerdatapb.TableDefinition 74 lastPK *querypb.QueryResult 75 } 76 77 func newTableDiffer(wd *workflowDiffer, table *tabletmanagerdatapb.TableDefinition, sourceQuery string) *tableDiffer { 78 return &tableDiffer{wd: wd, table: table, sourceQuery: sourceQuery} 79 } 80 81 // initialize 82 func (td *tableDiffer) initialize(ctx context.Context) error { 83 vdiffEngine := td.wd.ct.vde 84 vdiffEngine.snapshotMu.Lock() 85 defer vdiffEngine.snapshotMu.Unlock() 86 87 dbClient := td.wd.ct.dbClientFactory() 88 if err := dbClient.Connect(); err != nil { 89 return err 90 } 91 defer dbClient.Close() 92 93 targetKeyspace := td.wd.ct.vde.thisTablet.Keyspace 94 log.Infof("Locking target keyspace %s", targetKeyspace) 95 ctx, unlock, lockErr := td.wd.ct.ts.LockKeyspace(ctx, targetKeyspace, "vdiff") 96 if lockErr != nil { 97 log.Errorf("LockKeyspace failed: %v", lockErr) 98 return lockErr 99 } 100 101 var err error 102 defer func() { 103 unlock(&err) 104 if err != nil { 105 log.Errorf("UnlockKeyspace %s failed: %v", targetKeyspace, lockErr) 106 } 107 }() 108 109 if err := td.stopTargetVReplicationStreams(ctx, dbClient); err != nil { 110 return err 111 } 112 defer func() { 113 // We use a new context as we want to reset the state even 114 // when the parent context has timed out or been canceled. 115 log.Infof("Restarting the %q VReplication workflow on target tablets in keyspace %q", 116 td.wd.ct.workflow, targetKeyspace) 117 restartCtx, restartCancel := context.WithTimeout(context.Background(), BackgroundOperationTimeout) 118 defer restartCancel() 119 if err := td.restartTargetVReplicationStreams(restartCtx); err != nil { 120 log.Errorf("error restarting target streams: %v", err) 121 } 122 }() 123 124 if err := td.selectTablets(ctx, td.wd.opts.PickerOptions.SourceCell, td.wd.opts.PickerOptions.TabletTypes); err != nil { 125 return err 126 } 127 if err := td.syncSourceStreams(ctx); err != nil { 128 return err 129 } 130 if err := td.startSourceDataStreams(ctx); err != nil { 131 return err 132 } 133 if err := td.syncTargetStreams(ctx); err != nil { 134 return err 135 } 136 if err := td.startTargetDataStream(ctx); err != nil { 137 return err 138 } 139 td.setupRowSorters() 140 return nil 141 } 142 143 func (td *tableDiffer) stopTargetVReplicationStreams(ctx context.Context, dbClient binlogplayer.DBClient) error { 144 log.Infof("stopTargetVReplicationStreams") 145 ct := td.wd.ct 146 query := fmt.Sprintf("update _vt.vreplication set state = 'Stopped', message='for vdiff' %s", ct.workflowFilter) 147 if _, err := ct.vde.vre.Exec(query); err != nil { 148 return err 149 } 150 // streams are no longer running because vre.Exec would have replaced old controllers and new ones will not start 151 152 // update position of all source streams 153 query = fmt.Sprintf("select id, source, pos from _vt.vreplication %s", ct.workflowFilter) 154 qr, err := dbClient.ExecuteFetch(query, -1) 155 if err != nil { 156 return err 157 } 158 for _, row := range qr.Named().Rows { 159 id, _ := row["id"].ToInt64() 160 pos := row["pos"].ToString() 161 mpos, err := binlogplayer.DecodePosition(pos) 162 if err != nil { 163 return err 164 } 165 if mpos.IsZero() { 166 return fmt.Errorf("stream %d has not started on tablet %v", 167 id, td.wd.ct.vde.thisTablet.Alias) 168 } 169 sourceBytes, err := row["source"].ToBytes() 170 if err != nil { 171 return err 172 } 173 var bls binlogdatapb.BinlogSource 174 if err := prototext.Unmarshal(sourceBytes, &bls); err != nil { 175 return err 176 } 177 ct.sources[bls.Shard].position = mpos 178 } 179 180 return nil 181 } 182 183 func (td *tableDiffer) forEachSource(cb func(source *migrationSource) error) error { 184 ct := td.wd.ct 185 var wg sync.WaitGroup 186 allErrors := &concurrency.AllErrorRecorder{} 187 for _, source := range ct.sources { 188 wg.Add(1) 189 go func(source *migrationSource) { 190 defer wg.Done() 191 log.Flush() 192 if err := cb(source); err != nil { 193 allErrors.RecordError(err) 194 } 195 }(source) 196 } 197 wg.Wait() 198 199 return allErrors.AggrError(vterrors.Aggregate) 200 } 201 202 func (td *tableDiffer) selectTablets(ctx context.Context, cell, tabletTypes string) error { 203 var wg sync.WaitGroup 204 ct := td.wd.ct 205 var err1, err2 error 206 207 // For Mount+Migrate, the source tablets will be in a different 208 // Vitess cluster with its own TopoServer. 209 sourceTopoServer := ct.ts 210 if ct.externalCluster != "" { 211 extTS, err := ct.ts.OpenExternalVitessClusterServer(ctx, ct.externalCluster) 212 if err != nil { 213 return err 214 } 215 sourceTopoServer = extTS 216 } 217 wg.Add(1) 218 go func() { 219 defer wg.Done() 220 err1 = td.forEachSource(func(source *migrationSource) error { 221 tablet, err := pickTablet(ctx, sourceTopoServer, cell, ct.sourceKeyspace, source.shard, tabletTypes) 222 if err != nil { 223 return err 224 } 225 source.tablet = tablet 226 return nil 227 }) 228 }() 229 230 wg.Add(1) 231 go func() { 232 defer wg.Done() 233 tablet, err2 := pickTablet(ctx, ct.ts, td.wd.opts.PickerOptions.TargetCell, ct.vde.thisTablet.Keyspace, 234 ct.vde.thisTablet.Shard, td.wd.opts.PickerOptions.TabletTypes) 235 if err2 != nil { 236 return 237 } 238 ct.targetShardStreamer = &shardStreamer{ 239 tablet: tablet, 240 shard: tablet.Shard, 241 } 242 }() 243 244 wg.Wait() 245 if err1 != nil { 246 return err1 247 } 248 return err2 249 } 250 251 func pickTablet(ctx context.Context, ts *topo.Server, cell, keyspace, shard, tabletTypes string) (*topodata.Tablet, error) { 252 tp, err := discovery.NewTabletPicker(ts, []string{cell}, keyspace, shard, tabletTypes) 253 if err != nil { 254 return nil, err 255 } 256 return tp.PickForStreaming(ctx) 257 } 258 259 func (td *tableDiffer) syncSourceStreams(ctx context.Context) error { 260 // source can be replica, wait for them to at least reach max gtid of all target streams 261 ct := td.wd.ct 262 waitCtx, cancel := context.WithTimeout(ctx, time.Duration(ct.options.CoreOptions.TimeoutSeconds*int64(time.Second))) 263 defer cancel() 264 265 if err := td.forEachSource(func(source *migrationSource) error { 266 log.Flush() 267 if err := ct.tmc.WaitForPosition(waitCtx, source.tablet, mysql.EncodePosition(source.position)); err != nil { 268 return vterrors.Wrapf(err, "WaitForPosition for tablet %v", topoproto.TabletAliasString(source.tablet.Alias)) 269 } 270 return nil 271 }); err != nil { 272 return err 273 } 274 return nil 275 } 276 277 func (td *tableDiffer) syncTargetStreams(ctx context.Context) error { 278 ct := td.wd.ct 279 waitCtx, cancel := context.WithTimeout(ctx, time.Duration(ct.options.CoreOptions.TimeoutSeconds*int64(time.Second))) 280 defer cancel() 281 282 if err := td.forEachSource(func(source *migrationSource) error { 283 query := fmt.Sprintf("update _vt.vreplication set state='Running', stop_pos='%s', message='synchronizing for vdiff' where id=%d", 284 source.snapshotPosition, source.vrID) 285 if _, err := ct.tmc.VReplicationExec(waitCtx, ct.vde.thisTablet, query); err != nil { 286 return err 287 } 288 if err := ct.vde.vre.WaitForPos(waitCtx, int(source.vrID), source.snapshotPosition); err != nil { 289 log.Errorf("WaitForPosition error: %d: %s", source.vrID, err) 290 return vterrors.Wrapf(err, "WaitForPosition for stream id %d", source.vrID) 291 } 292 return nil 293 }); err != nil { 294 return err 295 } 296 return nil 297 } 298 299 func (td *tableDiffer) startTargetDataStream(ctx context.Context) error { 300 ct := td.wd.ct 301 gtidch := make(chan string, 1) 302 ct.targetShardStreamer.result = make(chan *sqltypes.Result, 1) 303 go td.streamOneShard(ctx, ct.targetShardStreamer, td.tablePlan.targetQuery, td.lastPK, gtidch) 304 gtid, ok := <-gtidch 305 if !ok { 306 log.Infof("streaming error: %v", ct.targetShardStreamer.err) 307 return ct.targetShardStreamer.err 308 } 309 ct.targetShardStreamer.snapshotPosition = gtid 310 return nil 311 } 312 313 func (td *tableDiffer) startSourceDataStreams(ctx context.Context) error { 314 if err := td.forEachSource(func(source *migrationSource) error { 315 gtidch := make(chan string, 1) 316 source.result = make(chan *sqltypes.Result, 1) 317 go td.streamOneShard(ctx, source.shardStreamer, td.tablePlan.sourceQuery, td.lastPK, gtidch) 318 319 gtid, ok := <-gtidch 320 if !ok { 321 return source.err 322 } 323 source.snapshotPosition = gtid 324 return nil 325 }); err != nil { 326 return err 327 } 328 return nil 329 } 330 331 func (td *tableDiffer) restartTargetVReplicationStreams(ctx context.Context) error { 332 ct := td.wd.ct 333 query := fmt.Sprintf("update _vt.vreplication set state='Running', message='', stop_pos='' where db_name=%s and workflow=%s", 334 encodeString(ct.vde.dbName), encodeString(ct.workflow)) 335 log.Infof("Restarting the %q VReplication workflow using %q", ct.workflow, query) 336 var err error 337 // Let's retry a few times if we get a retryable error. 338 for i := 1; i <= 3; i++ { 339 _, err := ct.tmc.VReplicationExec(ctx, ct.vde.thisTablet, query) 340 if err == nil || !mysql.IsEphemeralError(err) { 341 break 342 } 343 log.Warningf("Encountered the following error while restarting the %q VReplication workflow, will retry (attempt #%d): %v", 344 ct.workflow, i, err) 345 } 346 return err 347 } 348 349 func (td *tableDiffer) streamOneShard(ctx context.Context, participant *shardStreamer, query string, lastPK *querypb.QueryResult, gtidch chan string) { 350 log.Infof("streamOneShard Start on %s using query: %s", participant.tablet.Alias.String(), query) 351 defer func() { 352 log.Infof("streamOneShard End on %s", participant.tablet.Alias.String()) 353 close(participant.result) 354 close(gtidch) 355 }() 356 participant.err = func() error { 357 conn, err := tabletconn.GetDialer()(participant.tablet, false) 358 if err != nil { 359 return err 360 } 361 defer conn.Close(ctx) 362 363 target := &querypb.Target{ 364 Keyspace: participant.tablet.Keyspace, 365 Shard: participant.shard, 366 TabletType: participant.tablet.Type, 367 } 368 var fields []*querypb.Field 369 req := &binlogdatapb.VStreamRowsRequest{Target: target, Query: query, Lastpk: lastPK} 370 return conn.VStreamRows(ctx, req, func(vsrRaw *binlogdatapb.VStreamRowsResponse) error { 371 // We clone (deep copy) the VStreamRowsResponse -- which contains a vstream packet with N rows and 372 // their corresponding GTID position/snapshot along with the LastPK in the row set -- so that we 373 // can safely process it while the next VStreamRowsResponse message is getting prepared by the 374 // shardStreamer. Without doing this, we would have to serialize the row processing by using 375 // unbuffered channels which would present a major performance bottleneck. 376 // This need arises from the gRPC VStreamRowsResponse pooling and re-use/recycling done for 377 // gRPCQueryClient.VStreamRows() in vttablet/grpctabletconn/conn. 378 vsr := proto.Clone(vsrRaw).(*binlogdatapb.VStreamRowsResponse) 379 380 if len(fields) == 0 { 381 if len(vsr.Fields) == 0 { 382 return fmt.Errorf("did not received expected fields in response %+v on tablet %v", 383 vsr, td.wd.ct.vde.thisTablet.Alias) 384 } 385 fields = vsr.Fields 386 gtidch <- vsr.Gtid 387 } 388 if len(vsr.Rows) == 0 && len(vsr.Fields) == 0 { 389 return nil 390 } 391 p3qr := &querypb.QueryResult{ 392 Fields: fields, 393 Rows: vsr.Rows, 394 } 395 result := sqltypes.Proto3ToResult(p3qr) 396 397 // Fields should be received only once, and sent only once. 398 if vsr.Fields == nil { 399 result.Fields = nil 400 } 401 select { 402 case participant.result <- result: 403 case <-ctx.Done(): 404 return vterrors.Wrap(ctx.Err(), "VStreamRows") 405 } 406 return nil 407 }) 408 }() 409 } 410 411 func (td *tableDiffer) setupRowSorters() { 412 // combine all sources into a slice and create a merge sorter for it 413 sources := make(map[string]*shardStreamer) 414 for shard, source := range td.wd.ct.sources { 415 sources[shard] = source.shardStreamer 416 } 417 td.sourcePrimitive = newMergeSorter(sources, td.tablePlan.comparePKs) 418 419 // create a merge sorter for the target 420 targets := make(map[string]*shardStreamer) 421 targets[td.wd.ct.targetShardStreamer.shard] = td.wd.ct.targetShardStreamer 422 td.targetPrimitive = newMergeSorter(targets, td.tablePlan.comparePKs) 423 424 // If there were aggregate expressions, we have to re-aggregate 425 // the results, which engine.OrderedAggregate can do. 426 if len(td.tablePlan.aggregates) != 0 { 427 td.sourcePrimitive = &engine.OrderedAggregate{ 428 Aggregates: td.tablePlan.aggregates, 429 GroupByKeys: pkColsToGroupByParams(td.tablePlan.pkCols), 430 Input: td.sourcePrimitive, 431 } 432 } 433 } 434 435 func (td *tableDiffer) diff(ctx context.Context, rowsToCompare int64, debug, onlyPks bool, maxExtraRowsToCompare int64) (*DiffReport, error) { 436 dbClient := td.wd.ct.dbClientFactory() 437 if err := dbClient.Connect(); err != nil { 438 return nil, err 439 } 440 defer dbClient.Close() 441 442 // We need to continue were we left off when appropriate. This can be an 443 // auto-retry on error, or a manual retry via the resume command. 444 // Otherwise the existing state will be empty and we start from scratch. 445 query := fmt.Sprintf(sqlGetVDiffTable, td.wd.ct.id, encodeString(td.table.Name)) 446 cs, err := dbClient.ExecuteFetch(query, -1) 447 if err != nil { 448 return nil, err 449 } 450 if len(cs.Rows) == 0 { 451 return nil, fmt.Errorf("no state found for vdiff table %s for vdiff_id %d on tablet %v", 452 td.table.Name, td.wd.ct.id, td.wd.ct.vde.thisTablet.Alias) 453 } else if len(cs.Rows) > 1 { 454 return nil, fmt.Errorf("invalid state found for vdiff table %s (multiple records) for vdiff_id %d on tablet %v", 455 td.table.Name, td.wd.ct.id, td.wd.ct.vde.thisTablet.Alias) 456 } 457 curState := cs.Named().Row() 458 mismatch := curState.AsBool("mismatch", false) 459 dr := &DiffReport{} 460 if rpt := curState.AsBytes("report", []byte("{}")); json.Valid(rpt) { 461 if err = json.Unmarshal(rpt, dr); err != nil { 462 return nil, err 463 } 464 } 465 dr.TableName = td.table.Name 466 467 sourceExecutor := newPrimitiveExecutor(ctx, td.sourcePrimitive, "source") 468 targetExecutor := newPrimitiveExecutor(ctx, td.targetPrimitive, "target") 469 var sourceRow, lastProcessedRow, targetRow []sqltypes.Value 470 advanceSource := true 471 advanceTarget := true 472 473 // Save our progress when we finish the run 474 defer func() { 475 if err := td.updateTableProgress(dbClient, dr, lastProcessedRow); err != nil { 476 log.Errorf("Failed to update vdiff progress on %s table: %v", td.table.Name, err) 477 } 478 }() 479 480 for { 481 lastProcessedRow = sourceRow 482 483 select { 484 case <-ctx.Done(): 485 return nil, vterrors.Errorf(vtrpcpb.Code_CANCELED, "context has expired") 486 default: 487 } 488 489 if !mismatch && dr.MismatchedRows > 0 { 490 mismatch = true 491 log.Infof("Flagging mismatch for %s: %+v", td.table.Name, dr) 492 if err := updateTableMismatch(dbClient, td.wd.ct.id, td.table.Name); err != nil { 493 return nil, err 494 } 495 } 496 rowsToCompare-- 497 if rowsToCompare < 0 { 498 log.Infof("Stopping vdiff, specified limit reached") 499 return dr, nil 500 } 501 if advanceSource { 502 sourceRow, err = sourceExecutor.next() 503 if err != nil { 504 log.Error(err) 505 return nil, err 506 } 507 } 508 if advanceTarget { 509 targetRow, err = targetExecutor.next() 510 if err != nil { 511 log.Error(err) 512 return nil, err 513 } 514 } 515 516 if sourceRow == nil && targetRow == nil { 517 return dr, nil 518 } 519 520 advanceSource = true 521 advanceTarget = true 522 if sourceRow == nil { 523 diffRow, err := td.genRowDiff(td.tablePlan.sourceQuery, targetRow, debug, onlyPks) 524 if err != nil { 525 return nil, vterrors.Wrap(err, "unexpected error generating diff") 526 } 527 dr.ExtraRowsTargetDiffs = append(dr.ExtraRowsTargetDiffs, diffRow) 528 529 // drain target, update count 530 count, err := targetExecutor.drain(ctx) 531 if err != nil { 532 return nil, err 533 } 534 dr.ExtraRowsTarget += 1 + count 535 dr.ProcessedRows += 1 + count 536 return dr, nil 537 } 538 if targetRow == nil { 539 // no more rows from the target 540 // we know we have rows from source, drain, update count 541 diffRow, err := td.genRowDiff(td.tablePlan.sourceQuery, sourceRow, debug, onlyPks) 542 if err != nil { 543 return nil, vterrors.Wrap(err, "unexpected error generating diff") 544 } 545 dr.ExtraRowsSourceDiffs = append(dr.ExtraRowsSourceDiffs, diffRow) 546 count, err := sourceExecutor.drain(ctx) 547 if err != nil { 548 return nil, err 549 } 550 dr.ExtraRowsSource += 1 + count 551 dr.ProcessedRows += 1 + count 552 return dr, nil 553 } 554 555 dr.ProcessedRows++ 556 557 // Compare pk values. 558 c, err := td.compare(sourceRow, targetRow, td.tablePlan.comparePKs, false) 559 switch { 560 case err != nil: 561 return nil, err 562 case c < 0: 563 if dr.ExtraRowsSource < maxExtraRowsToCompare { 564 diffRow, err := td.genRowDiff(td.tablePlan.sourceQuery, sourceRow, debug, onlyPks) 565 if err != nil { 566 return nil, vterrors.Wrap(err, "unexpected error generating diff") 567 } 568 dr.ExtraRowsSourceDiffs = append(dr.ExtraRowsSourceDiffs, diffRow) 569 } 570 dr.ExtraRowsSource++ 571 advanceTarget = false 572 continue 573 case c > 0: 574 if dr.ExtraRowsTarget < maxExtraRowsToCompare { 575 diffRow, err := td.genRowDiff(td.tablePlan.targetQuery, targetRow, debug, onlyPks) 576 if err != nil { 577 return nil, vterrors.Wrap(err, "unexpected error generating diff") 578 } 579 dr.ExtraRowsTargetDiffs = append(dr.ExtraRowsTargetDiffs, diffRow) 580 } 581 dr.ExtraRowsTarget++ 582 advanceSource = false 583 continue 584 } 585 586 // c == 0 587 // Compare the non-pk values. 588 c, err = td.compare(sourceRow, targetRow, td.tablePlan.compareCols, true) 589 switch { 590 case err != nil: 591 return nil, err 592 case c != 0: 593 // We don't do a second pass to compare mismatched rows so we can cap the slice here 594 if dr.MismatchedRows < maxVDiffReportSampleRows { 595 sourceDiffRow, err := td.genRowDiff(td.tablePlan.targetQuery, sourceRow, debug, onlyPks) 596 if err != nil { 597 return nil, vterrors.Wrap(err, "unexpected error generating diff") 598 } 599 targetDiffRow, err := td.genRowDiff(td.tablePlan.targetQuery, targetRow, debug, onlyPks) 600 if err != nil { 601 return nil, vterrors.Wrap(err, "unexpected error generating diff") 602 } 603 dr.MismatchedRowsDiffs = append(dr.MismatchedRowsDiffs, &DiffMismatch{Source: sourceDiffRow, Target: targetDiffRow}) 604 } 605 dr.MismatchedRows++ 606 default: 607 dr.MatchingRows++ 608 } 609 610 // Update progress every 10,000 rows as we go along. This will allow us to provide 611 // approximate progress information but without too much overhead for when it's not 612 // needed or even desired. 613 if dr.ProcessedRows%1e4 == 0 { 614 if err := td.updateTableProgress(dbClient, dr, sourceRow); err != nil { 615 return nil, err 616 } 617 } 618 } 619 } 620 621 func (td *tableDiffer) compare(sourceRow, targetRow []sqltypes.Value, cols []compareColInfo, compareOnlyNonPKs bool) (int, error) { 622 for _, col := range cols { 623 if col.isPK && compareOnlyNonPKs { 624 continue 625 } 626 compareIndex := col.colIndex 627 var c int 628 var err error 629 var collationID collations.ID 630 // if the collation is nil or unknown, use binary collation to compare as bytes 631 if col.collation == nil { 632 collationID = collations.CollationBinaryID 633 } else { 634 collationID = col.collation.ID() 635 } 636 c, err = evalengine.NullsafeCompare(sourceRow[compareIndex], targetRow[compareIndex], collationID) 637 if err != nil { 638 return 0, err 639 } 640 if c != 0 { 641 return c, nil 642 } 643 } 644 return 0, nil 645 } 646 647 func (td *tableDiffer) updateTableProgress(dbClient binlogplayer.DBClient, dr *DiffReport, lastRow []sqltypes.Value) error { 648 if dr == nil { 649 return fmt.Errorf("cannot update progress with a nil diff report") 650 } 651 var lastPK []byte 652 var err error 653 var query string 654 rpt, err := json.Marshal(dr) 655 if err != nil { 656 return err 657 } 658 if lastRow != nil { 659 lastPK, err = td.lastPKFromRow(lastRow) 660 if err != nil { 661 return err 662 } 663 664 query = fmt.Sprintf(sqlUpdateTableProgress, dr.ProcessedRows, encodeString(string(lastPK)), encodeString(string(rpt)), td.wd.ct.id, encodeString(td.table.Name)) 665 } else { 666 query = fmt.Sprintf(sqlUpdateTableNoProgress, dr.ProcessedRows, encodeString(string(rpt)), td.wd.ct.id, encodeString(td.table.Name)) 667 } 668 if _, err := dbClient.ExecuteFetch(query, 1); err != nil { 669 return err 670 } 671 return nil 672 } 673 674 func (td *tableDiffer) updateTableState(ctx context.Context, dbClient binlogplayer.DBClient, state VDiffState) error { 675 query := fmt.Sprintf(sqlUpdateTableState, encodeString(string(state)), td.wd.ct.id, encodeString(td.table.Name)) 676 if _, err := dbClient.ExecuteFetch(query, 1); err != nil { 677 return err 678 } 679 insertVDiffLog(ctx, dbClient, td.wd.ct.id, fmt.Sprintf("%s: table %s", state, encodeString(td.table.Name))) 680 681 return nil 682 } 683 684 func (td *tableDiffer) updateTableStateAndReport(ctx context.Context, dbClient binlogplayer.DBClient, state VDiffState, dr *DiffReport) error { 685 var report string 686 if dr != nil { 687 reportJSONBytes, err := json.Marshal(dr) 688 if err != nil { 689 return err 690 } 691 report = string(reportJSONBytes) 692 } else { 693 report = "{}" 694 } 695 query := fmt.Sprintf(sqlUpdateTableStateAndReport, encodeString(string(state)), dr.ProcessedRows, encodeString(report), td.wd.ct.id, encodeString(td.table.Name)) 696 if _, err := dbClient.ExecuteFetch(query, 1); err != nil { 697 return err 698 } 699 insertVDiffLog(ctx, dbClient, td.wd.ct.id, fmt.Sprintf("%s: table %s", state, encodeString(td.table.Name))) 700 701 return nil 702 } 703 704 func updateTableMismatch(dbClient binlogplayer.DBClient, vdiffID int64, table string) error { 705 query := fmt.Sprintf(sqlUpdateTableMismatch, vdiffID, encodeString(table)) 706 if _, err := dbClient.ExecuteFetch(query, 1); err != nil { 707 return err 708 } 709 return nil 710 } 711 712 func (td *tableDiffer) lastPKFromRow(row []sqltypes.Value) ([]byte, error) { 713 pkColCnt := len(td.tablePlan.pkCols) 714 pkFields := make([]*querypb.Field, pkColCnt) 715 pkVals := make([]sqltypes.Value, pkColCnt) 716 for i, colIndex := range td.tablePlan.pkCols { 717 pkFields[i] = td.tablePlan.table.Fields[colIndex] 718 pkVals[i] = row[colIndex] 719 } 720 buf, err := prototext.Marshal(&querypb.QueryResult{ 721 Fields: pkFields, 722 Rows: []*querypb.Row{sqltypes.RowToProto3(pkVals)}, 723 }) 724 return buf, err 725 } 726 727 // If SourceTimeZone is defined in the BinlogSource (_vt.vreplication.source), the 728 // VReplication workflow would have converted the datetime columns expecting the 729 // source to have been in the SourceTimeZone and target in TargetTimeZone. We need 730 // to do the reverse conversion in VDiff before the comparison. 731 func (td *tableDiffer) adjustForSourceTimeZone(targetSelectExprs sqlparser.SelectExprs, fields map[string]querypb.Type) sqlparser.SelectExprs { 732 if td.wd.ct.sourceTimeZone == "" { 733 return targetSelectExprs 734 } 735 log.Infof("source time zone specified: %s", td.wd.ct.sourceTimeZone) 736 var newSelectExprs sqlparser.SelectExprs 737 var modified bool 738 for _, expr := range targetSelectExprs { 739 converted := false 740 switch selExpr := expr.(type) { 741 case *sqlparser.AliasedExpr: 742 if colAs, ok := selExpr.Expr.(*sqlparser.ColName); ok { 743 var convertTZFuncExpr *sqlparser.FuncExpr 744 colName := colAs.Name.Lowered() 745 fieldType := fields[colName] 746 if fieldType == querypb.Type_DATETIME { 747 convertTZFuncExpr = &sqlparser.FuncExpr{ 748 Name: sqlparser.NewIdentifierCI("convert_tz"), 749 Exprs: sqlparser.SelectExprs{ 750 expr, 751 &sqlparser.AliasedExpr{Expr: sqlparser.NewStrLiteral(td.wd.ct.targetTimeZone)}, 752 &sqlparser.AliasedExpr{Expr: sqlparser.NewStrLiteral(td.wd.ct.sourceTimeZone)}, 753 }, 754 } 755 log.Infof("converting datetime column %s using convert_tz()", colName) 756 newSelectExprs = append(newSelectExprs, &sqlparser.AliasedExpr{Expr: convertTZFuncExpr, As: colAs.Name}) 757 converted = true 758 modified = true 759 } 760 } 761 } 762 if !converted { // not datetime 763 newSelectExprs = append(newSelectExprs, expr) 764 } 765 } 766 if modified { // at least one datetime was found 767 log.Infof("Found datetime columns when SourceTimeZone was set, resetting target SelectExprs after convert_tz()") 768 return newSelectExprs 769 } 770 return targetSelectExprs 771 } 772 773 func getColumnNameForSelectExpr(selectExpression sqlparser.SelectExpr) (string, error) { 774 aliasedExpr := selectExpression.(*sqlparser.AliasedExpr) 775 expr := aliasedExpr.Expr 776 var colname string 777 switch t := expr.(type) { 778 case *sqlparser.ColName: 779 colname = t.Name.Lowered() 780 case *sqlparser.FuncExpr: // only in case datetime was converted using convert_tz() 781 colname = aliasedExpr.As.Lowered() 782 default: 783 return "", fmt.Errorf("found target SelectExpr which was neither ColName nor FuncExpr: %+v", aliasedExpr) 784 } 785 return colname, nil 786 }