github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/checker/table_structure.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package checker

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"strings"
	"sync"
	"time"

	"github.com/pingcap/errors"
	"github.com/pingcap/tidb/pkg/parser"
	"github.com/pingcap/tidb/pkg/parser/ast"
	"github.com/pingcap/tidb/pkg/parser/mysql"
	"github.com/pingcap/tidb/pkg/util/dbutil"
	"github.com/pingcap/tidb/pkg/util/dbutil/dbutiltest"
	"github.com/pingcap/tidb/pkg/util/filter"
	"github.com/pingcap/tidb/pkg/util/schemacmp"
	"github.com/pingcap/tiflow/dm/pkg/conn"
	tcontext "github.com/pingcap/tiflow/dm/pkg/context"
	"github.com/pingcap/tiflow/dm/pkg/log"
	"github.com/pingcap/tiflow/dm/pkg/utils"
	"go.uber.org/zap"
	"golang.org/x/exp/maps"
	"golang.org/x/sync/errgroup"
)

const (
	// AutoIncrementKeyChecking is an identifier for auto-increment key checking.
	AutoIncrementKeyChecking = "auto-increment key checking"
)

type checkItem struct {
	upstreamTable   filter.Table
	downstreamTable filter.Table
	sourceID        string
}

// incompatibilityOption holds the information of one incompatibility issue.
type incompatibilityOption struct {
	state       State
	tableID     string
	instruction string
	errMessage  string
}

// String returns the raw text of this incompatibility option.
func (o *incompatibilityOption) String() string {
	var text bytes.Buffer

	if len(o.errMessage) > 0 {
		fmt.Fprintf(&text, "information: %s\n", o.errMessage)
	}

	if len(o.instruction) > 0 {
		fmt.Fprintf(&text, "instruction: %s\n", o.instruction)
	}

	return text.String()
}

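// A sketch of the text produced by String above when both fields are set
// (the example messages are ones used later in this file):
//
//	information: primary/unique key does not exist
//	instruction: You need to set primary/unique keys for the table. ...
//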
// TablesChecker checks the compatibility of table structures, since there are differences between MySQL and TiDB.
// In general we need to check the definitions of columns, constraints and table options.
// Because of the early TiDB engineering design, we do not have a complete list of check items; the current items are all based on experience.
type TablesChecker struct {
	upstreamDBs  map[string]*conn.BaseDB
	downstreamDB *conn.BaseDB
	// sourceID -> downstream table -> upstream tables
	tableMap map[string]map[filter.Table][]filter.Table
	// downstream table -> extended column names
	extendedColumnPerTable map[filter.Table][]string
	dumpThreads            int
	// a simple cache for downstream table structure
	// filter.Table -> *ast.CreateTableStmt
	// if the value is nil, it means the downstream table is not created yet
	downstreamTables sync.Map
}

// NewTablesChecker returns a RealChecker.
func NewTablesChecker(
	upstreamDBs map[string]*conn.BaseDB,
	downstreamDB *conn.BaseDB,
	tableMap map[string]map[filter.Table][]filter.Table,
	extendedColumnPerTable map[filter.Table][]string,
	dumpThreads int,
) RealChecker {
	if dumpThreads == 0 {
		dumpThreads = 1
	}
	c := &TablesChecker{
		upstreamDBs:            upstreamDBs,
		downstreamDB:           downstreamDB,
		tableMap:               tableMap,
		extendedColumnPerTable: extendedColumnPerTable,
		dumpThreads:            dumpThreads,
	}
	log.L().Logger.Debug("check table structure", zap.Int("channel pool size", dumpThreads))
	return c
}

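// A minimal usage sketch for NewTablesChecker. The sources, routing and
// context below are hypothetical placeholders; real values come from the
// task configuration:
//
//	tableMap := map[string]map[filter.Table][]filter.Table{
//		"source-1": {
//			{Schema: "db", Name: "t"}: {
//				{Schema: "db", Name: "t_0"},
//				{Schema: "db", Name: "t_1"},
//			},
//		},
//	}
//	c := NewTablesChecker(upstreamDBs, downstreamDB, tableMap, nil, 4)
//	result := c.Check(ctx)
//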
type tablesCheckerWorker struct {
	c                *TablesChecker
	downstreamParser *parser.Parser

	lastSourceID   string
	upstreamParser *parser.Parser
}

func (w *tablesCheckerWorker) handle(ctx context.Context, checkItem *checkItem) ([]*incompatibilityOption, error) {
	var (
		err   error
		ret   = make([]*incompatibilityOption, 0, 1)
		table = checkItem.upstreamTable
	)
	log.L().Logger.Debug("checking table", zap.String("db", table.Schema), zap.String("table", table.Name))
	if w.lastSourceID == "" || w.lastSourceID != checkItem.sourceID {
		w.lastSourceID = checkItem.sourceID
		w.upstreamParser, err = dbutil.GetParserForDB(ctx, w.c.upstreamDBs[w.lastSourceID].DB)
		if err != nil {
			return nil, err
		}
	}
	db := w.c.upstreamDBs[checkItem.sourceID].DB
	upstreamSQL, err := dbutil.GetCreateTableSQL(ctx, db, table.Schema, table.Name)
	if err != nil {
		// continue if table was deleted when checking
		if isMySQLError(err, mysql.ErrNoSuchTable) {
			return nil, nil
		}
		return nil, err
	}

	upstreamStmt, err := getCreateTableStmt(w.upstreamParser, upstreamSQL)
	if err != nil {
		opt := &incompatibilityOption{
			state:      StateWarning,
			tableID:    dbutil.TableName(table.Schema, table.Name),
			errMessage: err.Error(),
		}
		ret = append(ret, opt)
		// nolint:nilerr
		return ret, nil
	}

	downstreamStmt, ok := w.c.downstreamTables.Load(checkItem.downstreamTable)
	if !ok {
		sql, err2 := dbutil.GetCreateTableSQL(
			ctx,
			w.c.downstreamDB.DB,
			checkItem.downstreamTable.Schema,
			checkItem.downstreamTable.Name,
		)
		if err2 != nil && !isMySQLError(err2, mysql.ErrNoSuchTable) {
			return nil, err2
		}
		if sql == "" {
			downstreamStmt = (*ast.CreateTableStmt)(nil)
		} else {
			downstreamStmt, err2 = getCreateTableStmt(w.downstreamParser, sql)
			if err2 != nil {
				opt := &incompatibilityOption{
					state:      StateWarning,
					tableID:    dbutil.TableName(table.Schema, table.Name),
					errMessage: err2.Error(),
				}
				ret = append(ret, opt)
			}
		}
		w.c.downstreamTables.Store(checkItem.downstreamTable, downstreamStmt)
	}

	downstreamTable := filter.Table{
		Schema: checkItem.downstreamTable.Schema,
		Name:   checkItem.downstreamTable.Name,
	}
	opts := w.c.checkAST(
		upstreamStmt,
		downstreamStmt.(*ast.CreateTableStmt),
		w.c.extendedColumnPerTable[downstreamTable],
	)
	for _, opt := range opts {
		opt.tableID = table.String()
		ret = append(ret, opt)
	}
	log.L().Logger.Debug("finish checking table", zap.String("db", table.Schema), zap.String("table", table.Name))
	return ret, nil
}

// Check implements RealChecker interface.
func (c *TablesChecker) Check(ctx context.Context) *Result {
	r := &Result{
		Name:  c.Name(),
		Desc:  "check compatibility of table structure",
		State: StateSuccess,
	}

	startTime := time.Now()
	sourceIDs := maps.Keys(c.tableMap)
	concurrency, err := GetConcurrency(ctx, sourceIDs, c.upstreamDBs, c.dumpThreads)
	if err != nil {
		markCheckError(r, err)
		return r
	}

	everyOptHandler, finalHandler := c.handleOpts(r)

	pool := NewWorkerPoolWithContext[*checkItem, []*incompatibilityOption](
		ctx, everyOptHandler,
	)

	for i := 0; i < concurrency; i++ {
		worker := &tablesCheckerWorker{c: c}
		worker.downstreamParser, err = dbutil.GetParserForDB(ctx, c.downstreamDB.DB)
		if err != nil {
			markCheckError(r, err)
			return r
		}
		pool.Go(worker.handle)
	}

	dispatchTableItemWithDownstreamTable(c.tableMap, pool)

	if err := pool.Wait(); err != nil {
		markCheckError(r, err)
		return r
	}
	finalHandler()

	log.L().Logger.Info("check table structure over", zap.Duration("spend time", time.Since(startTime)))
	return r
}

// Name implements RealChecker interface.
func (c *TablesChecker) Name() string {
	return "table structure compatibility check"
}

// handleOpts returns a handler that should be called on every
// incompatibilityOption, and a second handler that should be called once
// after all incompatibilityOptions have been handled.
func (c *TablesChecker) handleOpts(r *Result) (func(options []*incompatibilityOption), func()) {
	// extract same instruction from Errors to Result.Instruction
	resultInstructions := map[string]struct{}{}

	return func(options []*incompatibilityOption) {
			for _, opt := range options {
				tableMsg := "table " + opt.tableID + " "
				switch opt.state {
				case StateWarning:
					if r.State != StateFailure {
						r.State = StateWarning
					}
					e := NewError(tableMsg + opt.errMessage)
					e.Severity = StateWarning
					if _, ok := resultInstructions[opt.instruction]; !ok && opt.instruction != "" {
						resultInstructions[opt.instruction] = struct{}{}
					}
					r.Errors = append(r.Errors, e)
				case StateFailure:
					r.State = StateFailure
					e := NewError(tableMsg + opt.errMessage)
					if _, ok := resultInstructions[opt.instruction]; !ok && opt.instruction != "" {
						resultInstructions[opt.instruction] = struct{}{}
					}
					r.Errors = append(r.Errors, e)
				}
			}
		}, func() {
			instructionSlice := make([]string, 0, len(resultInstructions))
			for k := range resultInstructions {
				instructionSlice = append(instructionSlice, k)
			}
			r.Instruction += strings.Join(instructionSlice, "; ")
		}
}

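// The pair of closures returned by handleOpts is used as in Check above
// (sketch): the first one may run many times and appends to r.Errors, the
// second one runs exactly once after the worker pool has drained:
//
//	everyOptHandler, finalHandler := c.handleOpts(r)
//	everyOptHandler(opts) // once per checked table
//	finalHandler()        // joins the deduplicated instructions into r.Instruction
//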
func (c *TablesChecker) checkAST(
	upstreamStmt *ast.CreateTableStmt,
	downstreamStmt *ast.CreateTableStmt,
	extendedCols []string,
) []*incompatibilityOption {
	var options []*incompatibilityOption

	// check columns
	for _, def := range upstreamStmt.Cols {
		option := c.checkColumnDef(def)
		if option != nil {
			options = append(options, option)
		}
	}
	// check constraints
	for _, cst := range upstreamStmt.Constraints {
		option := c.checkConstraint(cst)
		if option != nil {
			options = append(options, option)
		}
	}
	// check primary/unique key
	hasUnique := false
	for _, cst := range upstreamStmt.Constraints {
		if c.checkUnique(cst) {
			hasUnique = true
			break
		}
	}
	if !hasUnique {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "You need to set primary/unique keys for the table. Otherwise replication efficiency might become very low and exactly-once replication cannot be guaranteed.",
			errMessage:  "primary/unique key does not exist",
		})
	}

	if downstreamStmt == nil {
		if len(extendedCols) > 0 {
			options = append(options, &incompatibilityOption{
				state:       StateFailure,
				instruction: "You need to create a table with extended columns before replication.",
				errMessage:  fmt.Sprintf("the downstream table for upstream table %s with extended columns %v does not exist", upstreamStmt.Table.Name, extendedCols),
			})
		}
		return options
	}

	options = append(options, c.checkTableStructurePair(upstreamStmt, downstreamStmt, extendedCols)...)
	return options
}

// checkColumnDef is currently a no-op placeholder; no per-column
// incompatibility check is implemented yet.
func (c *TablesChecker) checkColumnDef(def *ast.ColumnDef) *incompatibilityOption {
	return nil
}

func (c *TablesChecker) checkConstraint(cst *ast.Constraint) *incompatibilityOption {
	if cst.Tp == ast.ConstraintForeignKey {
		return &incompatibilityOption{
			state:       StateWarning,
			instruction: "TiDB does not support foreign key constraints. See the document: https://docs.pingcap.com/tidb/stable/mysql-compatibility#unsupported-features",
			errMessage:  fmt.Sprintf("Foreign Key %s is parsed but ignored by TiDB.", cst.Name),
		}
	}

	return nil
}

func (c *TablesChecker) checkUnique(cst *ast.Constraint) bool {
	switch cst.Tp {
	case ast.ConstraintPrimaryKey, ast.ConstraintUniq, ast.ConstraintUniqKey, ast.ConstraintUniqIndex:
		return true
	}
	return false
}

func (c *TablesChecker) checkTableStructurePair(
	upstream *ast.CreateTableStmt,
	downstream *ast.CreateTableStmt,
	extendedCols []string,
) []*incompatibilityOption {
	//nolint: prealloc
	var options []*incompatibilityOption

	// check charset of upstream/downstream tables
	upstreamCharset := getCharset(upstream)
	downstreamCharset := getCharset(downstream)
	if upstreamCharset != "" && downstreamCharset != "" &&
		!strings.EqualFold(upstreamCharset, downstreamCharset) &&
		!strings.EqualFold(downstreamCharset, mysql.UTF8MB4Charset) {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "Ensure that you use the same charsets for both upstream and downstream databases. Different charsets might cause data inconsistency.",
			errMessage: fmt.Sprintf("charset is not the same, upstream: (%s %s), downstream: (%s %s)",
				upstream.Table.Name.O, upstreamCharset,
				downstream.Table.Name.O, downstreamCharset),
		})
	}

	// check collation
	upstreamCollation := getCollation(upstream)
	downstreamCollation := getCollation(downstream)
	if upstreamCollation != "" && downstreamCollation != "" &&
		!strings.EqualFold(upstreamCollation, downstreamCollation) {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "Ensure that you use the same collations for both upstream and downstream databases. Otherwise the query results from the two databases might be inconsistent.",
			errMessage: fmt.Sprintf("collation is not the same, upstream: (%s %s), downstream: (%s %s)",
				upstream.Table.Name.O, upstreamCollation,
				downstream.Table.Name.O, downstreamCollation),
		})
	}

	// check PK/UK
	upstreamPKUK := getPKAndUK(upstream)
	downstreamPKUK := getPKAndUK(downstream)
	// the number of PK/UK should be small, so we use a simple but slow algorithm for now
	for idxNameUp, s := range upstreamPKUK {
		for idxNameDown, s2 := range downstreamPKUK {
			if stringSetEqual(s, s2) {
				delete(upstreamPKUK, idxNameUp)
				delete(downstreamPKUK, idxNameDown)
				break
			}
		}
	}
	for idxName, cols := range upstreamPKUK {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "Ensure that you use the same index columns for both upstream and downstream databases. Otherwise the migration job might fail or data inconsistency might occur.",
			errMessage: fmt.Sprintf("upstream has more PK or NOT NULL UK than downstream, index name: %s, columns: %v",
				idxName, utils.SetToSlice(cols)),
		})
	}
	for idxName, cols := range downstreamPKUK {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "Ensure that you use the same index columns for both upstream and downstream databases. Otherwise the migration job might fail or data inconsistency might occur.",
			errMessage: fmt.Sprintf("downstream has more PK or NOT NULL UK than upstream, table name: %s, index name: %s, columns: %v",
				downstream.Table.Name.O, idxName, utils.SetToSlice(cols)),
		})
	}

	// check columns
	upstreamCols := getColumnsAndIgnorable(upstream)
	downstreamCols := getColumnsAndIgnorable(downstream)
	for col := range upstreamCols {
		if _, ok := downstreamCols[col]; ok {
			delete(upstreamCols, col)
			delete(downstreamCols, col)
		}
	}

	upstreamDupCols := make([]string, 0, len(extendedCols))
	downstreamMissingCols := make([]string, 0, len(extendedCols))
	for _, col := range extendedCols {
		if _, ok := upstreamCols[col]; ok {
			upstreamDupCols = append(upstreamDupCols, col)
		}
		if _, ok := downstreamCols[col]; !ok {
			downstreamMissingCols = append(downstreamMissingCols, col)
		}
		delete(upstreamCols, col)
	}
	if len(upstreamDupCols) > 0 {
		options = append(options, &incompatibilityOption{
			state:       StateFailure,
			instruction: "DM automatically fills the values of extended columns. You need to remove these columns or change the configuration.",
			errMessage:  fmt.Sprintf("upstream table must not contain extended column %v", upstreamDupCols),
		})
	}
	if len(downstreamMissingCols) > 0 {
		options = append(options, &incompatibilityOption{
			state:       StateFailure,
			instruction: "You need to manually add extended columns to the downstream table.",
			errMessage:  fmt.Sprintf("downstream table must contain extended columns %v", downstreamMissingCols),
		})
	}
	if len(upstreamDupCols) > 0 || len(downstreamMissingCols) > 0 {
		return options
	}

	if len(upstreamCols) > 0 {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "Ensure that the number of columns is the same between upstream and downstream databases. Otherwise the migration job may fail.",
			errMessage: fmt.Sprintf("upstream has more columns than downstream, columns: %v",
				maps.Keys(upstreamCols)),
		})
	}
	for col, ignorable := range downstreamCols {
		if ignorable {
			delete(downstreamCols, col)
		}
	}
	if len(downstreamCols) > 0 {
		options = append(options, &incompatibilityOption{
			state:       StateWarning,
			instruction: "Ensure that the number of columns is the same between upstream and downstream databases. Otherwise the migration job may fail.",
			errMessage: fmt.Sprintf("downstream has more columns than upstream that require values to insert records, table name: %s, columns: %v",
				downstream.Table.Name.O, maps.Keys(downstreamCols)),
		})
	}

	return options
}

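// A worked example for the PK/UK matching in checkTableStructurePair above:
// if the upstream defines PRIMARY KEY (a) and UNIQUE KEY uk (b, c) while the
// downstream defines only PRIMARY KEY (a), the {a}/{a} pair cancels out and
// the leftover upstream entry uk -> {b, c} is reported as "upstream has more
// PK or NOT NULL UK than downstream".
//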
// ShardingTablesChecker checks the consistency of table structures of one sharding group:
// * check whether they have the same column list
// * check whether they have an auto_increment key.
type ShardingTablesChecker struct {
	targetTableID                string
	dbs                          map[string]*conn.BaseDB
	tableMap                     map[string][]filter.Table // sourceID => {[table1, table2, ...]}
	checkAutoIncrementPrimaryKey bool
	firstCreateTableStmtNode     *ast.CreateTableStmt
	firstTable                   filter.Table
	firstSourceID                string
	inCh                         chan *checkItem
	reMu                         sync.Mutex
	dumpThreads                  int
}

// NewShardingTablesChecker returns a RealChecker.
func NewShardingTablesChecker(
	targetTableID string,
	dbs map[string]*conn.BaseDB,
	tableMap map[string][]filter.Table,
	checkAutoIncrementPrimaryKey bool,
	dumpThreads int,
) RealChecker {
	if dumpThreads == 0 {
		dumpThreads = 1
	}
	c := &ShardingTablesChecker{
		targetTableID:                targetTableID,
		dbs:                          dbs,
		tableMap:                     tableMap,
		checkAutoIncrementPrimaryKey: checkAutoIncrementPrimaryKey,
		dumpThreads:                  dumpThreads,
	}
	c.inCh = make(chan *checkItem, dumpThreads)

	return c
}

// Check implements RealChecker interface.
func (c *ShardingTablesChecker) Check(ctx context.Context) *Result {
	r := &Result{
		Name:  c.Name(),
		Desc:  "check consistency of sharding table structures",
		State: StateSuccess,
		Extra: fmt.Sprintf("sharding %s,", c.targetTableID),
	}

	startTime := time.Now()
	log.L().Logger.Info("start to check sharding tables")

	for sourceID, tables := range c.tableMap {
		c.firstSourceID = sourceID
		c.firstTable = tables[0]
		break
	}
	db, ok := c.dbs[c.firstSourceID]
	if !ok {
		markCheckError(r, errors.NotFoundf("client for sourceID %s", c.firstSourceID))
		return r
	}

	p, err := dbutil.GetParserForDB(ctx, db.DB)
	if err != nil {
		r.Extra = fmt.Sprintf("fail to get parser for sourceID %s on sharding %s", c.firstSourceID, c.targetTableID)
		markCheckError(r, err)
		return r
	}
	r.Extra = fmt.Sprintf("sourceID %s on sharding %s", c.firstSourceID, c.targetTableID)
	statement, err := dbutil.GetCreateTableSQL(ctx, db.DB, c.firstTable.Schema, c.firstTable.Name)
	if err != nil {
		markCheckError(r, err)
		return r
	}

	c.firstCreateTableStmtNode, err = getCreateTableStmt(p, statement)
	if err != nil {
		markCheckErrorFromParser(r, err)
		return r
	}

	sourceIDs := maps.Keys(c.tableMap)
	concurrency, err := GetConcurrency(ctx, sourceIDs, c.dbs, c.dumpThreads)
	if err != nil {
		markCheckError(r, err)
		return r
	}
	eg, checkCtx := errgroup.WithContext(ctx)
	for i := 0; i < concurrency; i++ {
		eg.Go(func() error {
			return c.checkShardingTable(checkCtx, r)
		})
	}

	dispatchTableItem(checkCtx, c.tableMap, c.inCh)
	if err := eg.Wait(); err != nil {
		markCheckError(r, err)
	}

	log.L().Logger.Info("check sharding table structure over", zap.Duration("spend time", time.Since(startTime)))
	return r
}

func (c *ShardingTablesChecker) checkShardingTable(ctx context.Context, r *Result) error {
	var (
		sourceID string
		p        *parser.Parser
		err      error
	)
	for {
		select {
		case <-ctx.Done():
			return nil
		case checkItem, ok := <-c.inCh:
			if !ok {
				return nil
			}
			table := checkItem.upstreamTable
			if len(sourceID) == 0 || sourceID != checkItem.sourceID {
				sourceID = checkItem.sourceID
				p, err = dbutil.GetParserForDB(ctx, c.dbs[sourceID].DB)
				if err != nil {
					c.reMu.Lock()
					r.Extra = fmt.Sprintf("fail to get parser for sourceID %s on sharding %s", sourceID, c.targetTableID)
					c.reMu.Unlock()
					return err
				}
			}

			statement, err := dbutil.GetCreateTableSQL(ctx, c.dbs[sourceID].DB, table.Schema, table.Name)
			if err != nil {
				// continue if table was deleted when checking
				if isMySQLError(err, mysql.ErrNoSuchTable) {
					continue
				}
				return err
			}

			ctStmt, err := getCreateTableStmt(p, statement)
			if err != nil {
				c.reMu.Lock()
				markCheckErrorFromParser(r, err)
				c.reMu.Unlock()
				continue
			}

			if has := hasAutoIncrementKey(ctStmt); has {
				c.reMu.Lock()
				if r.State == StateSuccess {
					r.State = StateWarning
					r.Instruction = "If a conflict occurs, please handle it yourself. You can refer to https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices/#handle-conflicts-between-primary-keys-or-unique-indexes-across-multiple-sharded-tables"
					r.Extra = AutoIncrementKeyChecking
				}
				r.Errors = append(r.Errors, NewError("sourceID %s table %v of sharding %s has an auto-increment key, please make sure it does not conflict in the target table!", sourceID, table, c.targetTableID))
				c.reMu.Unlock()
			}

			if checkErr := c.checkConsistency(ctStmt, table.String(), sourceID); checkErr != nil {
				c.reMu.Lock()
				r.State = StateFailure
				r.Errors = append(r.Errors, checkErr)
				r.Extra = fmt.Sprintf("error on sharding %s", c.targetTableID)
				r.Instruction = "please set the same table structure for sharding tables"
				c.reMu.Unlock()
				// shouldn't return an error here:
				// it's feasible to check more sharding tables and
				// inform users of as many incompatible tables as possible
			}
		}
	}
}

func hasAutoIncrementKey(stmt *ast.CreateTableStmt) bool {
	for _, col := range stmt.Cols {
		for _, opt := range col.Options {
			if opt.Tp == ast.ColumnOptionAutoIncrement {
				return true
			}
		}
	}
	return false
}

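// For example, hasAutoIncrementKey returns true for a table created by
//
//	CREATE TABLE t (id BIGINT PRIMARY KEY AUTO_INCREMENT, v INT)
//
// because the id column definition carries the AUTO_INCREMENT option.
//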
type briefColumnInfo struct {
	name         string
	tp           string
	isUniqueKey  bool
	isPrimaryKey bool
}

func (c *briefColumnInfo) String() string {
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "%s %s", c.name, c.tp)
	if c.isPrimaryKey {
		fmt.Fprintln(&buf, " primary key")
	} else if c.isUniqueKey {
		fmt.Fprintln(&buf, " unique key")
	}

	return buf.String()
}

type briefColumnInfos []*briefColumnInfo

func (cs briefColumnInfos) String() string {
	colStrs := make([]string, 0, len(cs))
	for _, col := range cs {
		colStrs = append(colStrs, col.String())
	}

	return strings.Join(colStrs, "\n")
}

func (c *ShardingTablesChecker) checkConsistency(other *ast.CreateTableStmt, otherTable, othersourceID string) *Error {
	selfColumnList := getBriefColumnList(c.firstCreateTableStmtNode)
	otherColumnList := getBriefColumnList(other)

	if len(selfColumnList) != len(otherColumnList) {
		e := NewError("column length mismatch (self: %d vs other: %d)", len(selfColumnList), len(otherColumnList))
		getColumnNames := func(infos briefColumnInfos) []string {
			ret := make([]string, 0, len(infos))
			for _, info := range infos {
				ret = append(ret, info.name)
			}
			return ret
		}
		e.Self = fmt.Sprintf("sourceID %s table %v columns %v", c.firstSourceID, c.firstTable, getColumnNames(selfColumnList))
		e.Other = fmt.Sprintf("sourceID %s table %s columns %v", othersourceID, otherTable, getColumnNames(otherColumnList))
		return e
	}

	for i := range selfColumnList {
		if *selfColumnList[i] != *otherColumnList[i] {
			e := NewError("different column definition")
			e.Self = fmt.Sprintf("sourceID %s table %s column %s", c.firstSourceID, c.firstTable, selfColumnList[i])
			e.Other = fmt.Sprintf("sourceID %s table %s column %s", othersourceID, otherTable, otherColumnList[i])
			return e
		}
	}

	return nil
}

func getBriefColumnList(stmt *ast.CreateTableStmt) briefColumnInfos {
	columnList := make(briefColumnInfos, 0, len(stmt.Cols))

	for _, col := range stmt.Cols {
		bc := &briefColumnInfo{
			name: col.Name.Name.L,
			tp:   col.Tp.String(),
		}

		for _, opt := range col.Options {
			switch opt.Tp {
			case ast.ColumnOptionPrimaryKey:
				bc.isPrimaryKey = true
			case ast.ColumnOptionUniqKey:
				bc.isUniqueKey = true
			}
		}

		columnList = append(columnList, bc)
	}

	return columnList
}

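// For example, for `CREATE TABLE t (id INT PRIMARY KEY, name VARCHAR(10) UNIQUE)`,
// getBriefColumnList yields roughly the following (the exact type text
// depends on the parser version):
//
//	id int(11) primary key
//	name varchar(10) unique key
//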
// Name implements Checker interface.
func (c *ShardingTablesChecker) Name() string {
	return fmt.Sprintf("sharding table %s consistency checking", c.targetTableID)
}

// OptimisticShardingTablesChecker checks the consistency of table structures of one sharding group in optimistic shard mode.
// * check whether they have a compatible column list.
type OptimisticShardingTablesChecker struct {
	targetTableID string
	dbs           map[string]*conn.BaseDB
	tableMap      map[string][]filter.Table // sourceID => [table1, table2, ...]
	reMu          sync.Mutex
	joinedMu      sync.Mutex
	inCh          chan *checkItem
	dumpThreads   int
	joined        *schemacmp.Table
}

// NewOptimisticShardingTablesChecker returns a RealChecker.
func NewOptimisticShardingTablesChecker(
	targetTableID string,
	dbs map[string]*conn.BaseDB,
	tableMap map[string][]filter.Table,
	dumpThreads int,
) RealChecker {
	if dumpThreads == 0 {
		dumpThreads = 1
	}
	c := &OptimisticShardingTablesChecker{
		targetTableID: targetTableID,
		dbs:           dbs,
		tableMap:      tableMap,
		dumpThreads:   dumpThreads,
	}
	c.inCh = make(chan *checkItem, dumpThreads)
	return c
}

// Name implements Checker interface.
func (c *OptimisticShardingTablesChecker) Name() string {
	return fmt.Sprintf("optimistic sharding table %s consistency checking", c.targetTableID)
}

// Check implements RealChecker interface.
func (c *OptimisticShardingTablesChecker) Check(ctx context.Context) *Result {
	r := &Result{
		Name:  c.Name(),
		Desc:  "check consistency of sharding table structures for Optimistic Sharding Merge",
		State: StateSuccess,
		Extra: fmt.Sprintf("sharding %s", c.targetTableID),
	}

	startTime := time.Now()
	sourceIDs := maps.Keys(c.tableMap)
	concurrency, err := GetConcurrency(ctx, sourceIDs, c.dbs, c.dumpThreads)
	if err != nil {
		markCheckError(r, err)
		return r
	}
	eg, checkCtx := errgroup.WithContext(ctx)
	for i := 0; i < concurrency; i++ {
		eg.Go(func() error {
			return c.checkTable(checkCtx, r)
		})
	}

	dispatchTableItem(checkCtx, c.tableMap, c.inCh)
	if err := eg.Wait(); err != nil {
		markCheckError(r, err)
	}

	log.L().Logger.Info("check optimistic sharding table structure over", zap.Duration("spend time", time.Since(startTime)))
	return r
}

func (c *OptimisticShardingTablesChecker) checkTable(ctx context.Context, r *Result) error {
	var (
		sourceID string
		p        *parser.Parser
		err      error
	)
	for {
		select {
		case <-ctx.Done():
			return nil
		case checkItem, ok := <-c.inCh:
			if !ok {
				return nil
			}
			table := checkItem.upstreamTable
			if len(sourceID) == 0 || sourceID != checkItem.sourceID {
				sourceID = checkItem.sourceID
				p, err = dbutil.GetParserForDB(ctx, c.dbs[sourceID].DB)
				if err != nil {
					c.reMu.Lock()
					r.Extra = fmt.Sprintf("fail to get parser for sourceID %s on sharding %s", sourceID, c.targetTableID)
					c.reMu.Unlock()
					return err
				}
			}

			statement, err := dbutil.GetCreateTableSQL(ctx, c.dbs[sourceID].DB, table.Schema, table.Name)
			if err != nil {
				// continue if table was deleted when checking
				if isMySQLError(err, mysql.ErrNoSuchTable) {
					continue
				}
				return err
			}

			ctStmt, err := getCreateTableStmt(p, statement)
			if err != nil {
				c.reMu.Lock()
				markCheckErrorFromParser(r, err)
				c.reMu.Unlock()
				continue
			}

			if has := hasAutoIncrementKey(ctStmt); has {
				c.reMu.Lock()
				if r.State == StateSuccess {
					r.State = StateWarning
					r.Instruction = "If a conflict occurs, please handle it yourself. You can refer to https://docs.pingcap.com/tidb-data-migration/stable/shard-merge-best-practices/#handle-conflicts-between-primary-keys-or-unique-indexes-across-multiple-sharded-tables"
					r.Extra = AutoIncrementKeyChecking
				}
				r.Errors = append(r.Errors, NewError("sourceID %s table %v of sharding %s has an auto-increment key, please make sure it does not conflict in the target table!", sourceID, table, c.targetTableID))
				c.reMu.Unlock()
			}

			ti, err := dbutiltest.GetTableInfoBySQL(statement, p)
			if err != nil {
				return err
			}
			encodeTi := schemacmp.Encode(ti)
			c.joinedMu.Lock()
			if c.joined == nil {
				c.joined = &encodeTi
				c.joinedMu.Unlock()
				continue
			}
			newJoined, err2 := c.joined.Join(encodeTi)
			if err2 != nil {
				// NOTE: conflict detected.
				c.reMu.Lock()
				r.Extra = fmt.Sprintf("fail to join table info %s with %s", c.joined, encodeTi)
				c.reMu.Unlock()
				c.joinedMu.Unlock()
				return err2
			}
			c.joined = &newJoined
			c.joinedMu.Unlock()
		}
	}
}

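// A note on the schemacmp usage above: Join computes the least upper bound of
// the two encoded table structures, so shards that differ in compatible ways
// (e.g. one shard having an extra column) can still join successfully, while
// irreconcilable differences (e.g. the same column declared with conflicting
// types) make Join return an error, which is surfaced as a conflict.
//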
func dispatchTableItem(ctx context.Context, tableMap map[string][]filter.Table, inCh chan *checkItem) {
	for sourceID, tables := range tableMap {
		for _, table := range tables {
			select {
			case <-ctx.Done():
				log.L().Logger.Warn("ctx canceled before all tables were dispatched")
				return
			case inCh <- &checkItem{upstreamTable: table, sourceID: sourceID}:
			}
		}
	}
	close(inCh)
}

func dispatchTableItemWithDownstreamTable(
	tableMaps map[string]map[filter.Table][]filter.Table,
	pool *WorkerPool[*checkItem, []*incompatibilityOption],
) {
	for sourceID, tableMap := range tableMaps {
		for downTable, upTables := range tableMap {
			for _, upTable := range upTables {
				ok := pool.PutJob(&checkItem{
					upstreamTable:   upTable,
					downstreamTable: downTable,
					sourceID:        sourceID,
				})
				if !ok {
					return
				}
			}
		}
	}
}

// GetConcurrency returns the concurrency of workers such that tasks on any
// source can be randomly dispatched to any of them, where each task needs a
// SQL connection.
func GetConcurrency(ctx context.Context, sourceIDs []string, dbs map[string]*conn.BaseDB, dumpThreads int) (int, error) {
	concurrency := dumpThreads
	for _, sourceID := range sourceIDs {
		db, ok := dbs[sourceID]
		if !ok {
			return 0, errors.NotFoundf("SQL connection for sourceID %s", sourceID)
		}
		maxConnections, err := conn.GetMaxConnections(tcontext.NewContext(ctx, log.L()), db)
		if err != nil {
			return 0, err
		}
		concurrency = int(math.Min(float64(concurrency), float64((maxConnections+1)/2)))
	}
	return concurrency, nil
}
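
// A worked example for GetConcurrency above (illustration only): with
// dumpThreads = 8 and two sources whose max_connections are 5 and 9, the
// per-source caps are (5+1)/2 = 3 and (9+1)/2 = 5, so the returned
// concurrency is min(8, 3, 5) = 3.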