github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/sharding-meta/shardmeta.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package shardmeta
    15  
    16  import (
    17  	"encoding/json"
    18  	"fmt"
    19  
    20  	"github.com/go-mysql-org/go-mysql/mysql"
    21  	"github.com/pingcap/tidb/pkg/util/dbutil"
    22  	"github.com/pingcap/tidb/pkg/util/filter"
    23  	"github.com/pingcap/tiflow/dm/pkg/binlog"
    24  	"github.com/pingcap/tiflow/dm/pkg/gtid"
    25  	"github.com/pingcap/tiflow/dm/pkg/log"
    26  	"github.com/pingcap/tiflow/dm/pkg/terror"
    27  	"github.com/pingcap/tiflow/dm/pkg/utils"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  // DDLItem records ddl information used in sharding sequence organization.
    32  type DDLItem struct {
    33  	FirstLocation binlog.Location `json:"-"`      // first DDL's binlog Pos, not the End_log_pos of the event
    34  	DDLs          []string        `json:"ddls"`   // DDLs, these ddls are in the same QueryEvent
    35  	Source        string          `json:"source"` // source table ID
    36  
    37  	// just used for json's marshal and unmarshal, because gtid set in FirstLocation is interface,
    38  	// can't be marshal and unmarshal
    39  	FirstPosition mysql.Position `json:"first-position"`
    40  	FirstGTIDSet  string         `json:"first-gtid-set"`
    41  }
    42  
    43  // NewDDLItem creates a new DDLItem.
    44  func NewDDLItem(location binlog.Location, ddls []string, source string) *DDLItem {
    45  	gsetStr := ""
    46  	if location.GetGTID() != nil {
    47  		gsetStr = location.GetGTID().String()
    48  	}
    49  
    50  	return &DDLItem{
    51  		FirstLocation: location,
    52  		DDLs:          ddls,
    53  		Source:        source,
    54  
    55  		FirstPosition: location.Position,
    56  		FirstGTIDSet:  gsetStr,
    57  	}
    58  }
    59  
    60  // String returns the item's format string value.
    61  func (item *DDLItem) String() string {
    62  	return fmt.Sprintf("first-location: %s ddls: %+v source: %s", item.FirstLocation, item.DDLs, item.Source)
    63  }
    64  
    65  // ShardingSequence records a list of DDLItem.
    66  type ShardingSequence struct {
    67  	Items []*DDLItem `json:"items"`
    68  }
    69  
    70  // IsPrefixSequence checks whether a ShardingSequence is the prefix sequence of other.
    71  func (seq *ShardingSequence) IsPrefixSequence(other *ShardingSequence) bool {
    72  	if len(seq.Items) > len(other.Items) {
    73  		return false
    74  	}
    75  	for idx := range seq.Items {
    76  		if !utils.CompareShardingDDLs(seq.Items[idx].DDLs, other.Items[idx].DDLs) {
    77  			return false
    78  		}
    79  	}
    80  	return true
    81  }
    82  
    83  // String returns the ShardingSequence's json string.
    84  func (seq *ShardingSequence) String() string {
    85  	jsonSeq, err := json.Marshal(seq.Items)
    86  	if err != nil {
    87  		log.L().DPanic("fail to marshal ShardingSequence to json", zap.Reflect("shard sequence", seq))
    88  	}
    89  	return string(jsonSeq)
    90  }
    91  
    92  // ShardingMeta stores sharding ddl sequence
    93  // including global sequence and each source's own sequence
    94  // NOTE: sharding meta is not thread safe, it must be used in thread safe context.
    95  type ShardingMeta struct {
    96  	activeIdx int                          // the first unsynced DDL index
    97  	global    *ShardingSequence            // merged sharding sequence of all source tables
    98  	sources   map[string]*ShardingSequence // source table ID -> its sharding sequence
    99  	tableName string                       // table name (with schema) used in downstream meta db
   100  
   101  	enableGTID bool // whether enableGTID, used to compare location
   102  }
   103  
   104  // NewShardingMeta creates a new ShardingMeta.
   105  func NewShardingMeta(schema, table string, enableGTID bool) *ShardingMeta {
   106  	return &ShardingMeta{
   107  		tableName: dbutil.TableName(schema, table),
   108  		global:    &ShardingSequence{Items: make([]*DDLItem, 0)},
   109  		sources:   make(map[string]*ShardingSequence),
   110  
   111  		enableGTID: enableGTID,
   112  	}
   113  }
   114  
   115  // RestoreFromData restores ShardingMeta from given data.
   116  func (meta *ShardingMeta) RestoreFromData(sourceTableID string, activeIdx int, isGlobal bool, data []byte, flavor string) error {
   117  	items := make([]*DDLItem, 0)
   118  	err := json.Unmarshal(data, &items)
   119  	if err != nil {
   120  		return terror.ErrSyncUnitInvalidShardMeta.Delegate(err)
   121  	}
   122  
   123  	// set FirstLocation
   124  	for _, item := range items {
   125  		gset, err1 := gtid.ParserGTID(flavor, item.FirstGTIDSet)
   126  		if err1 != nil {
   127  			return err1
   128  		}
   129  		item.FirstLocation = binlog.NewLocation(
   130  			item.FirstPosition,
   131  			gset,
   132  		)
   133  	}
   134  
   135  	if isGlobal {
   136  		meta.global = &ShardingSequence{Items: items}
   137  	} else {
   138  		meta.sources[sourceTableID] = &ShardingSequence{Items: items}
   139  	}
   140  	meta.activeIdx = activeIdx
   141  	return nil
   142  }
   143  
   144  // ActiveIdx returns the activeIdx of sharding meta.
   145  func (meta *ShardingMeta) ActiveIdx() int {
   146  	return meta.activeIdx
   147  }
   148  
   149  // Reinitialize reinitialize the shardingmeta.
   150  func (meta *ShardingMeta) Reinitialize() {
   151  	meta.activeIdx = 0
   152  	meta.global = &ShardingSequence{make([]*DDLItem, 0)}
   153  	meta.sources = make(map[string]*ShardingSequence)
   154  }
   155  
   156  // checkItemExists checks whether DDLItem exists in its source sequence
   157  // if exists, return the index of DDLItem in source sequence.
   158  // if not exists, return the next index in source sequence.
   159  func (meta *ShardingMeta) checkItemExists(item *DDLItem) (int, bool) {
   160  	source, ok := meta.sources[item.Source]
   161  	if !ok {
   162  		return 0, false
   163  	}
   164  	for idx, ddlItem := range source.Items {
   165  		if binlog.CompareLocation(item.FirstLocation, ddlItem.FirstLocation, meta.enableGTID) == 0 {
   166  			return idx, true
   167  		}
   168  	}
   169  	return len(source.Items), false
   170  }
   171  
   172  // AddItem adds a new coming DDLItem into ShardingMeta
   173  // 1. if DDLItem already exists in source sequence, check whether it is active DDL only
   174  // 2. add the DDLItem into its related source sequence
   175  // 3. if it is a new DDL in global sequence, which means len(source.Items) > len(global.Items), add it into global sequence
   176  // 4. check the source sequence is the prefix-sequence of global sequence, if not, return an error
   177  // returns:
   178  //
   179  //	active: whether the DDL will be processed in this round
   180  func (meta *ShardingMeta) AddItem(item *DDLItem) (active bool, err error) {
   181  	index, exists := meta.checkItemExists(item)
   182  	if exists {
   183  		return index == meta.activeIdx, nil
   184  	}
   185  
   186  	if source, ok := meta.sources[item.Source]; !ok {
   187  		meta.sources[item.Source] = &ShardingSequence{Items: []*DDLItem{item}}
   188  	} else {
   189  		source.Items = append(source.Items, item)
   190  	}
   191  
   192  	global, source := meta.global, meta.sources[item.Source]
   193  	if len(source.Items) > len(global.Items) {
   194  		global.Items = append(global.Items, item)
   195  	}
   196  
   197  	if !source.IsPrefixSequence(global) {
   198  		return false, terror.ErrSyncUnitDDLWrongSequence.Generate(source.Items, global.Items)
   199  	}
   200  
   201  	return index == meta.activeIdx, nil
   202  }
   203  
   204  // GetGlobalActiveDDL returns activeDDL in global sequence.
   205  func (meta *ShardingMeta) GetGlobalActiveDDL() *DDLItem {
   206  	if meta.activeIdx < len(meta.global.Items) {
   207  		return meta.global.Items[meta.activeIdx]
   208  	}
   209  	return nil
   210  }
   211  
   212  // GetGlobalItems returns global DDLItems.
   213  func (meta *ShardingMeta) GetGlobalItems() []*DDLItem {
   214  	return meta.global.Items
   215  }
   216  
   217  // GetActiveDDLItem returns the source table's active DDLItem
   218  // if in DDL unsynced procedure, the active DDLItem means the syncing DDL
   219  // if in re-sync procedure, the active DDLItem means the next syncing DDL in DDL syncing sequence, may be nil.
   220  func (meta *ShardingMeta) GetActiveDDLItem(tableSource string) *DDLItem {
   221  	source, ok := meta.sources[tableSource]
   222  	if !ok {
   223  		return nil
   224  	}
   225  	if meta.activeIdx < len(source.Items) {
   226  		return source.Items[meta.activeIdx]
   227  	}
   228  	return nil
   229  }
   230  
   231  // InSequenceSharding returns whether in sequence sharding.
   232  func (meta *ShardingMeta) InSequenceSharding() bool {
   233  	globalItemCount := len(meta.global.Items)
   234  	return globalItemCount > 0 && meta.activeIdx < globalItemCount
   235  }
   236  
   237  // ResolveShardingDDL resolves one sharding DDL and increase activeIdx
   238  // if activeIdx equals to the length of global sharding sequence, it means all
   239  // sharding DDL in this ShardingMeta sequence is resolved and will reinitialize
   240  // the ShardingMeta, return true if all DDLs are resolved.
   241  func (meta *ShardingMeta) ResolveShardingDDL() bool {
   242  	meta.activeIdx++
   243  	if meta.activeIdx == len(meta.global.Items) {
   244  		meta.Reinitialize()
   245  		return true
   246  	}
   247  	return false
   248  }
   249  
   250  // ActiveDDLFirstLocation returns the first binlog position of active DDL.
   251  func (meta *ShardingMeta) ActiveDDLFirstLocation() (binlog.Location, error) {
   252  	if meta.activeIdx >= len(meta.global.Items) {
   253  		return binlog.Location{}, terror.ErrSyncUnitDDLActiveIndexLarger.Generate(meta.activeIdx, meta.global.Items)
   254  	}
   255  
   256  	return meta.global.Items[meta.activeIdx].FirstLocation, nil
   257  }
   258  
   259  // FlushData returns sharding meta flush SQL and args.
   260  func (meta *ShardingMeta) FlushData(sourceID, tableID string) ([]string, [][]interface{}) {
   261  	if len(meta.global.Items) == 0 {
   262  		sql2 := fmt.Sprintf("DELETE FROM %s where source_id=? and target_table_id=?", meta.tableName)
   263  		args2 := []interface{}{sourceID, tableID}
   264  		return []string{sql2}, [][]interface{}{args2}
   265  	}
   266  	var (
   267  		sqls    = make([]string, 1+len(meta.sources))
   268  		args    = make([][]interface{}, 0, 1+len(meta.sources))
   269  		baseSQL = fmt.Sprintf("INSERT INTO %s (`source_id`, `target_table_id`, `source_table_id`, `active_index`, `is_global`, `data`) VALUES(?,?,?,?,?,?) ON DUPLICATE KEY UPDATE `data`=?, `active_index`=?", meta.tableName)
   270  	)
   271  	for i := range sqls {
   272  		sqls[i] = baseSQL
   273  	}
   274  	args = append(args, []interface{}{sourceID, tableID, "", meta.activeIdx, true, meta.global.String(), meta.global.String(), meta.activeIdx})
   275  	for source, seq := range meta.sources {
   276  		args = append(args, []interface{}{sourceID, tableID, source, meta.activeIdx, false, seq.String(), seq.String(), meta.activeIdx})
   277  	}
   278  	return sqls, args
   279  }
   280  
   281  func (meta *ShardingMeta) genRemoveSQL(sourceID, tableID string) (string, []interface{}) {
   282  	sql := fmt.Sprintf("DELETE FROM %s where source_id=? and target_table_id=?", meta.tableName)
   283  	return sql, []interface{}{sourceID, tableID}
   284  }
   285  
   286  // CheckAndUpdate check and fix schema and table names for all the sharding groups.
   287  func (meta *ShardingMeta) CheckAndUpdate(logger log.Logger, targetID string, schemaMap map[string]string, tablesMap map[string]map[string]string) ([]string, [][]interface{}, error) {
   288  	if len(schemaMap) == 0 && len(tablesMap) == 0 {
   289  		return nil, nil, nil
   290  	}
   291  
   292  	checkSourceID := func(source string) (string, bool) {
   293  		sourceTable := utils.UnpackTableID(source)
   294  		schemaName, tblName := sourceTable.Schema, sourceTable.Name
   295  		realSchema, changed := schemaMap[schemaName]
   296  		if !changed {
   297  			realSchema = schemaName
   298  		}
   299  		tblMap := tablesMap[schemaName]
   300  		realTable, ok := tblMap[tblName]
   301  		if ok {
   302  			changed = true
   303  		} else {
   304  			realTable = tblName
   305  		}
   306  		newTableID := utils.GenTableID(&filter.Table{Schema: realSchema, Name: realTable})
   307  		return newTableID, changed
   308  	}
   309  
   310  	for _, item := range meta.global.Items {
   311  		newID, changed := checkSourceID(item.Source)
   312  		if changed {
   313  			item.Source = newID
   314  		}
   315  	}
   316  
   317  	sourceIDsMap := make(map[string]string)
   318  	for sourceID, seqs := range meta.sources {
   319  		newSourceID, changed := checkSourceID(sourceID)
   320  		for _, item := range seqs.Items {
   321  			newID, hasChanged := checkSourceID(item.Source)
   322  			if hasChanged {
   323  				item.Source = newID
   324  				changed = true
   325  			}
   326  		}
   327  		if changed {
   328  			sourceIDsMap[sourceID] = newSourceID
   329  		}
   330  	}
   331  	var (
   332  		sqls []string
   333  		args [][]interface{}
   334  	)
   335  	for oldID, newID := range sourceIDsMap {
   336  		if oldID != newID {
   337  			seqs := meta.sources[oldID]
   338  			delete(meta.sources, oldID)
   339  			meta.sources[newID] = seqs
   340  			removeSQL, arg := meta.genRemoveSQL(oldID, targetID)
   341  			sqls = append(sqls, removeSQL)
   342  			args = append(args, arg)
   343  		}
   344  		logger.Info("fix sharding meta", zap.String("old", oldID), zap.String("new", newID))
   345  		fixedSQLs, fixedArgs := meta.FlushData(newID, targetID)
   346  		sqls = append(sqls, fixedSQLs...)
   347  		args = append(args, fixedArgs...)
   348  	}
   349  
   350  	return sqls, args, nil
   351  }