github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/shardddl/optimism/info.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package optimism
    15  
    16  import (
    17  	"context"
    18  	"encoding/json"
    19  	"fmt"
    20  
    21  	"github.com/pingcap/tidb/pkg/parser/model"
    22  	"github.com/pingcap/tidb/pkg/util/schemacmp"
    23  	"github.com/pingcap/tiflow/dm/common"
    24  	"github.com/pingcap/tiflow/dm/pkg/etcdutil"
    25  	"github.com/pingcap/tiflow/dm/pkg/log"
    26  	"go.etcd.io/etcd/api/v3/mvccpb"
    27  	clientv3 "go.etcd.io/etcd/client/v3"
    28  )
    29  
    30  // TODO: much of the code in optimistic mode is very similar to pessimistic mode, we can try to combine them together.
    31  
// Info represents the shard DDL information.
// This information should be persisted in etcd so that it can be retrieved after the DM-master leader is restarted or changed.
// NOTE: `Task`, `Source`, `UpSchema` and `UpTable` are redundant with the etcd key path; they are kept in the value for convenience.
// Info is put when DM-worker receives a shard DDL for a table,
// and is deleted when DM-master removes the lock,
// because the newest schema in Info is needed to recover the lock when DM-master restarts.
// When a new Info is put to overwrite the old one, DM-master should update the lock based on the new one.
type Info struct {
	Task       string   `json:"task"`        // data migration task name
	Source     string   `json:"source"`      // upstream source ID
	UpSchema   string   `json:"up-schema"`   // upstream/source schema name, different sources can have the same schema name
	UpTable    string   `json:"up-table"`    // upstream/source table name, different sources can have the same table name
	DownSchema string   `json:"down-schema"` // downstream/target schema name
	DownTable  string   `json:"down-table"`  // downstream/target table name
	DDLs       []string `json:"ddls"`        // DDL statements

	TableInfoBefore *model.TableInfo   `json:"table-info-before"` // the tracked table schema before applying the DDLs
	TableInfosAfter []*model.TableInfo `json:"table-info-after"`  // the tracked table schemas after applying the DDLs

	// Only used to report to the caller of the watcher; it is never marshaled.
	// If it is true, the Info has been deleted in etcd.
	IsDeleted bool `json:"-"`

	// Only set (from the etcd key version) when the Info is got or watched from etcd; never marshaled.
	Version int64 `json:"-"`

	// Only set (from the etcd ModRevision) when the Info is got from etcd or observed as a PUT by the watcher; never marshaled.
	// Used to sort infos when recovering locks.
	Revision int64 `json:"-"`

	// Used to resolve conflict.
	IgnoreConflict bool `json:"ignore-conflict"`
}
    65  
// LogInfo is the log-friendly counterpart of Info: the table schemas are carried
// as plain strings instead of *model.TableInfo, and the fields Info hides from
// JSON (IsDeleted, Version, Revision) are included in the output.
type LogInfo struct {
	Task           string   `json:"task"`
	Source         string   `json:"source"`
	UpSchema       string   `json:"up-schema"`
	UpTable        string   `json:"up-table"`
	DownSchema     string   `json:"down-schema"`
	DownTable      string   `json:"down-table"`
	DDLs           []string `json:"ddls"`
	TableBefore    string   `json:"table-before"` // string form of the schema before the DDLs, empty if unknown
	TableAfter     string   `json:"table-after"`  // string form of the schema after the DDLs, empty if unknown
	IsDeleted      bool     `json:"is-deleted"`
	Version        int64    `json:"version"`
	Revision       int64    `json:"revision"`
	IgnoreConflict bool     `json:"ignore-conflict"`
}
    82  
    83  // NewInfo creates a new Info instance.
    84  func NewInfo(task, source, upSchema, upTable, downSchema, downTable string,
    85  	ddls []string, tableInfoBefore *model.TableInfo, tableInfosAfter []*model.TableInfo,
    86  ) Info {
    87  	return Info{
    88  		Task:            task,
    89  		Source:          source,
    90  		UpSchema:        upSchema,
    91  		UpTable:         upTable,
    92  		DownSchema:      downSchema,
    93  		DownTable:       downTable,
    94  		DDLs:            ddls,
    95  		TableInfoBefore: tableInfoBefore,
    96  		TableInfosAfter: tableInfosAfter,
    97  	}
    98  }
    99  
   100  // String implements Stringer interface.
   101  func (i Info) String() string {
   102  	s, _ := i.toJSON()
   103  	return s
   104  }
   105  
   106  // ShortString returns short string of Info.
   107  func (i *Info) ShortString() string {
   108  	logInfo := LogInfo{
   109  		Task:           i.Task,
   110  		Source:         i.Source,
   111  		UpSchema:       i.UpSchema,
   112  		UpTable:        i.UpTable,
   113  		DownSchema:     i.DownSchema,
   114  		DownTable:      i.DownTable,
   115  		DDLs:           i.DDLs,
   116  		IsDeleted:      i.IsDeleted,
   117  		Version:        i.Version,
   118  		Revision:       i.Revision,
   119  		IgnoreConflict: i.IgnoreConflict,
   120  	}
   121  	if i.TableInfoBefore != nil {
   122  		logInfo.TableBefore = schemacmp.Encode(i.TableInfoBefore).String()
   123  	}
   124  	if len(i.TableInfosAfter) != 0 {
   125  		logInfo.TableAfter = schemacmp.Encode(i.TableInfosAfter[len(i.TableInfosAfter)-1]).String()
   126  	}
   127  	s, _ := logInfo.toJSON()
   128  	return s
   129  }
   130  
   131  // toJSON returns the string of JSON represent.
   132  func (i LogInfo) toJSON() (string, error) {
   133  	data, err := json.Marshal(i)
   134  	if err != nil {
   135  		return "", err
   136  	}
   137  	return string(data), nil
   138  }
   139  
   140  // toJSON returns the string of JSON represent.
   141  func (i Info) toJSON() (string, error) {
   142  	data, err := json.Marshal(i)
   143  	if err != nil {
   144  		return "", err
   145  	}
   146  	return string(data), nil
   147  }
   148  
   149  // infoFromJSON constructs Info from its JSON represent.
   150  func infoFromJSON(s string) (i Info, err error) {
   151  	err = json.Unmarshal([]byte(s), &i)
   152  	if err != nil {
   153  		// For compatibility.
   154  		// In v2.0.2, we changed struct of table-info-after but forgot to upgrade etcd value.
   155  		// To keep the ModRevision of info, we change them after getting info instead of change all the value in etcd when upgrade
   156  		// All the Info will be upgraded after new info putted or lock resolved.
   157  		oldInfo, newErr := oldInfoFromJSON(s)
   158  		if newErr != nil {
   159  			log.L().Error("unmarshal old info", log.ShortError(newErr))
   160  			return
   161  		}
   162  		return oldInfo.toInfo(), nil
   163  	}
   164  	return
   165  }
   166  
   167  // PutInfo puts the shard DDL info into etcd.
   168  // NOTE:
   169  //
   170  //	In some cases before the lock resolved, the same DDL info may be PUT multiple times:
   171  //	  1. start-task after stop-task.
   172  //	  2. resume-task after paused manually or automatically.
   173  //	  3. the task scheduled to another DM-worker instance (just like case-1).
   174  //	Then we need to ensure re-PUT is safe:
   175  //	  1. DM-master can construct the lock and do the coordination correctly.
   176  //	  2. DM-worker can re-PUT and comply with the coordination correctly.
   177  //
   178  // This function should often be called by DM-worker.
   179  func PutInfo(cli *clientv3.Client, info Info) (int64, error) {
   180  	op, err := putInfoOp(info)
   181  	if err != nil {
   182  		return 0, err
   183  	}
   184  	_, rev, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(op))
   185  	return rev, err
   186  }
   187  
   188  // GetAllInfo gets all shard DDL info in etcd currently.
   189  // This function should often be called by DM-master.
   190  // k/k/k/k/v: task-name -> source-ID -> upstream-schema-name -> upstream-table-name -> shard DDL info.
   191  // ugly code, but have no better idea now.
   192  func GetAllInfo(cli *clientv3.Client) (map[string]map[string]map[string]map[string]Info, int64, error) {
   193  	respTxn, _, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(clientv3.OpGet(common.ShardDDLOptimismInfoKeyAdapter.Path(), clientv3.WithPrefix())))
   194  	if err != nil {
   195  		return nil, 0, err
   196  	}
   197  	resp := respTxn.Responses[0].GetResponseRange()
   198  
   199  	ifm := make(map[string]map[string]map[string]map[string]Info)
   200  	for _, kv := range resp.Kvs {
   201  		info, err2 := infoFromJSON(string(kv.Value))
   202  		if err2 != nil {
   203  			return nil, 0, err2
   204  		}
   205  		info.Version = kv.Version
   206  		info.Revision = kv.ModRevision
   207  
   208  		if _, ok := ifm[info.Task]; !ok {
   209  			ifm[info.Task] = make(map[string]map[string]map[string]Info)
   210  		}
   211  		if _, ok := ifm[info.Task][info.Source]; !ok {
   212  			ifm[info.Task][info.Source] = make(map[string]map[string]Info)
   213  		}
   214  		if _, ok := ifm[info.Task][info.Source][info.UpSchema]; !ok {
   215  			ifm[info.Task][info.Source][info.UpSchema] = make(map[string]Info)
   216  		}
   217  		ifm[info.Task][info.Source][info.UpSchema][info.UpTable] = info
   218  	}
   219  
   220  	return ifm, resp.Header.Revision, nil
   221  }
   222  
   223  // WatchInfo watches PUT & DELETE operations for info.
   224  // This function should often be called by DM-master.
   225  func WatchInfo(ctx context.Context, cli *clientv3.Client, revision int64,
   226  	outCh chan<- Info, errCh chan<- error,
   227  ) {
   228  	wCtx, cancel := context.WithCancel(ctx)
   229  	defer cancel()
   230  	// NOTE: WithPrevKV used to get a valid `ev.PrevKv` for deletion.
   231  	ch := cli.Watch(wCtx, common.ShardDDLOptimismInfoKeyAdapter.Path(),
   232  		clientv3.WithPrefix(), clientv3.WithRev(revision), clientv3.WithPrevKV())
   233  
   234  	for {
   235  		select {
   236  		case <-ctx.Done():
   237  			return
   238  		case resp, ok := <-ch:
   239  			if !ok {
   240  				return
   241  			}
   242  			if resp.Canceled {
   243  				select {
   244  				case errCh <- resp.Err():
   245  				case <-ctx.Done():
   246  				}
   247  				return
   248  			}
   249  
   250  			for _, ev := range resp.Events {
   251  				var (
   252  					info Info
   253  					err  error
   254  				)
   255  
   256  				switch ev.Type {
   257  				case mvccpb.PUT:
   258  					info, err = infoFromJSON(string(ev.Kv.Value))
   259  					info.Version = ev.Kv.Version
   260  					info.Revision = ev.Kv.ModRevision
   261  				case mvccpb.DELETE:
   262  					info, err = infoFromJSON(string(ev.PrevKv.Value))
   263  					info.IsDeleted = true
   264  				default:
   265  					// this should not happen.
   266  					err = fmt.Errorf("unsupported ectd event type %v", ev.Type)
   267  				}
   268  
   269  				if err != nil {
   270  					select {
   271  					case errCh <- err:
   272  					case <-ctx.Done():
   273  						return
   274  					}
   275  				} else {
   276  					select {
   277  					case outCh <- info:
   278  					case <-ctx.Done():
   279  						return
   280  					}
   281  				}
   282  			}
   283  		}
   284  	}
   285  }
   286  
   287  // putInfoOp returns a PUT etcd operation for Info.
   288  func putInfoOp(info Info) (clientv3.Op, error) {
   289  	value, err := info.toJSON()
   290  	if err != nil {
   291  		return clientv3.Op{}, err
   292  	}
   293  	key := common.ShardDDLOptimismInfoKeyAdapter.Encode(info.Task, info.Source, info.UpSchema, info.UpTable)
   294  	return clientv3.OpPut(key, value), nil
   295  }
   296  
   297  // deleteInfoOp returns a DELETE etcd operation for info.
   298  // This operation should often be sent by DM-worker.
   299  func deleteInfoOp(info Info) clientv3.Op {
   300  	return clientv3.OpDelete(common.ShardDDLOptimismInfoKeyAdapter.Encode(
   301  		info.Task, info.Source, info.UpSchema, info.UpTable))
   302  }
   303  
   304  // ClearTestInfoOperationColumns is used to clear all shard DDL information in optimism mode.
   305  // it only used for testing now.
   306  func ClearTestInfoOperationColumn(cli *clientv3.Client) error {
   307  	clearSource := clientv3.OpDelete(common.ShardDDLOptimismSourceTablesKeyAdapter.Path(), clientv3.WithPrefix())
   308  	clearInfo := clientv3.OpDelete(common.ShardDDLOptimismInfoKeyAdapter.Path(), clientv3.WithPrefix())
   309  	clearOp := clientv3.OpDelete(common.ShardDDLOptimismOperationKeyAdapter.Path(), clientv3.WithPrefix())
   310  	clearColumns := clientv3.OpDelete(common.ShardDDLOptimismDroppedColumnsKeyAdapter.Path(), clientv3.WithPrefix())
   311  	_, err := cli.Txn(context.Background()).Then(clearSource, clearInfo, clearOp, clearColumns).Commit()
   312  	return err
   313  }
   314  
// OldInfo represents the info layout stored in etcd before v2.0.2.
// It differs from Info in that `table-info-after` holds a single table schema
// instead of a list, and it has no `ignore-conflict` field.
type OldInfo struct {
	Task       string   `json:"task"`
	Source     string   `json:"source"`
	UpSchema   string   `json:"up-schema"`
	UpTable    string   `json:"up-table"`
	DownSchema string   `json:"down-schema"`
	DownTable  string   `json:"down-table"`
	DDLs       []string `json:"ddls"`

	TableInfoBefore *model.TableInfo `json:"table-info-before"` // the tracked table schema before applying the DDLs
	TableInfoAfter  *model.TableInfo `json:"table-info-after"`  // the tracked table schema after applying the DDLs
}
   328  
   329  // oldInfoFromJSON constructs OldInfo from its JSON represent.
   330  func oldInfoFromJSON(s string) (oldInfo OldInfo, err error) {
   331  	err = json.Unmarshal([]byte(s), &oldInfo)
   332  	return
   333  }
   334  
   335  // toInfo converts OldInfo to Info.
   336  func (oldInfo *OldInfo) toInfo() Info {
   337  	return Info{
   338  		Task:            oldInfo.Task,
   339  		Source:          oldInfo.Source,
   340  		UpSchema:        oldInfo.UpSchema,
   341  		UpTable:         oldInfo.UpTable,
   342  		DownSchema:      oldInfo.DownSchema,
   343  		DownTable:       oldInfo.DownTable,
   344  		DDLs:            oldInfo.DDLs,
   345  		TableInfoBefore: oldInfo.TableInfoBefore,
   346  		TableInfosAfter: []*model.TableInfo{oldInfo.TableInfoAfter},
   347  	}
   348  }
   349  
   350  // CheckDDLInfos try to check and fix all the schema and table names for DDL info.
   351  func CheckDDLInfos(cli *clientv3.Client, source string, schemaMap map[string]string, tablesMap map[string]map[string]string) error {
   352  	allInfos, _, err := GetAllInfo(cli)
   353  	if err != nil {
   354  		return err
   355  	}
   356  
   357  	for _, taskTableInfos := range allInfos {
   358  		sourceInfos, ok := taskTableInfos[source]
   359  		if !ok {
   360  			continue
   361  		}
   362  		for schema, tblInfos := range sourceInfos {
   363  			realSchema, hasChange := schemaMap[schema]
   364  			if !hasChange {
   365  				realSchema = schema
   366  			}
   367  
   368  			tblMap := tablesMap[schema]
   369  			for tbl, info := range tblInfos {
   370  				realTable, tableChange := tblMap[tbl]
   371  				if !tableChange {
   372  					realTable = tbl
   373  					tableChange = hasChange
   374  				}
   375  				if tableChange {
   376  					delOp := deleteInfoOp(info)
   377  					info.UpSchema = realSchema
   378  					info.UpTable = realTable
   379  					putOp, err := putInfoOp(info)
   380  					if err != nil {
   381  						return err
   382  					}
   383  					_, _, err = etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(delOp, putOp))
   384  					if err != nil {
   385  						return err
   386  					}
   387  				}
   388  			}
   389  		}
   390  	}
   391  	return nil
   392  }