github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/shardddl/optimism/table.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package optimism
    15  
    16  import (
    17  	"context"
    18  	"encoding/json"
    19  	"fmt"
    20  
    21  	"github.com/pingcap/tiflow/dm/common"
    22  	"github.com/pingcap/tiflow/dm/pkg/etcdutil"
    23  	"go.etcd.io/etcd/api/v3/mvccpb"
    24  	clientv3 "go.etcd.io/etcd/client/v3"
    25  )
    26  
// SourceTables represents the upstream/source tables for a data migration **subtask**.
// This information should be persisted in etcd so that it can be retrieved after the DM-master leader is restarted or changed.
// We need this because only one shard group exists for **every** target table in the optimistic mode (in DM-master),
// so we need DM-worker to report its upstream table names to DM-master.
// NOTE: `Task` and `Source` are redundant with the etcd key path; they are kept in the value for convenience.
// SourceTables is put when the subtask is started by DM-worker,
// is updated when tables are added/removed in the upstream source by DM-worker,
// and **may be** deleted when the subtask is stopped by DM-worker later.
type SourceTables struct {
	Task   string `json:"task"`   // data migration task name
	Source string `json:"source"` // upstream source ID

	// downstream-schema-name -> downstream-table-name -> upstream-schema-name -> upstream-table-name -> struct{},
	// multiple downstream/target tables (<downstream-schema-name, downstream-table-name> pair) may exist in one subtask.
	Tables map[string]map[string]map[string]map[string]struct{} `json:"tables"`

	// only used to report to the caller of the watcher, do not marshal it.
	// if it's true, it means the SourceTables has been deleted in etcd.
	IsDeleted bool `json:"-"`
}
    47  
// TargetTable represents the upstream/source tables for **one** target table.
// It is often generated from `SourceTables` for the specified downstream table
// (see `SourceTables.TargetTable`).
type TargetTable struct {
	Task       string `json:"task"`        // data migration task name
	Source     string `json:"source"`      // upstream source ID
	DownSchema string `json:"down-schema"` // downstream schema name
	DownTable  string `json:"down-table"`  // downstream table name

	// upstream-schema-name -> upstream-table-name -> struct{}
	UpTables map[string]map[string]struct{} `json:"up-tables"`
}
    59  
    60  // emptyTargetTable returns an empty TargetTable instance.
    61  func emptyTargetTable() TargetTable {
    62  	return TargetTable{}
    63  }
    64  
    65  // newTargetTable returns a TargetTable instance.
    66  func newTargetTable(task, source, downSchema, downTable string,
    67  	upTables map[string]map[string]struct{},
    68  ) TargetTable {
    69  	return TargetTable{
    70  		Task:       task,
    71  		Source:     source,
    72  		DownSchema: downSchema,
    73  		DownTable:  downTable,
    74  		UpTables:   upTables,
    75  	}
    76  }
    77  
    78  // IsEmpty returns whether the TargetTable instance is empty.
    79  func (tt TargetTable) IsEmpty() bool {
    80  	return tt.Task == "" // now we treat it as empty if no task name specified.
    81  }
    82  
    83  // NewSourceTables creates a new SourceTables instances.
    84  func NewSourceTables(task, source string) SourceTables {
    85  	return SourceTables{
    86  		Task:   task,
    87  		Source: source,
    88  		Tables: make(map[string]map[string]map[string]map[string]struct{}),
    89  	}
    90  }
    91  
    92  // String implements Stringer interface.
    93  func (st SourceTables) String() string {
    94  	s, _ := st.toJSON()
    95  	return s
    96  }
    97  
    98  // toJSON returns the string of JSON represent.
    99  func (st SourceTables) toJSON() (string, error) {
   100  	data, err := json.Marshal(st)
   101  	if err != nil {
   102  		return "", err
   103  	}
   104  	return string(data), nil
   105  }
   106  
   107  // AddTable adds a table into SourceTables.
   108  // it returns whether added (not exist before).
   109  func (st *SourceTables) AddTable(upSchema, upTable, downSchema, downTable string) bool {
   110  	if _, ok := st.Tables[downSchema]; !ok {
   111  		st.Tables[downSchema] = make(map[string]map[string]map[string]struct{})
   112  	}
   113  	if _, ok := st.Tables[downSchema][downTable]; !ok {
   114  		st.Tables[downSchema][downTable] = make(map[string]map[string]struct{})
   115  	}
   116  	if _, ok := st.Tables[downSchema][downTable][upSchema]; !ok {
   117  		st.Tables[downSchema][downTable][upSchema] = make(map[string]struct{})
   118  	}
   119  	if _, ok := st.Tables[downSchema][downTable][upSchema][upTable]; !ok {
   120  		st.Tables[downSchema][downTable][upSchema][upTable] = struct{}{}
   121  		return true
   122  	}
   123  	return false
   124  }
   125  
   126  // RemoveTable removes a table from SourceTables.
   127  // it returns whether removed (exist before).
   128  func (st *SourceTables) RemoveTable(upSchema, upTable, downSchema, downTable string) bool {
   129  	if _, ok := st.Tables[downSchema]; !ok {
   130  		return false
   131  	}
   132  	if _, ok := st.Tables[downSchema][downTable]; !ok {
   133  		return false
   134  	}
   135  	if _, ok := st.Tables[downSchema][downTable][upSchema]; !ok {
   136  		return false
   137  	}
   138  	if _, ok := st.Tables[downSchema][downTable][upSchema][upTable]; !ok {
   139  		return false
   140  	}
   141  
   142  	delete(st.Tables[downSchema][downTable][upSchema], upTable)
   143  	if len(st.Tables[downSchema][downTable][upSchema]) == 0 {
   144  		delete(st.Tables[downSchema][downTable], upSchema)
   145  	}
   146  	if len(st.Tables[downSchema][downTable]) == 0 {
   147  		delete(st.Tables[downSchema], downTable)
   148  	}
   149  	if len(st.Tables[downSchema]) == 0 {
   150  		delete(st.Tables, downSchema)
   151  	}
   152  	return true
   153  }
   154  
// RouteTable represents a table in upstream/downstream,
// i.e. one <upstream schema, upstream table> -> <downstream schema, downstream table> route.
// It only contains strings, so it is comparable and is used as a map key in this file.
type RouteTable struct {
	UpSchema   string // upstream schema name
	UpTable    string // upstream table name
	DownSchema string // downstream schema name
	DownTable  string // downstream table name
}
   162  
   163  func (st *SourceTables) toRouteTable() map[RouteTable]struct{} {
   164  	tables := make(map[RouteTable]struct{})
   165  	for downSchema, downTables := range st.Tables {
   166  		for downTable, upSchemas := range downTables {
   167  			for upSchema, upTables := range upSchemas {
   168  				for upTable := range upTables {
   169  					t := RouteTable{
   170  						UpSchema:   upSchema,
   171  						UpTable:    upTable,
   172  						DownSchema: downSchema,
   173  						DownTable:  downTable,
   174  					}
   175  					tables[t] = struct{}{}
   176  				}
   177  			}
   178  		}
   179  	}
   180  	return tables
   181  }
   182  
   183  func DiffSourceTables(oldST, newST SourceTables) (map[RouteTable]struct{}, map[RouteTable]struct{}) {
   184  	oldTables := oldST.toRouteTable()
   185  	newTables := newST.toRouteTable()
   186  
   187  	droppedTables := make(map[RouteTable]struct{})
   188  	addedTables := make(map[RouteTable]struct{})
   189  	for table := range oldTables {
   190  		if _, ok := newTables[table]; !ok {
   191  			droppedTables[table] = struct{}{}
   192  		} else {
   193  			delete(newTables, table)
   194  		}
   195  	}
   196  
   197  	for table := range newTables {
   198  		addedTables[table] = struct{}{}
   199  	}
   200  	return addedTables, droppedTables
   201  }
   202  
   203  // TargetTable returns a TargetTable instance for a specified downstream table,
   204  // returns an empty TargetTable instance if no tables exist.
   205  func (st *SourceTables) TargetTable(downSchema, downTable string) TargetTable {
   206  	ett := emptyTargetTable()
   207  	if _, ok := st.Tables[downSchema]; !ok {
   208  		return ett
   209  	}
   210  	if _, ok := st.Tables[downSchema][downTable]; !ok {
   211  		return ett
   212  	}
   213  
   214  	// copy upstream tables.
   215  	tables := make(map[string]map[string]struct{})
   216  	for upSchema, upTables := range st.Tables[downSchema][downTable] {
   217  		tables[upSchema] = make(map[string]struct{})
   218  		for upTable := range upTables {
   219  			tables[upSchema][upTable] = struct{}{}
   220  		}
   221  	}
   222  
   223  	return newTargetTable(st.Task, st.Source, downSchema, downTable, tables)
   224  }
   225  
   226  // sourceTablesFromJSON constructs SourceTables from its JSON represent.
   227  func sourceTablesFromJSON(s string) (st SourceTables, err error) {
   228  	err = json.Unmarshal([]byte(s), &st)
   229  	return
   230  }
   231  
   232  // PutSourceTables puts source tables into etcd.
   233  // This function should often be called by DM-worker.
   234  func PutSourceTables(cli *clientv3.Client, st SourceTables) (int64, error) {
   235  	op, err := putSourceTablesOp(st)
   236  	if err != nil {
   237  		return 0, err
   238  	}
   239  	_, rev, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(op))
   240  	return rev, err
   241  }
   242  
   243  // DeleteSourceTables deletes the source tables in etcd.
   244  // This function should often be called by DM-worker.
   245  func DeleteSourceTables(cli *clientv3.Client, st SourceTables) (int64, error) {
   246  	key := common.ShardDDLOptimismSourceTablesKeyAdapter.Encode(st.Task, st.Source)
   247  	_, rev, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(clientv3.OpDelete(key)))
   248  	return rev, err
   249  }
   250  
   251  // GetAllSourceTables gets all source tables in etcd currently.
   252  // This function should often be called by DM-master.
   253  // k/k/v: task-name -> source-ID -> source tables.
   254  func GetAllSourceTables(cli *clientv3.Client) (map[string]map[string]SourceTables, int64, error) {
   255  	respTxn, _, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(clientv3.OpGet(common.ShardDDLOptimismSourceTablesKeyAdapter.Path(), clientv3.WithPrefix())))
   256  	if err != nil {
   257  		return nil, 0, err
   258  	}
   259  	resp := respTxn.Responses[0].GetResponseRange()
   260  
   261  	stm := make(map[string]map[string]SourceTables)
   262  	for _, kv := range resp.Kvs {
   263  		st, err2 := sourceTablesFromJSON(string(kv.Value))
   264  		if err2 != nil {
   265  			return nil, 0, err2
   266  		}
   267  
   268  		if _, ok := stm[st.Task]; !ok {
   269  			stm[st.Task] = make(map[string]SourceTables)
   270  		}
   271  		stm[st.Task][st.Source] = st
   272  	}
   273  
   274  	return stm, resp.Header.Revision, nil
   275  }
   276  
// WatchSourceTables watches PUT & DELETE operations for source tables.
// This function should often be called by DM-master.
//
// It watches every key under the source-tables prefix starting from `revision` and blocks
// until `ctx` is done, the watch channel is closed, or the watch is canceled.
// For a PUT event the SourceTables is decoded from the event value; for a DELETE event it is
// rebuilt from the event key (only `Task` and `Source` are set) with `IsDeleted` set to true.
// Decoded values are sent to `outCh` and errors are sent to `errCh`; both sends are abandoned
// if `ctx` is done first.
func WatchSourceTables(ctx context.Context, cli *clientv3.Client, revision int64,
	outCh chan<- SourceTables, errCh chan<- error,
) {
	// use a derived context so the watch is canceled whenever this function returns.
	wCtx, cancel := context.WithCancel(ctx)
	defer cancel()
	ch := cli.Watch(wCtx, common.ShardDDLOptimismSourceTablesKeyAdapter.Path(),
		clientv3.WithPrefix(), clientv3.WithRev(revision))

	for {
		select {
		case <-ctx.Done():
			return
		case resp, ok := <-ch:
			if !ok {
				// the watch channel has been closed.
				return
			}
			if resp.Canceled {
				// the watch was canceled: report the reason (unless ctx is done) and stop watching.
				select {
				case errCh <- resp.Err():
				case <-ctx.Done():
				}
				return
			}

			for _, ev := range resp.Events {
				var (
					st  SourceTables
					err error
				)

				switch ev.Type {
				case mvccpb.PUT:
					st, err = sourceTablesFromJSON(string(ev.Kv.Value))
				case mvccpb.DELETE:
					// the value is not decoded for a delete event, so only the key is used.
					st, err = sourceTablesFromKey(string(ev.Kv.Key))
					st.IsDeleted = true
				default:
					// this should not happen.
					err = fmt.Errorf("unsupported ectd event type %v", ev.Type)
				}

				if err != nil {
					// report the error, then keep handling the remaining events.
					select {
					case errCh <- err:
					case <-ctx.Done():
						return
					}
				} else {
					select {
					case outCh <- st:
					case <-ctx.Done():
						return
					}
				}
			}
		}
	}
}
   337  
   338  // sourceTablesFromKey constructs an incomplete SourceTables from an etcd key.
   339  func sourceTablesFromKey(key string) (SourceTables, error) {
   340  	var st SourceTables
   341  	ks, err := common.ShardDDLOptimismSourceTablesKeyAdapter.Decode(key)
   342  	if err != nil {
   343  		return st, err
   344  	}
   345  	st.Task = ks[0]
   346  	st.Source = ks[1]
   347  	return st, nil
   348  }
   349  
   350  // putSourceTablesOp returns a PUT etcd operation for source tables.
   351  func putSourceTablesOp(st SourceTables) (clientv3.Op, error) {
   352  	value, err := st.toJSON()
   353  	if err != nil {
   354  		return clientv3.Op{}, err
   355  	}
   356  	key := common.ShardDDLOptimismSourceTablesKeyAdapter.Encode(st.Task, st.Source)
   357  	return clientv3.OpPut(key, value), nil
   358  }
   359  
   360  // CheckSourceTables try to check and fix all the source schemas and table names.
   361  func CheckSourceTables(cli *clientv3.Client, source string, schemaMap map[string]string, talesMap map[string]map[string]string) error {
   362  	allSourceTables, _, err := GetAllSourceTables(cli)
   363  	if err != nil {
   364  		return err
   365  	}
   366  
   367  	for _, taskSourceTables := range allSourceTables {
   368  		sourceTables, ok := taskSourceTables[source]
   369  		if !ok {
   370  			continue
   371  		}
   372  		schemaKeys := make([]string, 0)
   373  		tblKeys := make([]string, 0)
   374  		hasChange := false
   375  		for _, tableSources := range sourceTables.Tables {
   376  			for _, sources := range tableSources {
   377  				for schema, tbls := range sources {
   378  					if _, ok := schemaMap[schema]; ok {
   379  						schemaKeys = append(schemaKeys, schema)
   380  						hasChange = true
   381  					}
   382  
   383  					tblMap, ok := talesMap[schema]
   384  					if !ok {
   385  						continue
   386  					}
   387  					for tbl := range tbls {
   388  						if t, ok := tblMap[tbl]; ok {
   389  							tblKeys = append(tblKeys, t)
   390  							hasChange = true
   391  						}
   392  					}
   393  					for _, t := range tblKeys {
   394  						tbls[tblMap[t]] = tbls[t]
   395  						delete(tbls, t)
   396  					}
   397  					tblKeys = tblKeys[:0]
   398  				}
   399  				for _, s := range schemaKeys {
   400  					sources[schemaMap[s]] = sources[s]
   401  					delete(sources, s)
   402  				}
   403  				schemaKeys = schemaKeys[:0]
   404  			}
   405  		}
   406  		if hasChange {
   407  			if _, err = PutSourceTables(cli, sourceTables); err != nil {
   408  				return err
   409  			}
   410  		}
   411  	}
   412  	return err
   413  }