github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/shardddl/optimist.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package shardddl
    15  
    16  import (
    17  	"context"
    18  	"sync"
    19  	"time"
    20  
    21  	"github.com/pingcap/tidb/pkg/parser/model"
    22  	filter "github.com/pingcap/tidb/pkg/util/table-filter"
    23  	tcontext "github.com/pingcap/tiflow/dm/pkg/context"
    24  	"github.com/pingcap/tiflow/dm/pkg/etcdutil"
    25  	"github.com/pingcap/tiflow/dm/pkg/log"
    26  	"github.com/pingcap/tiflow/dm/pkg/shardddl/optimism"
    27  	"github.com/pingcap/tiflow/dm/pkg/utils"
    28  	clientv3 "go.etcd.io/etcd/client/v3"
    29  	"go.uber.org/zap"
    30  )
    31  
// Optimist is used to coordinate the shard DDL migration in optimism mode.
// It keeps the source tables of one (task, source) pair together with the
// shard DDL info/operations that have been received but not yet finished.
type Optimist struct {
	// mu is held by the methods of this type while they read or write the
	// pending* fields below; Tables also holds it while reading tables.
	mu sync.RWMutex

	// logger is the parent logger tagged with the optimist component field;
	// cli is the etcd client used for all reads and writes;
	// task and source identify whose shard DDL state this instance handles.
	logger log.Logger
	cli    *clientv3.Client
	task   string
	source string

	// tables holds the source tables built by Init and updated by
	// AddTable/RemoveTable before being put into etcd.
	tables optimism.SourceTables

	// the shard DDL info which is pending to handle.
	pendingInfo *optimism.Info
	// the shard DDL lock operation which is pending to handle.
	pendingOp *optimism.Operation
	// the shard DDL lock redirect operations which are pending to handle.
	// one target table -> one redirect operation
	// both maps are keyed by the target table ID; the cancel function stops
	// the waiting started by GetRedirectOperation for that target table.
	pendingRedirectOps        map[string]*optimism.Operation
	pendingRedirectCancelFunc map[string]context.CancelFunc
}
    52  
    53  // NewOptimist creates a new Optimist instance.
    54  func NewOptimist(pLogger *log.Logger, cli *clientv3.Client, task, source string) *Optimist {
    55  	return &Optimist{
    56  		logger: pLogger.WithFields(zap.String("component", "shard DDL optimist")),
    57  		cli:    cli,
    58  		task:   task,
    59  		source: source,
    60  	}
    61  }
    62  
    63  // Init initializes the optimist with source tables.
    64  // NOTE: this will PUT the initial source tables into etcd (and overwrite any previous existing tables).
    65  // NOTE: we do not remove source tables for `stop-task` now, may need to handle it for `remove-meta`.
    66  func (o *Optimist) Init(sourceTables map[string]map[string]map[string]map[string]struct{}) error {
    67  	o.tables = optimism.NewSourceTables(o.task, o.source)
    68  	for downSchema, downTables := range sourceTables {
    69  		for downTable, upSchemas := range downTables {
    70  			for upSchema, upTables := range upSchemas {
    71  				for upTable := range upTables {
    72  					o.tables.AddTable(upSchema, upTable, downSchema, downTable)
    73  				}
    74  			}
    75  		}
    76  	}
    77  	_, err := optimism.PutSourceTables(o.cli, o.tables)
    78  	return err
    79  }
    80  
    81  // Tables clone and return tables
    82  // first one is sourceTable, second one is targetTable.
    83  func (o *Optimist) Tables() [][]filter.Table {
    84  	o.mu.Lock()
    85  	defer o.mu.Unlock()
    86  
    87  	tbls := make([][]filter.Table, 0)
    88  	for downSchema, downTables := range o.tables.Tables {
    89  		for downTable, upSchemas := range downTables {
    90  			for upSchema, upTables := range upSchemas {
    91  				for upTable := range upTables {
    92  					tbls = append(tbls, []filter.Table{{Schema: upSchema, Name: upTable}, {Schema: downSchema, Name: downTable}})
    93  				}
    94  			}
    95  		}
    96  	}
    97  	return tbls
    98  }
    99  
   100  // Reset resets the internal state of the optimist.
   101  func (o *Optimist) Reset() {
   102  	o.mu.Lock()
   103  	defer o.mu.Unlock()
   104  
   105  	o.pendingInfo = nil
   106  	o.pendingOp = nil
   107  	o.pendingRedirectOps = make(map[string]*optimism.Operation)
   108  	o.pendingRedirectCancelFunc = make(map[string]context.CancelFunc)
   109  }
   110  
   111  // ConstructInfo constructs a shard DDL info.
   112  func (o *Optimist) ConstructInfo(upSchema, upTable, downSchema, downTable string,
   113  	ddls []string, tiBefore *model.TableInfo, tisAfter []*model.TableInfo,
   114  ) optimism.Info {
   115  	return optimism.NewInfo(o.task, o.source, upSchema, upTable, downSchema, downTable, ddls, tiBefore, tisAfter)
   116  }
   117  
   118  // PutInfo puts the shard DDL info into etcd and returns the revision.
   119  func (o *Optimist) PutInfo(info optimism.Info) (int64, error) {
   120  	rev, err := optimism.PutInfo(o.cli, info)
   121  	if err != nil {
   122  		return 0, err
   123  	}
   124  
   125  	o.mu.Lock()
   126  	o.pendingInfo = &info
   127  	o.mu.Unlock()
   128  
   129  	return rev, nil
   130  }
   131  
   132  // AddTable adds the table for the info into source tables,
   133  // this is often called for `CREATE TABLE`.
   134  func (o *Optimist) AddTable(info optimism.Info) (int64, error) {
   135  	o.tables.AddTable(info.UpSchema, info.UpTable, info.DownSchema, info.DownTable)
   136  	return optimism.PutSourceTables(o.cli, o.tables)
   137  }
   138  
   139  // RemoveTable removes the table for the info from source tables,
   140  // this is often called for `DROP TABLE`.
   141  func (o *Optimist) RemoveTable(info optimism.Info) (int64, error) {
   142  	o.tables.RemoveTable(info.UpSchema, info.UpTable, info.DownSchema, info.DownTable)
   143  	return optimism.PutSourceTables(o.cli, o.tables)
   144  }
   145  
   146  // GetOperation gets the shard DDL lock operation relative to the shard DDL info.
   147  func (o *Optimist) GetOperation(ctx context.Context, info optimism.Info, rev int64) (optimism.Operation, error) {
   148  	ctx2, cancel2 := context.WithCancel(ctx)
   149  	defer cancel2()
   150  
   151  	ch := make(chan optimism.Operation, 1)
   152  	errCh := make(chan error, 1)
   153  	go optimism.WatchOperationPut(ctx2, o.cli, o.task, o.source, info.UpSchema, info.UpTable, rev, ch, errCh)
   154  
   155  	select {
   156  	case op := <-ch:
   157  		o.mu.Lock()
   158  		o.pendingOp = &op
   159  		o.mu.Unlock()
   160  		return op, nil
   161  	case err := <-errCh:
   162  		return optimism.Operation{}, err
   163  	case <-ctx.Done():
   164  		return optimism.Operation{}, ctx.Err()
   165  	}
   166  }
   167  
   168  func (o *Optimist) GetRedirectOperation(ctx context.Context, info optimism.Info, rev int64) {
   169  	ctx2, cancel2 := context.WithCancel(ctx)
   170  
   171  	ch := make(chan optimism.Operation, 1)
   172  	errCh := make(chan error, 1)
   173  	targetTableID := utils.GenTableID(&filter.Table{Schema: info.DownSchema, Name: info.DownTable})
   174  	o.mu.Lock()
   175  	o.pendingRedirectCancelFunc[targetTableID] = cancel2
   176  	o.mu.Unlock()
   177  
   178  	go func() {
   179  		o.logger.Info("start to wait redirect operation", zap.Stringer("info", info), zap.Int64("revision", rev))
   180  		for {
   181  			op, rev2, err := optimism.GetOperation(o.cli, o.task, o.source, info.UpSchema, info.UpTable)
   182  			if err != nil {
   183  				o.logger.Warn("fail to get redirect operation", zap.Error(err))
   184  				time.Sleep(time.Second)
   185  				continue
   186  			}
   187  			// check whether operation is valid
   188  			if op.Task == o.task && rev2 >= rev {
   189  				switch op.ConflictStage {
   190  				case optimism.ConflictResolved, optimism.ConflictNone:
   191  					o.saveRedirectOperation(targetTableID, &op)
   192  					return
   193  				}
   194  			}
   195  			ctx3, cancel3 := context.WithCancel(ctx2)
   196  			go optimism.WatchOperationPut(ctx3, o.cli, o.task, o.source, info.UpSchema, info.UpTable, rev2+1, ch, errCh)
   197  			select {
   198  			case op = <-ch:
   199  				cancel3()
   200  				switch op.ConflictStage {
   201  				case optimism.ConflictResolved, optimism.ConflictNone:
   202  					o.saveRedirectOperation(targetTableID, &op)
   203  					return
   204  				}
   205  			case err := <-errCh:
   206  				cancel3()
   207  				o.logger.Warn("fail to watch redirect operation", zap.Error(err))
   208  				time.Sleep(time.Second)
   209  			case <-ctx.Done():
   210  				cancel3()
   211  				return
   212  			}
   213  		}
   214  	}()
   215  }
   216  
   217  // DoneOperation marks the shard DDL lock operation as done.
   218  func (o *Optimist) DoneOperation(op optimism.Operation) error {
   219  	op.Done = true
   220  	_, _, err := etcdutil.DoTxnWithRepeatable(o.cli, func(_ *tcontext.Context, cli *clientv3.Client) (interface{}, error) {
   221  		_, _, err := optimism.PutOperation(cli, false, op, 0)
   222  		return nil, err
   223  	})
   224  	if err != nil {
   225  		return err
   226  	}
   227  
   228  	o.mu.Lock()
   229  	o.pendingInfo = nil
   230  	o.pendingOp = nil
   231  	o.mu.Unlock()
   232  
   233  	return nil
   234  }
   235  
   236  // PendingInfo returns the shard DDL info which is pending to handle.
   237  func (o *Optimist) PendingInfo() *optimism.Info {
   238  	o.mu.RLock()
   239  	defer o.mu.RUnlock()
   240  
   241  	if o.pendingInfo == nil {
   242  		return nil
   243  	}
   244  	info := *o.pendingInfo
   245  	return &info
   246  }
   247  
   248  // PendingOperation returns the shard DDL lock operation which is pending to handle.
   249  func (o *Optimist) PendingOperation() *optimism.Operation {
   250  	o.mu.RLock()
   251  	defer o.mu.RUnlock()
   252  
   253  	if o.pendingOp == nil {
   254  		return nil
   255  	}
   256  	op := *o.pendingOp
   257  	return &op
   258  }
   259  
   260  // PendingRedirectOperation returns the shard DDL lock redirect operation which is pending to handle.
   261  func (o *Optimist) PendingRedirectOperation() (*optimism.Operation, string) {
   262  	o.mu.RLock()
   263  	defer o.mu.RUnlock()
   264  
   265  	for targetTableID, op := range o.pendingRedirectOps {
   266  		return op, targetTableID
   267  	}
   268  	return nil, ""
   269  }
   270  
   271  // saveRedirectOperation saves the redirect shard DDL lock operation.
   272  func (o *Optimist) saveRedirectOperation(targetTableID string, op *optimism.Operation) {
   273  	o.logger.Info("receive redirection operation from master", zap.Stringer("op", op))
   274  	o.mu.Lock()
   275  	if _, ok := o.pendingRedirectCancelFunc[targetTableID]; ok {
   276  		o.pendingRedirectCancelFunc[targetTableID]()
   277  		o.pendingRedirectOps[targetTableID] = op
   278  	}
   279  	o.mu.Unlock()
   280  }
   281  
   282  // DoneRedirectOperation marks the redirect shard DDL lock operation as done.
   283  func (o *Optimist) DoneRedirectOperation(targetTableID string) {
   284  	o.mu.Lock()
   285  	if cancelFunc, ok := o.pendingRedirectCancelFunc[targetTableID]; ok {
   286  		cancelFunc()
   287  	}
   288  	delete(o.pendingRedirectCancelFunc, targetTableID)
   289  	delete(o.pendingRedirectOps, targetTableID)
   290  	o.mu.Unlock()
   291  }
   292  
   293  // CheckPersistentData check and fix the persistent data.
   294  //
   295  // NOTE: currently this function is not used because user will meet error at early version
   296  // if set unsupported case-sensitive.
   297  func (o *Optimist) CheckPersistentData(source string, schemas map[string]string, tables map[string]map[string]string) error {
   298  	if o.cli == nil {
   299  		return nil
   300  	}
   301  	err := optimism.CheckSourceTables(o.cli, source, schemas, tables)
   302  	if err != nil {
   303  		return err
   304  	}
   305  
   306  	err = optimism.CheckDDLInfos(o.cli, source, schemas, tables)
   307  	if err != nil {
   308  		return err
   309  	}
   310  
   311  	err = optimism.CheckOperations(o.cli, source, schemas, tables)
   312  	if err != nil {
   313  		return err
   314  	}
   315  
   316  	return optimism.CheckColumns(o.cli, source, schemas, tables)
   317  }