github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/upgrade/upgrade.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package upgrade
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/pingcap/tidb/pkg/util/dbutil"
    22  	"github.com/pingcap/tiflow/dm/common"
    23  	"github.com/pingcap/tiflow/dm/config"
    24  	"github.com/pingcap/tiflow/dm/config/dbconfig"
    25  	"github.com/pingcap/tiflow/dm/pkg/conn"
    26  	tcontext "github.com/pingcap/tiflow/dm/pkg/context"
    27  	"github.com/pingcap/tiflow/dm/pkg/cputil"
    28  	"github.com/pingcap/tiflow/dm/pkg/etcdutil"
    29  	"github.com/pingcap/tiflow/dm/pkg/log"
    30  	"github.com/pingcap/tiflow/dm/pkg/utils"
    31  	clientv3 "go.etcd.io/etcd/client/v3"
    32  	"go.uber.org/zap"
    33  )
    34  
    35  // upgrades records all functions used to upgrade from one version to the later version.
    36  var upgrades = []func(cli *clientv3.Client, uctx Context) error{
    37  	upgradeToVer1,
    38  	upgradeToVer2,
    39  	upgradeToVer4,
    40  }
    41  
    42  // upgradesBeforeScheduler records all upgrade functions before scheduler start. e.g. etcd key changed.
    43  var upgradesBeforeScheduler = []func(ctx context.Context, cli *clientv3.Client) error{
    44  	upgradeToVer3,
    45  }
    46  
// Context is used to pass data to TryUpgrade and the upgrade functions it runs.
// NOTE that the zero value of Context has a nil embedded context.Context and a nil
// SubTaskConfigs map, so be aware of nil-dereference when using its members.
type Context struct {
	// Context is the embedded standard-library context.
	context.Context
	// SubTaskConfigs holds the subtask configs to upgrade; upgradeToVer2 iterates
	// it as task name -> source ID -> subtask config.
	SubTaskConfigs map[string]map[string]config.SubTaskConfig
}
    53  
    54  // newUpgradeContext creates a Context, avoid nil Context member.
    55  // only used for testing now.
    56  func newUpgradeContext() Context {
    57  	return Context{
    58  		Context:        context.Background(),
    59  		SubTaskConfigs: make(map[string]map[string]config.SubTaskConfig),
    60  	}
    61  }
    62  
    63  // TryUpgrade tries to upgrade the cluster from an older version to a new version.
    64  // This methods should have no side effects even calling multiple times.
    65  func TryUpgrade(cli *clientv3.Client, uctx Context) error {
    66  	// 1. get previous version from etcd.
    67  	preVer, _, err := GetVersion(cli)
    68  	log.L().Info("fetch previous version", zap.Any("preVer", preVer))
    69  	if err != nil {
    70  		return err
    71  	}
    72  
    73  	// 2. check if any previous version exists.
    74  	if preVer.NotSet() {
    75  		if _, err = PutVersion(cli, MinVersion); err != nil {
    76  			return err
    77  		}
    78  		preVer = MinVersion
    79  	}
    80  
    81  	// 3. compare the previous version with the current version.
    82  	if cmp := preVer.Compare(CurrentVersion); cmp == 0 {
    83  		// previous == current version, no need to upgrade.
    84  		return nil
    85  	} else if cmp > 0 {
    86  		// previous >= current version, this often means a older version of DM-master become the leader after started,
    87  		// do nothing for this now.
    88  		return nil
    89  	}
    90  
    91  	// 4. do upgrade operations.
    92  	for _, upgrade := range upgrades {
    93  		err = upgrade(cli, uctx)
    94  		if err != nil {
    95  			return err
    96  		}
    97  	}
    98  
    99  	// 5. put the current version into etcd.
   100  	_, err = PutVersion(cli, CurrentVersion)
   101  	log.L().Info("upgrade cluster version", zap.Any("version", CurrentVersion), zap.Error(err))
   102  	return err
   103  }
   104  
   105  // TryUpgradeBeforeSchedulerStart tries to upgrade the cluster before scheduler start.
   106  // This methods should have no side effects even calling multiple times.
   107  func TryUpgradeBeforeSchedulerStart(ctx context.Context, cli *clientv3.Client) error {
   108  	// 1. get previous version from etcd.
   109  	preVer, _, err := GetVersion(cli)
   110  	log.L().Info("fetch previous version", zap.Any("preVer", preVer))
   111  	if err != nil {
   112  		return err
   113  	}
   114  
   115  	// 2. check if any previous version exists.
   116  	if preVer.NotSet() {
   117  		if _, err = PutVersion(cli, MinVersion); err != nil {
   118  			return err
   119  		}
   120  		preVer = MinVersion
   121  	}
   122  
   123  	// 3. compare the previous version with the current version.
   124  	if cmp := preVer.Compare(CurrentVersion); cmp == 0 {
   125  		// previous == current version, no need to upgrade.
   126  		return nil
   127  	} else if cmp > 0 {
   128  		// previous >= current version, this often means a older version of DM-master become the leader after started,
   129  		// do nothing for this now.
   130  		return nil
   131  	}
   132  
   133  	// 4. do upgrade operations.
   134  	for _, upgrade := range upgradesBeforeScheduler {
   135  		err = upgrade(ctx, cli)
   136  		if err != nil {
   137  			return err
   138  		}
   139  	}
   140  	return nil
   141  }
   142  
   143  // UntouchVersionUpgrade runs all upgrade functions but doesn't change cluster version. This function is called when
   144  // upgrade from v1.0, with a later PutVersion in caller after success.
   145  func UntouchVersionUpgrade(cli *clientv3.Client, uctx Context) error {
   146  	for _, upgrade := range upgrades {
   147  		err := upgrade(cli, uctx)
   148  		if err != nil {
   149  			return err
   150  		}
   151  	}
   152  	return nil
   153  }
   154  
// upgradeToVer1 does upgrade operations from Ver0 to Ver1.
// In fact it does nothing now and exists just for demonstration: both arguments
// are ignored and it always returns nil.
func upgradeToVer1(cli *clientv3.Client, uctx Context) error {
	return nil
}
   160  
   161  // upgradeToVer2 does upgrade operations from Ver1 to Ver2 (v2.0.0-GA) to upgrade syncer checkpoint schema.
   162  func upgradeToVer2(cli *clientv3.Client, uctx Context) error {
   163  	upgradeTaskName := "upgradeToVer2"
   164  	logger := log.L().WithFields(zap.String("task", upgradeTaskName))
   165  
   166  	if uctx.SubTaskConfigs == nil {
   167  		logger.Info("no downstream DB, skipping")
   168  		return nil
   169  	}
   170  
   171  	// tableName -> DBConfig
   172  	dbConfigs := map[string]dbconfig.DBConfig{}
   173  	for task, m := range uctx.SubTaskConfigs {
   174  		for sourceID, subCfg := range m {
   175  			tableName := dbutil.TableName(subCfg.MetaSchema, cputil.SyncerCheckpoint(subCfg.Name))
   176  			subCfg2, err := subCfg.DecryptedClone()
   177  			if err != nil {
   178  				log.L().Error("subconfig error when upgrading", zap.String("task", task),
   179  					zap.String("source id", sourceID), zap.String("subtask config", subCfg.String()), zap.Error(err))
   180  				return err
   181  			}
   182  			dbConfigs[tableName] = subCfg2.To
   183  		}
   184  	}
   185  
   186  	toClose := make([]*conn.BaseDB, 0, len(dbConfigs))
   187  	defer func() {
   188  		for _, db := range toClose {
   189  			db.Close()
   190  		}
   191  	}()
   192  
   193  	// 10 seconds for each subtask
   194  	timeout := time.Duration(len(dbConfigs)*10) * time.Second
   195  	upgradeCtx, cancel := context.WithTimeout(context.Background(), timeout)
   196  	uctx.Context = upgradeCtx
   197  	defer cancel()
   198  
   199  	for tableName, cfg := range dbConfigs {
   200  		targetDB, err := conn.GetDownstreamDB(&cfg)
   201  		if err != nil {
   202  			logger.Error("target DB error when upgrading", zap.String("table name", tableName))
   203  			return err
   204  		}
   205  		toClose = append(toClose, targetDB)
   206  		// try to add columns.
   207  		// NOTE: ignore already exists error to continue the process.
   208  		queries := []string{
   209  			fmt.Sprintf(`ALTER TABLE %s ADD COLUMN exit_safe_binlog_name VARCHAR(128) DEFAULT '' AFTER binlog_gtid`, tableName),
   210  			fmt.Sprintf(`ALTER TABLE %s ADD COLUMN exit_safe_binlog_pos INT UNSIGNED DEFAULT 0 AFTER exit_safe_binlog_name`, tableName),
   211  			fmt.Sprintf(`ALTER TABLE %s ADD COLUMN exit_safe_binlog_gtid TEXT AFTER exit_safe_binlog_pos`, tableName),
   212  		}
   213  		tctx := tcontext.NewContext(uctx.Context, logger)
   214  		dbConn, err := targetDB.GetBaseConn(tctx.Ctx)
   215  		if err != nil {
   216  			logger.Error("skip target DB when upgrading", zap.String("table name", tableName))
   217  			return err
   218  		}
   219  		_, err = dbConn.ExecuteSQLWithIgnoreError(tctx, nil, upgradeTaskName, utils.IgnoreErrorCheckpoint, queries)
   220  		if err != nil {
   221  			logger.Error("error while adding column for checkpoint table", zap.String("table name", tableName))
   222  			return err
   223  		}
   224  	}
   225  
   226  	return nil
   227  }
   228  
   229  // upgradeToVer3 does upgrade operations from Ver2 (v2.0.0-GA) to Ver3 (v2.0.2) to upgrade etcd key encodings.
   230  // This func should be called before scheduler start.
   231  func upgradeToVer3(ctx context.Context, cli *clientv3.Client) error {
   232  	etcdKeyUpgrades := []struct {
   233  		old common.KeyAdapter
   234  		new common.KeyAdapter
   235  	}{
   236  		{
   237  			common.UpstreamConfigKeyAdapterV1,
   238  			common.UpstreamConfigKeyAdapter,
   239  		},
   240  		{
   241  			common.StageRelayKeyAdapterV1,
   242  			common.StageRelayKeyAdapter,
   243  		},
   244  	}
   245  
   246  	ops := make([]clientv3.Op, 0, len(etcdKeyUpgrades))
   247  	for _, pair := range etcdKeyUpgrades {
   248  		resp, err := cli.Get(ctx, pair.old.Path(), clientv3.WithPrefix())
   249  		if err != nil {
   250  			return err
   251  		}
   252  		if len(resp.Kvs) == 0 {
   253  			log.L().Info("no old KVs, skipping", zap.String("etcd path", pair.old.Path()))
   254  			continue
   255  		}
   256  		for _, kv := range resp.Kvs {
   257  			keys, err2 := pair.old.Decode(string(kv.Key))
   258  			if err2 != nil {
   259  				return err2
   260  			}
   261  			newKey := pair.new.Encode(keys...)
   262  
   263  			// note that we lost CreateRevision, Lease, ModRevision, Version
   264  			ops = append(ops, clientv3.OpPut(newKey, string(kv.Value)))
   265  		}
   266  		// delete old key to provide idempotence
   267  		ops = append(ops, clientv3.OpDelete(pair.old.Path(), clientv3.WithPrefix()))
   268  	}
   269  	_, _, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(ops...))
   270  	return err
   271  }
   272  
// upgradeToVer4 does nothing: both arguments are ignored and it always returns nil.
// Version 4 exists just to make sure a cluster at version 3 can re-run bootstrap,
// because version 3 (v2.0.2) has some bugs and the user may downgrade.
func upgradeToVer4(cli *clientv3.Client, uctx Context) error {
	return nil
}