github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/upgrade/upgrade.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package upgrade 15 16 import ( 17 "context" 18 "fmt" 19 "time" 20 21 "github.com/pingcap/tidb/pkg/util/dbutil" 22 "github.com/pingcap/tiflow/dm/common" 23 "github.com/pingcap/tiflow/dm/config" 24 "github.com/pingcap/tiflow/dm/config/dbconfig" 25 "github.com/pingcap/tiflow/dm/pkg/conn" 26 tcontext "github.com/pingcap/tiflow/dm/pkg/context" 27 "github.com/pingcap/tiflow/dm/pkg/cputil" 28 "github.com/pingcap/tiflow/dm/pkg/etcdutil" 29 "github.com/pingcap/tiflow/dm/pkg/log" 30 "github.com/pingcap/tiflow/dm/pkg/utils" 31 clientv3 "go.etcd.io/etcd/client/v3" 32 "go.uber.org/zap" 33 ) 34 35 // upgrades records all functions used to upgrade from one version to the later version. 36 var upgrades = []func(cli *clientv3.Client, uctx Context) error{ 37 upgradeToVer1, 38 upgradeToVer2, 39 upgradeToVer4, 40 } 41 42 // upgradesBeforeScheduler records all upgrade functions before scheduler start. e.g. etcd key changed. 43 var upgradesBeforeScheduler = []func(ctx context.Context, cli *clientv3.Client) error{ 44 upgradeToVer3, 45 } 46 47 // Context is used to pass something to TryUpgrade 48 // NOTE that zero value of Context is nil, be aware of nil-dereference. 49 type Context struct { 50 context.Context 51 SubTaskConfigs map[string]map[string]config.SubTaskConfig 52 } 53 54 // newUpgradeContext creates a Context, avoid nil Context member. 55 // only used for testing now. 56 func newUpgradeContext() Context { 57 return Context{ 58 Context: context.Background(), 59 SubTaskConfigs: make(map[string]map[string]config.SubTaskConfig), 60 } 61 } 62 63 // TryUpgrade tries to upgrade the cluster from an older version to a new version. 64 // This methods should have no side effects even calling multiple times. 65 func TryUpgrade(cli *clientv3.Client, uctx Context) error { 66 // 1. get previous version from etcd. 67 preVer, _, err := GetVersion(cli) 68 log.L().Info("fetch previous version", zap.Any("preVer", preVer)) 69 if err != nil { 70 return err 71 } 72 73 // 2. check if any previous version exists. 74 if preVer.NotSet() { 75 if _, err = PutVersion(cli, MinVersion); err != nil { 76 return err 77 } 78 preVer = MinVersion 79 } 80 81 // 3. compare the previous version with the current version. 82 if cmp := preVer.Compare(CurrentVersion); cmp == 0 { 83 // previous == current version, no need to upgrade. 84 return nil 85 } else if cmp > 0 { 86 // previous >= current version, this often means a older version of DM-master become the leader after started, 87 // do nothing for this now. 88 return nil 89 } 90 91 // 4. do upgrade operations. 92 for _, upgrade := range upgrades { 93 err = upgrade(cli, uctx) 94 if err != nil { 95 return err 96 } 97 } 98 99 // 5. put the current version into etcd. 100 _, err = PutVersion(cli, CurrentVersion) 101 log.L().Info("upgrade cluster version", zap.Any("version", CurrentVersion), zap.Error(err)) 102 return err 103 } 104 105 // TryUpgradeBeforeSchedulerStart tries to upgrade the cluster before scheduler start. 106 // This methods should have no side effects even calling multiple times. 107 func TryUpgradeBeforeSchedulerStart(ctx context.Context, cli *clientv3.Client) error { 108 // 1. get previous version from etcd. 109 preVer, _, err := GetVersion(cli) 110 log.L().Info("fetch previous version", zap.Any("preVer", preVer)) 111 if err != nil { 112 return err 113 } 114 115 // 2. check if any previous version exists. 116 if preVer.NotSet() { 117 if _, err = PutVersion(cli, MinVersion); err != nil { 118 return err 119 } 120 preVer = MinVersion 121 } 122 123 // 3. compare the previous version with the current version. 124 if cmp := preVer.Compare(CurrentVersion); cmp == 0 { 125 // previous == current version, no need to upgrade. 126 return nil 127 } else if cmp > 0 { 128 // previous >= current version, this often means a older version of DM-master become the leader after started, 129 // do nothing for this now. 130 return nil 131 } 132 133 // 4. do upgrade operations. 134 for _, upgrade := range upgradesBeforeScheduler { 135 err = upgrade(ctx, cli) 136 if err != nil { 137 return err 138 } 139 } 140 return nil 141 } 142 143 // UntouchVersionUpgrade runs all upgrade functions but doesn't change cluster version. This function is called when 144 // upgrade from v1.0, with a later PutVersion in caller after success. 145 func UntouchVersionUpgrade(cli *clientv3.Client, uctx Context) error { 146 for _, upgrade := range upgrades { 147 err := upgrade(cli, uctx) 148 if err != nil { 149 return err 150 } 151 } 152 return nil 153 } 154 155 // upgradeToVer1 does upgrade operations from Ver0 to Ver1. 156 // in fact, this do nothing now, and just for demonstration. 157 func upgradeToVer1(cli *clientv3.Client, uctx Context) error { 158 return nil 159 } 160 161 // upgradeToVer2 does upgrade operations from Ver1 to Ver2 (v2.0.0-GA) to upgrade syncer checkpoint schema. 162 func upgradeToVer2(cli *clientv3.Client, uctx Context) error { 163 upgradeTaskName := "upgradeToVer2" 164 logger := log.L().WithFields(zap.String("task", upgradeTaskName)) 165 166 if uctx.SubTaskConfigs == nil { 167 logger.Info("no downstream DB, skipping") 168 return nil 169 } 170 171 // tableName -> DBConfig 172 dbConfigs := map[string]dbconfig.DBConfig{} 173 for task, m := range uctx.SubTaskConfigs { 174 for sourceID, subCfg := range m { 175 tableName := dbutil.TableName(subCfg.MetaSchema, cputil.SyncerCheckpoint(subCfg.Name)) 176 subCfg2, err := subCfg.DecryptedClone() 177 if err != nil { 178 log.L().Error("subconfig error when upgrading", zap.String("task", task), 179 zap.String("source id", sourceID), zap.String("subtask config", subCfg.String()), zap.Error(err)) 180 return err 181 } 182 dbConfigs[tableName] = subCfg2.To 183 } 184 } 185 186 toClose := make([]*conn.BaseDB, 0, len(dbConfigs)) 187 defer func() { 188 for _, db := range toClose { 189 db.Close() 190 } 191 }() 192 193 // 10 seconds for each subtask 194 timeout := time.Duration(len(dbConfigs)*10) * time.Second 195 upgradeCtx, cancel := context.WithTimeout(context.Background(), timeout) 196 uctx.Context = upgradeCtx 197 defer cancel() 198 199 for tableName, cfg := range dbConfigs { 200 targetDB, err := conn.GetDownstreamDB(&cfg) 201 if err != nil { 202 logger.Error("target DB error when upgrading", zap.String("table name", tableName)) 203 return err 204 } 205 toClose = append(toClose, targetDB) 206 // try to add columns. 207 // NOTE: ignore already exists error to continue the process. 208 queries := []string{ 209 fmt.Sprintf(`ALTER TABLE %s ADD COLUMN exit_safe_binlog_name VARCHAR(128) DEFAULT '' AFTER binlog_gtid`, tableName), 210 fmt.Sprintf(`ALTER TABLE %s ADD COLUMN exit_safe_binlog_pos INT UNSIGNED DEFAULT 0 AFTER exit_safe_binlog_name`, tableName), 211 fmt.Sprintf(`ALTER TABLE %s ADD COLUMN exit_safe_binlog_gtid TEXT AFTER exit_safe_binlog_pos`, tableName), 212 } 213 tctx := tcontext.NewContext(uctx.Context, logger) 214 dbConn, err := targetDB.GetBaseConn(tctx.Ctx) 215 if err != nil { 216 logger.Error("skip target DB when upgrading", zap.String("table name", tableName)) 217 return err 218 } 219 _, err = dbConn.ExecuteSQLWithIgnoreError(tctx, nil, upgradeTaskName, utils.IgnoreErrorCheckpoint, queries) 220 if err != nil { 221 logger.Error("error while adding column for checkpoint table", zap.String("table name", tableName)) 222 return err 223 } 224 } 225 226 return nil 227 } 228 229 // upgradeToVer3 does upgrade operations from Ver2 (v2.0.0-GA) to Ver3 (v2.0.2) to upgrade etcd key encodings. 230 // This func should be called before scheduler start. 231 func upgradeToVer3(ctx context.Context, cli *clientv3.Client) error { 232 etcdKeyUpgrades := []struct { 233 old common.KeyAdapter 234 new common.KeyAdapter 235 }{ 236 { 237 common.UpstreamConfigKeyAdapterV1, 238 common.UpstreamConfigKeyAdapter, 239 }, 240 { 241 common.StageRelayKeyAdapterV1, 242 common.StageRelayKeyAdapter, 243 }, 244 } 245 246 ops := make([]clientv3.Op, 0, len(etcdKeyUpgrades)) 247 for _, pair := range etcdKeyUpgrades { 248 resp, err := cli.Get(ctx, pair.old.Path(), clientv3.WithPrefix()) 249 if err != nil { 250 return err 251 } 252 if len(resp.Kvs) == 0 { 253 log.L().Info("no old KVs, skipping", zap.String("etcd path", pair.old.Path())) 254 continue 255 } 256 for _, kv := range resp.Kvs { 257 keys, err2 := pair.old.Decode(string(kv.Key)) 258 if err2 != nil { 259 return err2 260 } 261 newKey := pair.new.Encode(keys...) 262 263 // note that we lost CreateRevision, Lease, ModRevision, Version 264 ops = append(ops, clientv3.OpPut(newKey, string(kv.Value))) 265 } 266 // delete old key to provide idempotence 267 ops = append(ops, clientv3.OpDelete(pair.old.Path(), clientv3.WithPrefix())) 268 } 269 _, _, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(ops...)) 270 return err 271 } 272 273 // upgradeToVer4 does nothing, version 4 is just to make sure cluster from version 3 could re-run bootstrap, because 274 // version 3 (v2.0.2) has some bugs and user may downgrade. 275 func upgradeToVer4(cli *clientv3.Client, uctx Context) error { 276 return nil 277 }