github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/loader/util.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package loader 15 16 import ( 17 "context" 18 "crypto/sha1" 19 "fmt" 20 "os" 21 "path" 22 "path/filepath" 23 "strings" 24 "time" 25 26 "github.com/pingcap/failpoint" 27 "github.com/pingcap/tiflow/dm/config" 28 "github.com/pingcap/tiflow/dm/pkg/dumpling" 29 "github.com/pingcap/tiflow/dm/pkg/ha" 30 "github.com/pingcap/tiflow/dm/pkg/log" 31 "github.com/pingcap/tiflow/dm/pkg/storage" 32 "github.com/pingcap/tiflow/dm/pkg/terror" 33 "github.com/pingcap/tiflow/dm/pkg/utils" 34 clientv3 "go.etcd.io/etcd/client/v3" 35 "go.uber.org/zap" 36 ) 37 38 // SQLReplace works like strings.Replace but only supports one replacement. 39 // It uses backquote pairs to quote the old and new word. 40 func SQLReplace(s, oldStr, newStr string, ansiquote bool) string { 41 var quote string 42 if ansiquote { 43 quote = "\"" 44 } else { 45 quote = "`" 46 } 47 quoteF := func(s string) string { 48 var b strings.Builder 49 b.WriteString(quote) 50 b.WriteString(s) 51 b.WriteString(quote) 52 return b.String() 53 } 54 55 oldStr = quoteF(oldStr) 56 newStr = quoteF(newStr) 57 return strings.Replace(s, oldStr, newStr, 1) 58 } 59 60 // shortSha1 returns the first 6 characters of sha1 value. 61 func shortSha1(s string) string { 62 h := sha1.New() 63 64 h.Write([]byte(s)) 65 return fmt.Sprintf("%x", h.Sum(nil))[:6] 66 } 67 68 // percent calculates percentage of a/b. 69 func percent(a int64, b int64, finish bool) string { 70 if b == 0 { 71 if finish { 72 return "100.00 %" 73 } 74 return "0.00 %" 75 } 76 return fmt.Sprintf("%.2f %%", float64(a)/float64(b)*100) 77 } 78 79 func generateSchemaCreateFile(dir string, schema string) error { 80 file, err := os.Create(path.Join(dir, fmt.Sprintf("%s-schema-create.sql", schema))) 81 if err != nil { 82 return terror.ErrLoadUnitCreateSchemaFile.Delegate(err) 83 } 84 defer file.Close() 85 86 _, err = fmt.Fprintf(file, "CREATE DATABASE `%s`;\n", escapeName(schema)) 87 return terror.ErrLoadUnitCreateSchemaFile.Delegate(err) 88 } 89 90 func escapeName(name string) string { 91 return strings.ReplaceAll(name, "`", "``") 92 } 93 94 // input filename is like `all_mode.t1.0.sql` or `all_mode.t1.sql`. 95 func getDBAndTableFromFilename(filename string) (string, string, error) { 96 idx := strings.LastIndex(filename, ".sql") 97 if idx < 0 { 98 return "", "", fmt.Errorf("%s doesn't have a `.sql` suffix", filename) 99 } 100 fields := strings.Split(filename[:idx], ".") 101 if len(fields) != 2 && len(fields) != 3 { 102 return "", "", fmt.Errorf("%s doesn't have correct `.` separator", filename) 103 } 104 return fields[0], fields[1], nil 105 } 106 107 func getMydumpMetadata(ctx context.Context, cli *clientv3.Client, cfg *config.SubTaskConfig, workerName string) (string, string, error) { 108 metafile := "metadata" 109 failpoint.Inject("TestRemoveMetaFile", func() { 110 err := storage.RemoveAll(ctx, cfg.LoaderConfig.Dir, nil) 111 if err != nil { 112 log.L().Warn("TestRemoveMetaFile Error", log.ShortError(err)) 113 } 114 }) 115 loc, _, err := dumpling.ParseMetaData(ctx, cfg.LoaderConfig.Dir, metafile, cfg.ExtStorage) 116 if err == nil { 117 return loc.Position.String(), loc.GTIDSetStr(), nil 118 } 119 if storage.IsNotExistError(err) { 120 failpoint.Inject("TestRemoveMetaFile", func() { 121 panic("success check file not exist!!") 122 }) 123 worker, err2 := getLoadTask(cli, cfg.Name, cfg.SourceID) 124 if err2 != nil { 125 log.L().Warn("get load task", log.ShortError(err2)) 126 } 127 if worker != "" && worker != workerName { 128 return "", "", terror.ErrLoadTaskWorkerNotMatch.Generate(worker, workerName) 129 } 130 return "", "", terror.ErrParseMydumperMeta.Generate(err, "not found") 131 } 132 if terror.ErrMetadataNoBinlogLoc.Equal(err) { 133 log.L().Warn("dumped metadata doesn't have binlog location, it's OK if DM doesn't enter incremental mode") 134 return "", "", nil 135 } 136 137 toPrint, err2 := storage.ReadFile(ctx, cfg.Dir, metafile, nil) 138 if err2 != nil { 139 toPrint = []byte(err2.Error()) 140 } 141 log.L().Error("fail to parse dump metadata", log.ShortError(err)) 142 return "", "", terror.ErrParseMydumperMeta.Generate(err, toPrint) 143 } 144 145 // cleanDumpFiles is called when finish restoring data, to clean useless files. 146 func cleanDumpFiles(ctx context.Context, cfg *config.SubTaskConfig) { 147 log.L().Info("clean dump files") 148 if cfg.Mode == config.ModeFull { 149 // in full-mode all files won't be need in the future 150 if err := storage.RemoveAll(ctx, cfg.Dir, nil); err != nil { 151 log.L().Warn("error when remove loaded dump folder", zap.String("data folder", cfg.Dir), zap.Error(err)) 152 } 153 } else { 154 if storage.IsS3Path(cfg.Dir) { 155 // s3 no need immediately remove 156 log.L().Info("dump path is s3, and s3 storage does not need to immediately remove dump data files.", zap.String("S3 Path", cfg.Dir)) 157 return 158 } 159 // leave metadata file and table structure files, only delete data files 160 files, err := utils.CollectDirFiles(cfg.Dir) 161 if err != nil { 162 log.L().Warn("fail to collect files", zap.String("data folder", cfg.Dir), zap.Error(err)) 163 } 164 var lastErr error 165 for f := range files { 166 if strings.HasSuffix(f, ".sql") { 167 if strings.HasSuffix(f, "-schema-create.sql") || strings.HasSuffix(f, "-schema.sql") { 168 continue 169 } 170 lastErr = os.Remove(filepath.Join(cfg.Dir, f)) 171 } 172 } 173 if lastErr != nil { 174 log.L().Warn("show last error when remove loaded dump sql files", zap.String("data folder", cfg.Dir), zap.Error(lastErr)) 175 } 176 } 177 } 178 179 // putLoadTask is called when start restoring data, to put load worker in etcd. 180 // This is no-op when the `cli` argument is nil. 181 func putLoadTask(cli *clientv3.Client, cfg *config.SubTaskConfig, workerName string) error { 182 // some usage like DM as a library, we don't support this feature 183 if cli == nil { 184 return nil 185 } 186 _, err := ha.PutLoadTask(cli, cfg.Name, cfg.SourceID, workerName) 187 if err != nil { 188 return err 189 } 190 log.L().Info("put load worker in etcd", zap.String("task", cfg.Name), zap.String("source", cfg.SourceID), zap.String("worker", workerName)) 191 return nil 192 } 193 194 // delLoadTask is called when finish restoring data, to delete load worker in etcd. 195 // This is no-op when the `cli` argument is nil. 196 func delLoadTask(cli *clientv3.Client, cfg *config.SubTaskConfig, workerName string) error { 197 // some usage like DM as a library, we don't support this feature 198 if cli == nil { 199 return nil 200 } 201 _, _, err := ha.DelLoadTask(cli, cfg.Name, cfg.SourceID) 202 if err != nil { 203 return err 204 } 205 log.L().Info("delete load worker in etcd for full mode", zap.String("task", cfg.Name), zap.String("source", cfg.SourceID), zap.String("worker", workerName)) 206 return nil 207 } 208 209 // getLoadTask gets the worker which in load stage for the source of the subtask. 210 // It will return "" and no error when the `cli` argument is nil. 211 func getLoadTask(cli *clientv3.Client, task, sourceID string) (string, error) { 212 if cli == nil { 213 return "", nil 214 } 215 name, _, err := ha.GetLoadTask(cli, task, sourceID) 216 return name, err 217 } 218 219 // readyAndWait updates the lightning status of this worker to LightningReady and 220 // waits for all workers' status not LightningNotReady. 221 // Only works for physical import. 222 func readyAndWait(ctx context.Context, cli *clientv3.Client, cfg *config.SubTaskConfig) error { 223 return putAndWait(ctx, cli, cfg, ha.LightningReady, func(s string) bool { 224 return s == ha.LightningNotReady 225 }) 226 } 227 228 // finishAndWait updates the lightning status of this worker to LightningFinished 229 // and waits for all workers' status LightningFinished. 230 // Only works for physical import. 231 func finishAndWait(ctx context.Context, cli *clientv3.Client, cfg *config.SubTaskConfig) error { 232 return putAndWait(ctx, cli, cfg, ha.LightningFinished, func(s string) bool { 233 return s != ha.LightningFinished 234 }) 235 } 236 237 func putAndWait( 238 ctx context.Context, 239 cli *clientv3.Client, 240 cfg *config.SubTaskConfig, 241 putStatus string, 242 failFn func(string) bool, 243 ) error { 244 if cli == nil || cfg.LoaderConfig.ImportMode != config.LoadModePhysical { 245 return nil 246 } 247 _, err := ha.PutLightningStatus(cli, cfg.Name, cfg.SourceID, putStatus) 248 if err != nil { 249 return err 250 } 251 252 ticker := time.NewTicker(5 * time.Second) 253 defer ticker.Stop() 254 WaitLoop: 255 for { 256 select { 257 case <-ctx.Done(): 258 return ctx.Err() 259 case <-ticker.C: 260 status, err := ha.GetAllLightningStatus(cli, cfg.Name) 261 if err != nil { 262 return err 263 } 264 for _, s := range status { 265 if failFn(s) { 266 continue WaitLoop 267 } 268 } 269 return nil 270 } 271 } 272 }