github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/loader/util.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package loader
    15  
    16  import (
    17  	"context"
    18  	"crypto/sha1"
    19  	"fmt"
    20  	"os"
    21  	"path"
    22  	"path/filepath"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/pingcap/failpoint"
    27  	"github.com/pingcap/tiflow/dm/config"
    28  	"github.com/pingcap/tiflow/dm/pkg/dumpling"
    29  	"github.com/pingcap/tiflow/dm/pkg/ha"
    30  	"github.com/pingcap/tiflow/dm/pkg/log"
    31  	"github.com/pingcap/tiflow/dm/pkg/storage"
    32  	"github.com/pingcap/tiflow/dm/pkg/terror"
    33  	"github.com/pingcap/tiflow/dm/pkg/utils"
    34  	clientv3 "go.etcd.io/etcd/client/v3"
    35  	"go.uber.org/zap"
    36  )
    37  
    38  // SQLReplace works like strings.Replace but only supports one replacement.
    39  // It uses backquote pairs to quote the old and new word.
    40  func SQLReplace(s, oldStr, newStr string, ansiquote bool) string {
    41  	var quote string
    42  	if ansiquote {
    43  		quote = "\""
    44  	} else {
    45  		quote = "`"
    46  	}
    47  	quoteF := func(s string) string {
    48  		var b strings.Builder
    49  		b.WriteString(quote)
    50  		b.WriteString(s)
    51  		b.WriteString(quote)
    52  		return b.String()
    53  	}
    54  
    55  	oldStr = quoteF(oldStr)
    56  	newStr = quoteF(newStr)
    57  	return strings.Replace(s, oldStr, newStr, 1)
    58  }
    59  
    60  // shortSha1 returns the first 6 characters of sha1 value.
    61  func shortSha1(s string) string {
    62  	h := sha1.New()
    63  
    64  	h.Write([]byte(s))
    65  	return fmt.Sprintf("%x", h.Sum(nil))[:6]
    66  }
    67  
    68  // percent calculates percentage of a/b.
    69  func percent(a int64, b int64, finish bool) string {
    70  	if b == 0 {
    71  		if finish {
    72  			return "100.00 %"
    73  		}
    74  		return "0.00 %"
    75  	}
    76  	return fmt.Sprintf("%.2f %%", float64(a)/float64(b)*100)
    77  }
    78  
    79  func generateSchemaCreateFile(dir string, schema string) error {
    80  	file, err := os.Create(path.Join(dir, fmt.Sprintf("%s-schema-create.sql", schema)))
    81  	if err != nil {
    82  		return terror.ErrLoadUnitCreateSchemaFile.Delegate(err)
    83  	}
    84  	defer file.Close()
    85  
    86  	_, err = fmt.Fprintf(file, "CREATE DATABASE `%s`;\n", escapeName(schema))
    87  	return terror.ErrLoadUnitCreateSchemaFile.Delegate(err)
    88  }
    89  
    90  func escapeName(name string) string {
    91  	return strings.ReplaceAll(name, "`", "``")
    92  }
    93  
    94  // input filename is like `all_mode.t1.0.sql` or `all_mode.t1.sql`.
    95  func getDBAndTableFromFilename(filename string) (string, string, error) {
    96  	idx := strings.LastIndex(filename, ".sql")
    97  	if idx < 0 {
    98  		return "", "", fmt.Errorf("%s doesn't have a `.sql` suffix", filename)
    99  	}
   100  	fields := strings.Split(filename[:idx], ".")
   101  	if len(fields) != 2 && len(fields) != 3 {
   102  		return "", "", fmt.Errorf("%s doesn't have correct `.` separator", filename)
   103  	}
   104  	return fields[0], fields[1], nil
   105  }
   106  
   107  func getMydumpMetadata(ctx context.Context, cli *clientv3.Client, cfg *config.SubTaskConfig, workerName string) (string, string, error) {
   108  	metafile := "metadata"
   109  	failpoint.Inject("TestRemoveMetaFile", func() {
   110  		err := storage.RemoveAll(ctx, cfg.LoaderConfig.Dir, nil)
   111  		if err != nil {
   112  			log.L().Warn("TestRemoveMetaFile Error", log.ShortError(err))
   113  		}
   114  	})
   115  	loc, _, err := dumpling.ParseMetaData(ctx, cfg.LoaderConfig.Dir, metafile, cfg.ExtStorage)
   116  	if err == nil {
   117  		return loc.Position.String(), loc.GTIDSetStr(), nil
   118  	}
   119  	if storage.IsNotExistError(err) {
   120  		failpoint.Inject("TestRemoveMetaFile", func() {
   121  			panic("success check file not exist!!")
   122  		})
   123  		worker, err2 := getLoadTask(cli, cfg.Name, cfg.SourceID)
   124  		if err2 != nil {
   125  			log.L().Warn("get load task", log.ShortError(err2))
   126  		}
   127  		if worker != "" && worker != workerName {
   128  			return "", "", terror.ErrLoadTaskWorkerNotMatch.Generate(worker, workerName)
   129  		}
   130  		return "", "", terror.ErrParseMydumperMeta.Generate(err, "not found")
   131  	}
   132  	if terror.ErrMetadataNoBinlogLoc.Equal(err) {
   133  		log.L().Warn("dumped metadata doesn't have binlog location, it's OK if DM doesn't enter incremental mode")
   134  		return "", "", nil
   135  	}
   136  
   137  	toPrint, err2 := storage.ReadFile(ctx, cfg.Dir, metafile, nil)
   138  	if err2 != nil {
   139  		toPrint = []byte(err2.Error())
   140  	}
   141  	log.L().Error("fail to parse dump metadata", log.ShortError(err))
   142  	return "", "", terror.ErrParseMydumperMeta.Generate(err, toPrint)
   143  }
   144  
   145  // cleanDumpFiles is called when finish restoring data, to clean useless files.
   146  func cleanDumpFiles(ctx context.Context, cfg *config.SubTaskConfig) {
   147  	log.L().Info("clean dump files")
   148  	if cfg.Mode == config.ModeFull {
   149  		// in full-mode all files won't be need in the future
   150  		if err := storage.RemoveAll(ctx, cfg.Dir, nil); err != nil {
   151  			log.L().Warn("error when remove loaded dump folder", zap.String("data folder", cfg.Dir), zap.Error(err))
   152  		}
   153  	} else {
   154  		if storage.IsS3Path(cfg.Dir) {
   155  			// s3 no need immediately remove
   156  			log.L().Info("dump path is s3, and s3 storage does not need to immediately remove dump data files.", zap.String("S3 Path", cfg.Dir))
   157  			return
   158  		}
   159  		// leave metadata file and table structure files, only delete data files
   160  		files, err := utils.CollectDirFiles(cfg.Dir)
   161  		if err != nil {
   162  			log.L().Warn("fail to collect files", zap.String("data folder", cfg.Dir), zap.Error(err))
   163  		}
   164  		var lastErr error
   165  		for f := range files {
   166  			if strings.HasSuffix(f, ".sql") {
   167  				if strings.HasSuffix(f, "-schema-create.sql") || strings.HasSuffix(f, "-schema.sql") {
   168  					continue
   169  				}
   170  				lastErr = os.Remove(filepath.Join(cfg.Dir, f))
   171  			}
   172  		}
   173  		if lastErr != nil {
   174  			log.L().Warn("show last error when remove loaded dump sql files", zap.String("data folder", cfg.Dir), zap.Error(lastErr))
   175  		}
   176  	}
   177  }
   178  
   179  // putLoadTask is called when start restoring data, to put load worker in etcd.
   180  // This is no-op when the `cli` argument is nil.
   181  func putLoadTask(cli *clientv3.Client, cfg *config.SubTaskConfig, workerName string) error {
   182  	// some usage like DM as a library, we don't support this feature
   183  	if cli == nil {
   184  		return nil
   185  	}
   186  	_, err := ha.PutLoadTask(cli, cfg.Name, cfg.SourceID, workerName)
   187  	if err != nil {
   188  		return err
   189  	}
   190  	log.L().Info("put load worker in etcd", zap.String("task", cfg.Name), zap.String("source", cfg.SourceID), zap.String("worker", workerName))
   191  	return nil
   192  }
   193  
   194  // delLoadTask is called when finish restoring data, to delete load worker in etcd.
   195  // This is no-op when the `cli` argument is nil.
   196  func delLoadTask(cli *clientv3.Client, cfg *config.SubTaskConfig, workerName string) error {
   197  	// some usage like DM as a library, we don't support this feature
   198  	if cli == nil {
   199  		return nil
   200  	}
   201  	_, _, err := ha.DelLoadTask(cli, cfg.Name, cfg.SourceID)
   202  	if err != nil {
   203  		return err
   204  	}
   205  	log.L().Info("delete load worker in etcd for full mode", zap.String("task", cfg.Name), zap.String("source", cfg.SourceID), zap.String("worker", workerName))
   206  	return nil
   207  }
   208  
   209  // getLoadTask gets the worker which in load stage for the source of the subtask.
   210  // It will return "" and no error when the `cli` argument is nil.
   211  func getLoadTask(cli *clientv3.Client, task, sourceID string) (string, error) {
   212  	if cli == nil {
   213  		return "", nil
   214  	}
   215  	name, _, err := ha.GetLoadTask(cli, task, sourceID)
   216  	return name, err
   217  }
   218  
   219  // readyAndWait updates the lightning status of this worker to LightningReady and
   220  // waits for all workers' status not LightningNotReady.
   221  // Only works for physical import.
   222  func readyAndWait(ctx context.Context, cli *clientv3.Client, cfg *config.SubTaskConfig) error {
   223  	return putAndWait(ctx, cli, cfg, ha.LightningReady, func(s string) bool {
   224  		return s == ha.LightningNotReady
   225  	})
   226  }
   227  
   228  // finishAndWait updates the lightning status of this worker to LightningFinished
   229  // and waits for all workers' status LightningFinished.
   230  // Only works for physical import.
   231  func finishAndWait(ctx context.Context, cli *clientv3.Client, cfg *config.SubTaskConfig) error {
   232  	return putAndWait(ctx, cli, cfg, ha.LightningFinished, func(s string) bool {
   233  		return s != ha.LightningFinished
   234  	})
   235  }
   236  
   237  func putAndWait(
   238  	ctx context.Context,
   239  	cli *clientv3.Client,
   240  	cfg *config.SubTaskConfig,
   241  	putStatus string,
   242  	failFn func(string) bool,
   243  ) error {
   244  	if cli == nil || cfg.LoaderConfig.ImportMode != config.LoadModePhysical {
   245  		return nil
   246  	}
   247  	_, err := ha.PutLightningStatus(cli, cfg.Name, cfg.SourceID, putStatus)
   248  	if err != nil {
   249  		return err
   250  	}
   251  
   252  	ticker := time.NewTicker(5 * time.Second)
   253  	defer ticker.Stop()
   254  WaitLoop:
   255  	for {
   256  		select {
   257  		case <-ctx.Done():
   258  			return ctx.Err()
   259  		case <-ticker.C:
   260  			status, err := ha.GetAllLightningStatus(cli, cfg.Name)
   261  			if err != nil {
   262  				return err
   263  			}
   264  			for _, s := range status {
   265  				if failFn(s) {
   266  					continue WaitLoop
   267  				}
   268  			}
   269  			return nil
   270  		}
   271  	}
   272  }