github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/loader/lightning.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package loader
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"path/filepath"
    20  	"regexp"
    21  	"strings"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/pingcap/errors"
    26  	"github.com/pingcap/failpoint"
    27  	"github.com/pingcap/tidb/dumpling/export"
    28  	lserver "github.com/pingcap/tidb/lightning/pkg/server"
    29  	"github.com/pingcap/tidb/pkg/lightning/checkpoints"
    30  	"github.com/pingcap/tidb/pkg/lightning/common"
    31  	lcfg "github.com/pingcap/tidb/pkg/lightning/config"
    32  	"github.com/pingcap/tidb/pkg/lightning/errormanager"
    33  	"github.com/pingcap/tidb/pkg/parser/mysql"
    34  	tidbpromutil "github.com/pingcap/tidb/pkg/util/promutil"
    35  	"github.com/pingcap/tiflow/dm/config"
    36  	"github.com/pingcap/tiflow/dm/pb"
    37  	"github.com/pingcap/tiflow/dm/pkg/binlog"
    38  	"github.com/pingcap/tiflow/dm/pkg/conn"
    39  	"github.com/pingcap/tiflow/dm/pkg/cputil"
    40  	"github.com/pingcap/tiflow/dm/pkg/log"
    41  	"github.com/pingcap/tiflow/dm/pkg/storage"
    42  	"github.com/pingcap/tiflow/dm/pkg/terror"
    43  	"github.com/pingcap/tiflow/dm/pkg/utils"
    44  	"github.com/pingcap/tiflow/dm/unit"
    45  	"github.com/pingcap/tiflow/engine/pkg/promutil"
    46  	"github.com/prometheus/client_golang/prometheus"
    47  	clientv3 "go.etcd.io/etcd/client/v3"
    48  	"go.uber.org/atomic"
    49  	"go.uber.org/zap"
    50  )
    51  
const (
	// lightningCheckpointFileName is the checkpoint file name for the lightning
	// loader. This file stores the real checkpoint data for lightning when the
	// file checkpoint driver is selected (see GetLightningConfig).
	lightningCheckpointFileName = "tidb_lightning_checkpoint.pb"
)
    57  
// LightningLoader can load your mydumper data into TiDB database.
type LightningLoader struct {
	// The embedded lock is taken by Pause, Update and runLightning (when
	// publishing the cancel function).
	sync.RWMutex

	// timeZone is resolved in Init and written to the "time_zone" lightning
	// session variable by getLightningConfig.
	timeZone string
	// sqlMode is resolved in Init and passed to lightning as TiDB.StrSQLMode.
	sqlMode string
	// lightningGlobalConfig is derived from cfg by MakeGlobalConfig and is
	// reused whenever a new lightning server or task config is created.
	lightningGlobalConfig *lcfg.GlobalConfig
	// cfg is the DM subtask config this loader was created for.
	cfg *config.SubTaskConfig

	// checkPointList is created and prepared in Init and closed in Close.
	checkPointList *LightningCheckpointList

	logger log.Logger
	// cli is the etcd client handed to the load-task helpers
	// (putLoadTask, delLoadTask, readyAndWait, finishAndWait, ...).
	cli *clientv3.Client
	// core is the lightning server instance; Resume replaces it with a new one.
	core   *lserver.Lightning
	cancel context.CancelFunc // for per task context, which maybe different from lightning context

	// toDB is the downstream connection opened in Init and handed to checkPointList.
	toDB *conn.BaseDB

	workerName string
	// finish is set once the load is considered finished (see restore).
	finish atomic.Bool
	// closed is set at the end of Close.
	closed atomic.Bool
	// metaBinlog and metaBinlogGTID hold the dump metadata read in Process
	// and are reported by status.
	metaBinlog     atomic.String
	metaBinlogGTID atomic.String
	// lastErr is the error returned by the most recent lightning run; restore
	// inspects it to detect a manual resume from a dup/checksum error.
	lastErr error

	// speedRecorder is used by status to compute the current import speed.
	speedRecorder *export.SpeedRecorder
	// metricProxies is chosen in initMetricProxies.
	metricProxies *metricProxies
}
    86  
    87  // NewLightning creates a new Loader importing data with lightning.
    88  func NewLightning(cfg *config.SubTaskConfig, cli *clientv3.Client, workerName string) *LightningLoader {
    89  	lightningCfg := MakeGlobalConfig(cfg)
    90  	logger := log.L()
    91  	if cfg.FrameworkLogger != nil {
    92  		logger = log.Logger{Logger: cfg.FrameworkLogger}
    93  	}
    94  	loader := &LightningLoader{
    95  		cfg:                   cfg,
    96  		cli:                   cli,
    97  		workerName:            workerName,
    98  		lightningGlobalConfig: lightningCfg,
    99  		core:                  lserver.New(lightningCfg),
   100  		logger:                logger.WithFields(zap.String("task", cfg.Name), zap.String("unit", "lightning-load")),
   101  		speedRecorder:         export.NewSpeedRecorder(),
   102  	}
   103  	return loader
   104  }
   105  
   106  // MakeGlobalConfig converts subtask config to lightning global config.
   107  func MakeGlobalConfig(cfg *config.SubTaskConfig) *lcfg.GlobalConfig {
   108  	lightningCfg := lcfg.NewGlobalConfig()
   109  	if cfg.To.Security != nil {
   110  		lightningCfg.Security.CABytes = cfg.To.Security.SSLCABytes
   111  		lightningCfg.Security.CertBytes = cfg.To.Security.SSLCertBytes
   112  		lightningCfg.Security.KeyBytes = cfg.To.Security.SSLKeyBytes
   113  	}
   114  	lightningCfg.TiDB.Host = cfg.To.Host
   115  	lightningCfg.TiDB.Psw = cfg.To.Password
   116  	lightningCfg.TiDB.User = cfg.To.User
   117  	lightningCfg.TiDB.Port = cfg.To.Port
   118  	if len(cfg.LoaderConfig.PDAddr) > 0 {
   119  		lightningCfg.TiDB.PdAddr = cfg.LoaderConfig.PDAddr
   120  	}
   121  	lightningCfg.TikvImporter.Backend = lcfg.BackendTiDB
   122  	if cfg.LoaderConfig.ImportMode == config.LoadModePhysical {
   123  		lightningCfg.TikvImporter.Backend = lcfg.BackendLocal
   124  	}
   125  	lightningCfg.PostRestore.Checksum = lcfg.OpLevelOff
   126  	if lightningCfg.TikvImporter.Backend == lcfg.BackendLocal {
   127  		lightningCfg.TikvImporter.SortedKVDir = cfg.SortingDirPhysical
   128  	}
   129  	lightningCfg.Mydumper.SourceDir = cfg.Dir
   130  	lightningCfg.App.Config.File = "" // make lightning not init logger, see more in https://github.com/pingcap/tidb/pull/29291
   131  	return lightningCfg
   132  }
   133  
// Type implements Unit.Type.
// It always reports the load unit type.
func (l *LightningLoader) Type() pb.UnitType {
	return pb.UnitType_Load
}
   138  
   139  func (l *LightningLoader) initMetricProxies() {
   140  	if l.cfg.MetricsFactory != nil {
   141  		// running inside dataflow-engine and the factory is an auto register/deregister factory
   142  		l.metricProxies = newMetricProxies(l.cfg.MetricsFactory)
   143  	} else {
   144  		l.metricProxies = defaultMetricProxies
   145  	}
   146  }
   147  
   148  // Init initializes loader for a load task, but not start Process.
   149  // if fail, it should not call l.Close.
   150  func (l *LightningLoader) Init(ctx context.Context) (err error) {
   151  	l.initMetricProxies()
   152  
   153  	l.toDB, err = conn.GetDownstreamDB(&l.cfg.To)
   154  	if err != nil {
   155  		return err
   156  	}
   157  
   158  	checkpointList := NewLightningCheckpointList(l.toDB, l.cfg.Name, l.cfg.SourceID, l.cfg.MetaSchema, l.logger)
   159  	err = checkpointList.Prepare(ctx)
   160  	if err == nil {
   161  		l.checkPointList = checkpointList
   162  	}
   163  	failpoint.Inject("ignoreLoadCheckpointErr", func(_ failpoint.Value) {
   164  		l.logger.Info("", zap.String("failpoint", "ignoreLoadCheckpointErr"))
   165  		err = nil
   166  	})
   167  	if err != nil {
   168  		return err
   169  	}
   170  
   171  	timeZone := l.cfg.Timezone
   172  	if len(timeZone) == 0 {
   173  		baseDB, err2 := conn.GetDownstreamDB(&l.cfg.To)
   174  		if err2 != nil {
   175  			return err2
   176  		}
   177  		defer baseDB.Close()
   178  		var err1 error
   179  		timeZone, err1 = config.FetchTimeZoneSetting(ctx, baseDB.DB)
   180  		if err1 != nil {
   181  			return err1
   182  		}
   183  	}
   184  	l.timeZone = timeZone
   185  
   186  	for k, v := range l.cfg.To.Session {
   187  		if strings.ToLower(k) == "sql_mode" {
   188  			l.sqlMode = v
   189  			break
   190  		}
   191  	}
   192  
   193  	if len(l.sqlMode) == 0 {
   194  		sqlModes, err3 := conn.AdjustSQLModeCompatible(l.cfg.LoaderConfig.SQLMode)
   195  		if err3 != nil {
   196  			l.logger.Warn("cannot adjust sql_mode compatible, the sql_mode will stay the same", log.ShortError(err3))
   197  		}
   198  		l.sqlMode = sqlModes
   199  	}
   200  
   201  	return nil
   202  }
   203  
   204  func (l *LightningLoader) ignoreCheckpointError(ctx context.Context, cfg *lcfg.Config) error {
   205  	status, err := l.checkPointList.taskStatus(ctx)
   206  	if err != nil {
   207  		return err
   208  	}
   209  	if status != lightningStatusRunning {
   210  		return nil
   211  	}
   212  	cpdb, err := checkpoints.OpenCheckpointsDB(ctx, cfg)
   213  	if err != nil {
   214  		return err
   215  	}
   216  	defer func() {
   217  		_ = cpdb.Close()
   218  	}()
   219  	return errors.Trace(cpdb.IgnoreErrorCheckpoint(ctx, "all"))
   220  }
   221  
   222  func (l *LightningLoader) runLightning(ctx context.Context, cfg *lcfg.Config) (err error) {
   223  	taskCtx, cancel := context.WithCancel(ctx)
   224  	l.Lock()
   225  	l.cancel = cancel
   226  	l.Unlock()
   227  
   228  	// always try to skill all checkpoint errors so we can resume this phase.
   229  	err = l.ignoreCheckpointError(ctx, cfg)
   230  	if err != nil {
   231  		l.logger.Warn("check lightning checkpoint status failed, skip this error", log.ShortError(err))
   232  	}
   233  	if err = l.checkPointList.UpdateStatus(ctx, lightningStatusRunning); err != nil {
   234  		return err
   235  	}
   236  
   237  	var opts []lserver.Option
   238  	if l.cfg.MetricsFactory != nil {
   239  		// this branch means dataflow engine has set a Factory, the Factory itself
   240  		// will register and deregister metrics, but lightning will expect the
   241  		// register and deregister at the beginning and end of its lifetime.
   242  		// So we use dataflow engine's Factory to register, and use dataflow engine's
   243  		// global metrics to manually deregister.
   244  		opts = append(opts,
   245  			lserver.WithPromFactory(
   246  				promutil.NewWrappingFactory(
   247  					l.cfg.MetricsFactory,
   248  					"",
   249  					prometheus.Labels{"task": l.cfg.Name, "source_id": l.cfg.SourceID},
   250  				)),
   251  			lserver.WithPromRegistry(promutil.GetGlobalMetricRegistry()))
   252  	} else {
   253  		registry := prometheus.DefaultGatherer.(prometheus.Registerer)
   254  		failpoint.Inject("DontUnregister", func() {
   255  			registry = promutil.NewOnlyRegRegister(registry)
   256  		})
   257  
   258  		opts = append(opts,
   259  			lserver.WithPromFactory(
   260  				promutil.NewWrappingFactory(
   261  					tidbpromutil.NewDefaultFactory(),
   262  					"",
   263  					prometheus.Labels{"task": l.cfg.Name, "source_id": l.cfg.SourceID},
   264  				),
   265  			),
   266  			lserver.WithPromRegistry(registry))
   267  	}
   268  	if l.cfg.ExtStorage != nil {
   269  		opts = append(opts,
   270  			lserver.WithDumpFileStorage(l.cfg.ExtStorage))
   271  	}
   272  	if l.cfg.FrameworkLogger != nil {
   273  		opts = append(opts, lserver.WithLogger(l.cfg.FrameworkLogger))
   274  	} else {
   275  		opts = append(opts, lserver.WithLogger(l.logger.Logger))
   276  	}
   277  
   278  	var hasDup atomic.Bool
   279  	if l.cfg.LoaderConfig.ImportMode == config.LoadModePhysical {
   280  		opts = append(opts, lserver.WithDupIndicator(&hasDup))
   281  	}
   282  
   283  	err = l.core.RunOnceWithOptions(taskCtx, cfg, opts...)
   284  	failpoint.Inject("LoadDataSlowDown", nil)
   285  	failpoint.Inject("LoadDataSlowDownByTask", func(val failpoint.Value) {
   286  		tasks := val.(string)
   287  		taskNames := strings.Split(tasks, ",")
   288  		for _, taskName := range taskNames {
   289  			if l.cfg.Name == taskName {
   290  				l.logger.Info("inject failpoint LoadDataSlowDownByTask in lightning loader", zap.String("task", taskName))
   291  				<-taskCtx.Done()
   292  			}
   293  		}
   294  	})
   295  	defer func() {
   296  		l.lastErr = err
   297  	}()
   298  	if err != nil {
   299  		return convertLightningError(err)
   300  	}
   301  	if hasDup.Load() {
   302  		return terror.ErrLoadLightningHasDup.Generate(cfg.App.TaskInfoSchemaName, errormanager.ConflictErrorTableName)
   303  	}
   304  	return nil
   305  }
   306  
   307  var checksumErrorPattern = regexp.MustCompile(`total_kvs: (\d*) vs (\d*)`)
   308  
   309  func convertLightningError(err error) error {
   310  	if common.ErrChecksumMismatch.Equal(err) {
   311  		lErr := errors.Cause(err).(*errors.Error)
   312  		msg := lErr.GetMsg()
   313  		matches := checksumErrorPattern.FindStringSubmatch(msg)
   314  		if len(matches) == 3 {
   315  			return terror.ErrLoadLightningChecksum.Generate(matches[2], matches[1])
   316  		}
   317  	}
   318  	return terror.ErrLoadLightningRuntime.Delegate(err)
   319  }
   320  
   321  // GetTaskInfoSchemaName is used to assign to TikvImporter.DuplicateResolution in lightning config.
   322  func GetTaskInfoSchemaName(dmMetaSchema, taskName string) string {
   323  	return dmMetaSchema + "_" + taskName
   324  }
   325  
   326  // GetLightningConfig returns the lightning task config for the lightning global config and DM subtask config.
   327  func GetLightningConfig(globalCfg *lcfg.GlobalConfig, subtaskCfg *config.SubTaskConfig) (*lcfg.Config, error) {
   328  	cfg := lcfg.NewConfig()
   329  	if err := cfg.LoadFromGlobal(globalCfg); err != nil {
   330  		return nil, err
   331  	}
   332  	// TableConcurrency is adjusted to the value of RegionConcurrency
   333  	// when using TiDB backend.
   334  	// TODO: should we set the TableConcurrency separately.
   335  	cfg.App.RegionConcurrency = subtaskCfg.LoaderConfig.PoolSize
   336  	cfg.Routes = subtaskCfg.RouteRules
   337  
   338  	// Use MySQL checkpoint when we use s3/gcs as dumper storage
   339  	if subtaskCfg.ExtStorage != nil || !storage.IsLocalDiskPath(subtaskCfg.LoaderConfig.Dir) {
   340  		// NOTE: If we use bucket as dumper storage, write lightning checkpoint to downstream DB to avoid bucket ratelimit
   341  		// since we will use check Checkpoint in 'ignoreCheckpointError', MAKE SURE we have assigned the Checkpoint config properly here
   342  		if err := cfg.Security.BuildTLSConfig(); err != nil {
   343  			return nil, err
   344  		}
   345  		// To enable the loader worker failover, we need to use jobID+sourceID to isolate the checkpoint schema
   346  		cfg.Checkpoint.Schema = cputil.LightningCheckpointSchema(subtaskCfg.Name, subtaskCfg.SourceID)
   347  		cfg.Checkpoint.Driver = lcfg.CheckpointDriverMySQL
   348  		cfg.Checkpoint.MySQLParam = connParamFromConfig(cfg)
   349  	} else {
   350  		// NOTE: for op dm, we recommend to keep data files and checkpoint file in the same place to avoid inconsistent deletion
   351  		cfg.Checkpoint.Driver = lcfg.CheckpointDriverFile
   352  		var cpPath string
   353  		// l.cfg.LoaderConfig.Dir may be a s3 path, and Lightning supports checkpoint in s3, we can use storage.AdjustPath to adjust path both local and s3.
   354  		cpPath, err := storage.AdjustPath(subtaskCfg.LoaderConfig.Dir, string(filepath.Separator)+lightningCheckpointFileName)
   355  		if err != nil {
   356  			return nil, err
   357  		}
   358  		cfg.Checkpoint.DSN = cpPath
   359  	}
   360  	// TODO: Fix me. Remove strategy may cause the re-import if the process exits unexpectly between removing lightning
   361  	// checkpoint meta and updating dm checkpoint meta to 'finished'.
   362  	cfg.Checkpoint.KeepAfterSuccess = lcfg.CheckpointRemove
   363  
   364  	if subtaskCfg.LoaderConfig.DiskQuotaPhysical > 0 {
   365  		cfg.TikvImporter.DiskQuota = subtaskCfg.LoaderConfig.DiskQuotaPhysical
   366  	}
   367  	if cfg.TikvImporter.Backend == lcfg.BackendLocal {
   368  		cfg.TikvImporter.IncrementalImport = true
   369  	} else if err := cfg.TikvImporter.OnDuplicate.FromStringValue(string(subtaskCfg.OnDuplicateLogical)); err != nil {
   370  		return nil, err
   371  	}
   372  	switch subtaskCfg.OnDuplicatePhysical {
   373  	case config.OnDuplicateManual:
   374  		cfg.TikvImporter.DuplicateResolution = lcfg.ReplaceOnDup
   375  		cfg.App.TaskInfoSchemaName = GetTaskInfoSchemaName(subtaskCfg.MetaSchema, subtaskCfg.Name)
   376  	case config.OnDuplicateNone:
   377  		cfg.TikvImporter.DuplicateResolution = lcfg.NoneOnDup
   378  	}
   379  	switch subtaskCfg.ChecksumPhysical {
   380  	case config.OpLevelRequired:
   381  		cfg.PostRestore.Checksum = lcfg.OpLevelRequired
   382  	case config.OpLevelOptional:
   383  		cfg.PostRestore.Checksum = lcfg.OpLevelOptional
   384  	case config.OpLevelOff:
   385  		cfg.PostRestore.Checksum = lcfg.OpLevelOff
   386  	}
   387  	switch subtaskCfg.Analyze {
   388  	case config.OpLevelRequired:
   389  		cfg.PostRestore.Analyze = lcfg.OpLevelRequired
   390  	case config.OpLevelOptional:
   391  		cfg.PostRestore.Analyze = lcfg.OpLevelOptional
   392  	case config.OpLevelOff:
   393  		cfg.PostRestore.Analyze = lcfg.OpLevelOff
   394  	}
   395  	cfg.TiDB.Vars = make(map[string]string)
   396  	cfg.Routes = subtaskCfg.RouteRules
   397  	if subtaskCfg.To.Session != nil {
   398  		for k, v := range subtaskCfg.To.Session {
   399  			cfg.TiDB.Vars[k] = v
   400  		}
   401  	}
   402  
   403  	if subtaskCfg.RangeConcurrency > 0 {
   404  		cfg.TikvImporter.RangeConcurrency = subtaskCfg.RangeConcurrency
   405  	}
   406  	if len(subtaskCfg.CompressKVPairs) > 0 {
   407  		err := cfg.TikvImporter.CompressKVPairs.FromStringValue(subtaskCfg.CompressKVPairs)
   408  		if err != nil {
   409  			return nil, err
   410  		}
   411  	}
   412  
   413  	cfg.TiDB.Vars = map[string]string{
   414  		// always set transaction mode to optimistic
   415  		"tidb_txn_mode": "optimistic",
   416  		// always disable foreign key check when do full sync.
   417  		"foreign_key_checks": "0",
   418  	}
   419  	cfg.Mydumper.SourceID = subtaskCfg.SourceID
   420  	return cfg, nil
   421  }
   422  
   423  func (l *LightningLoader) getLightningConfig() (*lcfg.Config, error) {
   424  	cfg, err := GetLightningConfig(l.lightningGlobalConfig, l.cfg)
   425  	if err != nil {
   426  		return nil, err
   427  	}
   428  	cfg.TiDB.StrSQLMode = l.sqlMode
   429  	cfg.TiDB.Vars["time_zone"] = l.timeZone
   430  	return cfg, nil
   431  }
   432  
// restore drives one load attempt end to end.
//
// It registers the load task, reads the DM-side checkpoint status, and then:
//   - if the previous run ended with a duplicate or checksum error, treats
//     this call as a manual resume and marks the task finished without
//     re-importing;
//   - if the status is below "finished", registers the checkpoint, builds the
//     lightning config, waits via readyAndWait, and runs lightning;
//   - otherwise just marks the loader as finished.
//
// After a successful finish it removes the load task in full mode, optionally
// cleans the dump files, and returns the result of finishAndWait. If the load
// did not finish, the error from the lightning run is returned.
func (l *LightningLoader) restore(ctx context.Context) error {
	if err := putLoadTask(l.cli, l.cfg, l.workerName); err != nil {
		return err
	}

	status, err := l.checkPointList.taskStatus(ctx)
	if err != nil {
		return err
	}

	// we have disabled auto-resume for below errors, so if lightning is resuming
	// it means user wants to skip this error.
	switch {
	case terror.ErrLoadLightningHasDup.Equal(l.lastErr),
		terror.ErrLoadLightningChecksum.Equal(l.lastErr):
		l.logger.Info("manually resume from error, DM will skip the error and continue to next unit",
			zap.Error(l.lastErr))

		l.finish.Store(true)
		err = l.checkPointList.UpdateStatus(ctx, lightningStatusFinished)
		if err != nil {
			l.logger.Error("failed to update checkpoint status", zap.Error(err))
			return err
		}
		// Force the "already finished" branch below so lightning is not run again.
		status = lightningStatusFinished
	}

	if status < lightningStatusFinished {
		if err = l.checkPointList.RegisterCheckPoint(ctx); err != nil {
			return err
		}
		var cfg *lcfg.Config
		cfg, err = l.getLightningConfig()
		if err != nil {
			return err
		}
		if err2 := readyAndWait(ctx, l.cli, l.cfg); err2 != nil {
			return err2
		}
		err = l.runLightning(ctx, cfg)
		if err == nil {
			l.finish.Store(true)
			err = l.checkPointList.UpdateStatus(ctx, lightningStatusFinished)
			if err != nil {
				l.logger.Error("failed to update checkpoint status", zap.Error(err))
				return err
			}
		} else {
			// Keep err: it is returned at the end because finish stays false.
			l.logger.Error("failed to runlightning", zap.Error(err))
		}
	} else {
		l.finish.Store(true)
	}
	// The load task entry is only removed for full-mode tasks.
	if err == nil && l.finish.Load() && l.cfg.Mode == config.ModeFull {
		if err = delLoadTask(l.cli, l.cfg, l.workerName); err != nil {
			return err
		}
	}
	if l.finish.Load() {
		if l.cfg.CleanDumpFile {
			cleanDumpFiles(ctx, l.cfg)
		}
		return finishAndWait(ctx, l.cli, l.cfg)
	}
	return err
}
   499  
   500  func (l *LightningLoader) handleExitErrMetric(err *pb.ProcessError) {
   501  	resumable := fmt.Sprintf("%t", unit.IsResumableError(err))
   502  	l.metricProxies.loaderExitWithErrorCounter.WithLabelValues(l.cfg.Name, l.cfg.SourceID, resumable).Inc()
   503  }
   504  
// Process implements Unit.Process.
//
// It reads the dump metadata (binlog position and GTID), runs restore, and
// sends exactly one pb.ProcessResult on pr before returning. Context-canceled
// errors from restore are not reported as errors; instead IsCanceled is set
// when ctx is already done at the end of the run.
func (l *LightningLoader) Process(ctx context.Context, pr chan pb.ProcessResult) {
	l.logger.Info("lightning load start")
	errs := make([]*pb.ProcessError, 0, 1)
	// Test-only hook: report a fixed error and stop.
	failpoint.Inject("lightningAlwaysErr", func(_ failpoint.Value) {
		l.logger.Info("", zap.String("failpoint", "lightningAlwaysErr"))
		pr <- pb.ProcessResult{
			Errors: []*pb.ProcessError{unit.NewProcessError(errors.New("failpoint lightningAlwaysErr"))},
		}
		failpoint.Return()
	})

	binlog, gtid, err := getMydumpMetadata(ctx, l.cli, l.cfg, l.workerName)
	if err != nil {
		processError := unit.NewProcessError(err)
		l.handleExitErrMetric(processError)
		pr <- pb.ProcessResult{
			Errors: []*pb.ProcessError{processError},
		}
		return
	}
	// Only overwrite the stored metadata when a non-empty value was read.
	if binlog != "" {
		l.metaBinlog.Store(binlog)
	}
	if gtid != "" {
		l.metaBinlogGTID.Store(gtid)
	}

	// Test-only hook: delay the load by the injected number of seconds.
	failpoint.Inject("longLoadProcess", func(val failpoint.Value) {
		if sec, ok := val.(int); ok {
			l.logger.Info("long loader unit", zap.Int("second", sec))
			time.Sleep(time.Duration(sec) * time.Second)
		}
	})

	if err := l.restore(ctx); err != nil && !utils.IsContextCanceledError(err) {
		l.logger.Error("process error", zap.Error(err))
		processError := unit.NewProcessError(err)
		l.handleExitErrMetric(processError)
		errs = append(errs, processError)
	}
	// Non-blocking check of whether the caller's context has been canceled.
	isCanceled := false
	select {
	case <-ctx.Done():
		isCanceled = true
	default:
	}
	s := l.status()
	l.logger.Info("lightning load end",
		zap.Bool("IsCanceled", isCanceled),
		zap.Int64("finished_bytes", s.FinishedBytes),
		zap.Int64("total_bytes", s.TotalBytes),
		zap.String("progress", s.Progress))
	pr <- pb.ProcessResult{IsCanceled: isCanceled, Errors: errs}
}
   560  
// isClosed reports whether Close has completed for this loader.
func (l *LightningLoader) isClosed() bool {
	return l.closed.Load()
}
   564  
   565  // IsFreshTask implements Unit.IsFreshTask.
   566  func (l *LightningLoader) IsFreshTask(ctx context.Context) (bool, error) {
   567  	status, err := l.checkPointList.taskStatus(ctx)
   568  	return status == lightningStatusInit, err
   569  }
   570  
// Close does graceful shutdown.
// It pauses the running import, removes this task's label values from the
// metrics, closes the checkpoint list, and finally marks the loader as closed.
func (l *LightningLoader) Close() {
	l.Pause()
	l.removeLabelValuesWithTaskInMetrics(l.cfg.Name, l.cfg.SourceID)
	l.checkPointList.Close()
	// Set last: Pause above is a no-op once the loader is marked closed.
	l.closed.Store(true)
}
   578  
// Kill does ungraceful shutdown.
// For now it behaves exactly like Close.
func (l *LightningLoader) Kill() {
	// TODO: implement kill
	l.Close()
}
   584  
   585  // Pause pauses the process, and it can be resumed later
   586  // should cancel context from external.
   587  func (l *LightningLoader) Pause() {
   588  	l.Lock()
   589  	defer l.Unlock()
   590  	if l.isClosed() {
   591  		l.logger.Warn("try to pause, but already closed")
   592  		return
   593  	}
   594  	if l.cancel != nil {
   595  		l.cancel()
   596  	}
   597  	l.core.Stop()
   598  }
   599  
   600  // Resume resumes the paused process.
   601  func (l *LightningLoader) Resume(ctx context.Context, pr chan pb.ProcessResult) {
   602  	if l.isClosed() {
   603  		l.logger.Warn("try to resume, but already closed")
   604  		return
   605  	}
   606  	l.core = lserver.New(l.lightningGlobalConfig)
   607  	// continue the processing
   608  	l.Process(ctx, pr)
   609  }
   610  
   611  // Update implements Unit.Update
   612  // now, only support to update config for routes, filters, column-mappings, block-allow-list
   613  // now no config diff implemented, so simply re-init use new config
   614  // no binlog filter for loader need to update.
   615  func (l *LightningLoader) Update(ctx context.Context, cfg *config.SubTaskConfig) error {
   616  	l.Lock()
   617  	defer l.Unlock()
   618  	l.cfg.BAList = cfg.BAList
   619  	l.cfg.RouteRules = cfg.RouteRules
   620  	l.cfg.ColumnMappingRules = cfg.ColumnMappingRules
   621  	return nil
   622  }
   623  
   624  func (l *LightningLoader) status() *pb.LoadStatus {
   625  	finished, total := l.core.Status()
   626  	progress := percent(finished, total, l.finish.Load())
   627  	currentSpeed := int64(l.speedRecorder.GetSpeed(float64(finished)))
   628  
   629  	l.logger.Info("progress status of lightning",
   630  		zap.Int64("finished_bytes", finished),
   631  		zap.Int64("total_bytes", total),
   632  		zap.String("progress", progress),
   633  		zap.Int64("current speed (bytes / seconds)", currentSpeed),
   634  	)
   635  	s := &pb.LoadStatus{
   636  		FinishedBytes:  finished,
   637  		TotalBytes:     total,
   638  		Progress:       progress,
   639  		MetaBinlog:     l.metaBinlog.Load(),
   640  		MetaBinlogGTID: l.metaBinlogGTID.Load(),
   641  		Bps:            currentSpeed,
   642  	}
   643  	return s
   644  }
   645  
// Status returns the unit's current status.
// The source status argument is ignored; the result is the *pb.LoadStatus
// produced by status.
func (l *LightningLoader) Status(_ *binlog.SourceStatus) interface{} {
	return l.status()
}
   650  
   651  func connParamFromConfig(config *lcfg.Config) *common.MySQLConnectParam {
   652  	return &common.MySQLConnectParam{
   653  		Host:     config.TiDB.Host,
   654  		Port:     config.TiDB.Port,
   655  		User:     config.TiDB.User,
   656  		Password: config.TiDB.Psw,
   657  		SQLMode:  mysql.DefaultSQLMode,
   658  		// TODO: keep same as Lightning defaultMaxAllowedPacket later
   659  		MaxAllowedPacket:         64 * 1024 * 1024,
   660  		TLSConfig:                config.Security.TLSConfig,
   661  		AllowFallbackToPlaintext: config.Security.AllowFallbackToPlaintext,
   662  	}
   663  }