github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/dbs.go

// Copyright 2020 The ql Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSES/QL-LICENSE file.

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dbs

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/ngaut/pools"
	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/ast"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	pumpcli "github.com/whtcorpsinc/milevadb-tools/milevadb-binlog/pump_client"
	"github.com/whtcorpsinc/milevadb/causet"
	"github.com/whtcorpsinc/milevadb/config"
	"github.com/whtcorpsinc/milevadb/dbs/soliton"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	goutil "github.com/whtcorpsinc/milevadb/soliton"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/spacetime"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/binloginfo"
	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
	"github.com/whtcorpsinc/milevadb/tenant"
	"go.uber.org/zap"
)

const (
	// currentVersion is for all new DBS jobs.
	currentVersion = 1
	// DBSTenantKey is the dbs tenant path that is saved to etcd, and it's exported for testing.
	DBSTenantKey = "/milevadb/dbs/fg/tenant"
	dbsPrompt    = "dbs"

	shardRowIDBitsMax = 15

	batchAddingJobs = 10

	// PartitionCountLimit is the limit of the number of partitions in a causet.
	// See https://dev.allegrosql.com/doc/refman/5.7/en/partitioning-limitations.html.
	PartitionCountLimit = 8192
)

// OnExist specifies what to do when a new object has a name collision.
type OnExist uint8

const (
	// OnExistError throws an error on name collision.
	OnExistError OnExist = iota
	// OnExistIgnore skips creating the new object.
	OnExistIgnore
	// OnExistReplace replaces the old object with the new object. This is only
	// supported by VIEWs at the moment. For other object types, this is
	// equivalent to OnExistError.
	OnExistReplace
)

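// The snippet below is a minimal illustrative sketch (not part of the original
// file) of how these modes are passed to DBS.CreateBlockWithInfo, declared later
// in this file; the variables d, sctx, schemaReplicant and info are assumed to
// be prepared by the caller:
//
//	// With OnExistIgnore, an existing causet with the same name is silently skipped.
//	err := d.CreateBlockWithInfo(sctx, schemaReplicant, info, OnExistIgnore, false)
//
// With OnExistError the same call would fail on a name collision instead, and
// OnExistReplace only changes the behavior for views.
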
var (
	// BlockDeferredCausetCountLimit is the limit of the number of columns in a causet.
	// It's exported for testing.
	BlockDeferredCausetCountLimit = uint32(512)
	// EnableSplitBlockRegion is a flag that decides whether to split a new region for
	// a newly created causet. It takes effect only if the CausetStorage supports split
	// region.
	EnableSplitBlockRegion = uint32(0)
)

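// A minimal sketch (an assumption about usage, not shown in this file) of how
// such a uint32 flag is typically toggled and read atomically, since DBS
// workers may read it concurrently; the caller would need to import sync/atomic:
//
//	atomic.StoreUint32(&dbs.EnableSplitBlockRegion, 1) // enable region splitting
//	if atomic.LoadUint32(&dbs.EnableSplitBlockRegion) == 1 {
//		// split a new region for the newly created causet
//	}
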
// DBS is responsible for updating the schemaReplicant in the data causetstore and maintaining the in-memory SchemaReplicant cache.
type DBS interface {
	CreateSchema(ctx stochastikctx.Context, name perceptron.CIStr, charsetInfo *ast.CharsetOpt) error
	AlterSchema(ctx stochastikctx.Context, stmt *ast.AlterDatabaseStmt) error
	DropSchema(ctx stochastikctx.Context, schemaReplicant perceptron.CIStr) error
	CreateBlock(ctx stochastikctx.Context, stmt *ast.CreateBlockStmt) error
	CreateView(ctx stochastikctx.Context, stmt *ast.CreateViewStmt) error
	DropBlock(ctx stochastikctx.Context, blockIdent ast.Ident) (err error)
	RecoverBlock(ctx stochastikctx.Context, recoverInfo *RecoverInfo) (err error)
	DropView(ctx stochastikctx.Context, blockIdent ast.Ident) (err error)
	CreateIndex(ctx stochastikctx.Context, blockIdent ast.Ident, keyType ast.IndexKeyType, indexName perceptron.CIStr,
		defCausumnNames []*ast.IndexPartSpecification, indexOption *ast.IndexOption, ifNotExists bool) error
	DropIndex(ctx stochastikctx.Context, blockIdent ast.Ident, indexName perceptron.CIStr, ifExists bool) error
	AlterBlock(ctx stochastikctx.Context, blockIdent ast.Ident, spec []*ast.AlterBlockSpec) error
	TruncateBlock(ctx stochastikctx.Context, blockIdent ast.Ident) error
	RenameBlock(ctx stochastikctx.Context, oldBlockIdent, newBlockIdent ast.Ident, isAlterBlock bool) error
	LockBlocks(ctx stochastikctx.Context, stmt *ast.LockBlocksStmt) error
	UnlockBlocks(ctx stochastikctx.Context, lockedBlocks []perceptron.BlockLockTpInfo) error
	CleanupBlockLock(ctx stochastikctx.Context, blocks []*ast.BlockName) error
	UFIDelateBlockReplicaInfo(ctx stochastikctx.Context, physicalID int64, available bool) error
	RepairBlock(ctx stochastikctx.Context, causet *ast.BlockName, createStmt *ast.CreateBlockStmt) error
	CreateSequence(ctx stochastikctx.Context, stmt *ast.CreateSequenceStmt) error
	DropSequence(ctx stochastikctx.Context, blockIdent ast.Ident, ifExists bool) (err error)

	// CreateSchemaWithInfo creates a database (schemaReplicant) given its database info.
	//
	// If `tryRetainID` is true, this method will try to keep the database ID specified in
	// the `info` rather than generating new ones. This is just a hint though; if the ID
	// collides with an existing database, a new ID will always be used.
	//
	// WARNING: the DBS owns the `info` after calling this function, and will modify its fields
	// in-place. If you want to keep using `info`, please call Clone() first.
	CreateSchemaWithInfo(
		ctx stochastikctx.Context,
		info *perceptron.DBInfo,
		onExist OnExist,
		tryRetainID bool) error

	// CreateBlockWithInfo creates a causet, view or sequence given its causet info.
	//
	// If `tryRetainID` is true, this method will try to keep the causet ID specified in the `info`
	// rather than generating new ones. This is just a hint though; if the ID collides with an
	// existing causet, a new ID will always be used.
	//
	// WARNING: the DBS owns the `info` after calling this function, and will modify its fields
	// in-place. If you want to keep using `info`, please call Clone() first.
	CreateBlockWithInfo(
		ctx stochastikctx.Context,
		schemaReplicant perceptron.CIStr,
		info *perceptron.BlockInfo,
		onExist OnExist,
		tryRetainID bool) error

	// Start campaigns to be the tenant and starts workers.
	// ctxPool is used by the workers' delRangeManager and for creating stochastik.
	Start(ctxPool *pools.ResourcePool) error
	// GetLease returns the current schemaReplicant lease time.
	GetLease() time.Duration
	// Stats returns the DBS statistics.
	Stats(vars *variable.StochastikVars) (map[string]interface{}, error)
	// GetScope gets the status variables scope.
	GetScope(status string) variable.ScopeFlag
	// Stop stops the DBS worker.
	Stop() error
	// RegisterEventCh registers an event channel for dbs.
	RegisterEventCh(chan<- *soliton.Event)
	// SchemaSyncer gets the schemaReplicant syncer.
	SchemaSyncer() soliton.SchemaSyncer
	// TenantManager gets the tenant manager.
	TenantManager() tenant.Manager
	// GetID gets the dbs ID.
	GetID() string
	// GetBlockMaxHandle gets the max event handle of a normal causet or a partition.
	GetBlockMaxHandle(startTS uint64, tbl causet.PhysicalBlock) (ekv.Handle, bool, error)
	// SetBinlogClient sets the binlog client for the DBS worker. It's exported for testing.
	SetBinlogClient(*pumpcli.PumpsClient)
	// GetHook gets the hook. It's exported for testing.
	GetHook() Callback
}

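// A minimal usage sketch (not part of the original file): a typical lifecycle is
// to construct a DBS with NewDBS (defined below), call Start, issue DBS
// operations through the interface, and call Stop on shutdown. The variables
// ctx, pool and stmt are assumed to be prepared by the caller, and WithLease is
// an assumed Option constructor not shown in this file:
//
//	d := NewDBS(context.Background(), WithLease(time.Second))
//	if err := d.Start(pool); err != nil {
//		// handle the error; without a started worker, no job is processed
//	}
//	defer d.Stop()
//	err := d.CreateBlock(ctx, stmt) // blocks until the job has finished (entered the history queue)
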
type limitJobTask struct {
	job *perceptron.Job
	err chan error
}

// dbs is used to handle the memexs that define the structure or schemaReplicant of the database.
type dbs struct {
	m          sync.RWMutex
	ctx        context.Context
	cancel     context.CancelFunc
	wg         sync.WaitGroup // It's only used to deal with the data race in restart_test.
	limitJobCh chan *limitJobTask

	*dbsCtx
	workers     map[workerType]*worker
	sessPool    *stochastikPool
	delRangeMgr delRangeManager
}

// dbsCtx is the context when we use a worker to handle DBS jobs.
type dbsCtx struct {
	uuid          string
	causetstore   ekv.CausetStorage
	tenantManager tenant.Manager
	schemaSyncer  soliton.SchemaSyncer
	dbsJobDoneCh  chan struct{}
	dbsEventCh    chan<- *soliton.Event
	lease         time.Duration        // lease is the schemaReplicant lease.
	binlogCli     *pumpcli.PumpsClient // binlogCli is used for Binlog.
	infoHandle    *schemareplicant.Handle
	blockLockCkr  soliton.DeadBlockLockChecker

	// hook may be modified.
	mu struct {
		sync.RWMutex
		hook        Callback
		interceptor Interceptor
	}
}

func (dc *dbsCtx) isTenant() bool {
	isTenant := dc.tenantManager.IsTenant()
	logutil.BgLogger().Debug("[dbs] check whether is the DBS tenant", zap.Bool("isTenant", isTenant), zap.String("selfID", dc.uuid))
	if isTenant {
		metrics.DBSCounter.WithLabelValues(metrics.DBSTenant + "_" + allegrosql.MilevaDBReleaseVersion).Inc()
	}
	return isTenant
}

// RegisterEventCh registers the passed channel for dbs Events.
func (d *dbs) RegisterEventCh(ch chan<- *soliton.Event) {
	d.dbsEventCh = ch
}

// asyncNotifyEvent notifies the outside world, say the statistics handle, of a dbs event.
// When the channel is full, we may give up the notification and just log it.
func asyncNotifyEvent(d *dbsCtx, e *soliton.Event) {
	if d.dbsEventCh != nil {
		if d.lease == 0 {
			// A lease of 0 is only used in tests.
			select {
			case d.dbsEventCh <- e:
			default:
			}
			return
		}
		for i := 0; i < 10; i++ {
			select {
			case d.dbsEventCh <- e:
				return
			default:
				logutil.BgLogger().Warn("[dbs] fail to notify DBS event", zap.String("event", e.String()))
				time.Sleep(time.Microsecond * 10)
			}
		}
	}
}

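// A minimal consumer-side sketch (not part of the original file): because
// asyncNotifyEvent drops an event after a few non-blocking retries, a consumer
// should drain its channel promptly and size the buffer generously. The buffer
// size 1024 and the handleEvent helper are illustrative assumptions:
//
//	ch := make(chan *soliton.Event, 1024)
//	d.RegisterEventCh(ch)
//	go func() {
//		for e := range ch {
//			handleEvent(e) // e.g. refresh statistics for the affected causet
//		}
//	}()
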
// NewDBS creates a new DBS.
func NewDBS(ctx context.Context, options ...Option) DBS {
	return newDBS(ctx, options...)
}

func newDBS(ctx context.Context, options ...Option) *dbs {
	opt := &Options{
		Hook: &BaseCallback{},
	}
	for _, o := range options {
		o(opt)
	}

	id := uuid.New().String()
	var manager tenant.Manager
	var syncer soliton.SchemaSyncer
	var deadLockCkr soliton.DeadBlockLockChecker
	if etcdCli := opt.EtcdCli; etcdCli == nil {
		// The etcdCli is nil if the causetstore is a localstore, which is only used for testing.
		// So we use mockTenantManager and MockSchemaSyncer.
		manager = tenant.NewMockManager(ctx, id)
		syncer = NewMockSchemaSyncer()
	} else {
		manager = tenant.NewTenantManager(ctx, etcdCli, dbsPrompt, id, DBSTenantKey)
		syncer = soliton.NewSchemaSyncer(ctx, etcdCli, id, manager)
		deadLockCkr = soliton.NewDeadBlockLockChecker(etcdCli)
	}

	dbsCtx := &dbsCtx{
		uuid:          id,
		causetstore:   opt.CausetStore,
		lease:         opt.Lease,
		dbsJobDoneCh:  make(chan struct{}, 1),
		tenantManager: manager,
		schemaSyncer:  syncer,
		binlogCli:     binloginfo.GetPumpsClient(),
		infoHandle:    opt.InfoHandle,
		blockLockCkr:  deadLockCkr,
	}
	dbsCtx.mu.hook = opt.Hook
	dbsCtx.mu.interceptor = &BaseInterceptor{}
	d := &dbs{
		ctx:        ctx,
		dbsCtx:     dbsCtx,
		limitJobCh: make(chan *limitJobTask, batchAddingJobs),
	}

	return d
}

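// A construction sketch (not part of the original file): newDBS reads its
// configuration from an Options struct populated by functional Option values.
// The constructor names below (WithEtcdClient, WithStore, WithInfoHandle,
// WithLease) are assumptions inferred from the Options fields used above, not
// confirmed by this file:
//
//	d := NewDBS(ctx,
//		WithEtcdClient(etcdCli), // a nil etcd client selects the mock manager/syncer above
//		WithStore(causetstore),
//		WithInfoHandle(infoHandle),
//		WithLease(schemaLease),
//	)
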
// Stop implements DBS.Stop interface.
func (d *dbs) Stop() error {
	d.m.Lock()
	defer d.m.Unlock()

	d.close()
	logutil.BgLogger().Info("[dbs] stop DBS", zap.String("ID", d.uuid))
	return nil
}

func (d *dbs) newDeleteRangeManager(mock bool) delRangeManager {
	var delRangeMgr delRangeManager
	if !mock {
		delRangeMgr = newDelRangeManager(d.causetstore, d.sessPool)
		logutil.BgLogger().Info("[dbs] start delRangeManager OK", zap.Bool("is an emulator", !d.causetstore.SupportDeleteRange()))
	} else {
		delRangeMgr = newMockDelRangeManager()
	}

	delRangeMgr.start()
	return delRangeMgr
}

// Start implements DBS.Start interface.
func (d *dbs) Start(ctxPool *pools.ResourcePool) error {
	logutil.BgLogger().Info("[dbs] start DBS", zap.String("ID", d.uuid), zap.Bool("runWorker", RunWorker))
	d.ctx, d.cancel = context.WithCancel(d.ctx)

	d.wg.Add(1)
	go d.limitDBSJobs()

	// If RunWorker is true, we need to campaign for the tenant and handle DBS jobs.
	// Otherwise, we needn't do that.
	if RunWorker {
		err := d.tenantManager.CampaignTenant()
		if err != nil {
			return errors.Trace(err)
		}

		d.workers = make(map[workerType]*worker, 2)
		d.sessPool = newStochastikPool(ctxPool)
		d.delRangeMgr = d.newDeleteRangeManager(ctxPool == nil)
		d.workers[generalWorker] = newWorker(d.ctx, generalWorker, d.sessPool, d.delRangeMgr)
		d.workers[addIdxWorker] = newWorker(d.ctx, addIdxWorker, d.sessPool, d.delRangeMgr)
		for _, worker := range d.workers {
			worker.wg.Add(1)
			w := worker
			go w.start(d.dbsCtx)

			metrics.DBSCounter.WithLabelValues(fmt.Sprintf("%s_%s", metrics.CreateDBS, worker.String())).Inc()

			// When the start function is called, we send a fake job to let the worker
			// check the tenant first, and then look for an existing job to run.
			asyncNotify(worker.dbsJobCh)
		}

		go d.schemaSyncer.StartCleanWork()
		if config.BlockLockEnabled() {
			d.wg.Add(1)
			go d.startCleanDeadBlockLock()
		}
		metrics.DBSCounter.WithLabelValues(metrics.StartCleanWork).Inc()
	}

	variable.RegisterStatistics(d)

	metrics.DBSCounter.WithLabelValues(metrics.CreateDBSInstance).Inc()
	return nil
}

func (d *dbs) close() {
	if isChanClosed(d.ctx.Done()) {
		return
	}

	startTime := time.Now()
	d.cancel()
	d.wg.Wait()
	d.tenantManager.Cancel()
	d.schemaSyncer.Close()

	for _, worker := range d.workers {
		worker.close()
	}
	// d.delRangeMgr uses stochastik from d.sessPool, so clear it
	// before d.sessPool.close to reduce the time spent by d.sessPool.close.
	if d.delRangeMgr != nil {
		d.delRangeMgr.clear()
	}
	if d.sessPool != nil {
		d.sessPool.close()
	}

	logutil.BgLogger().Info("[dbs] DBS closed", zap.String("ID", d.uuid), zap.Duration("take time", time.Since(startTime)))
}

// GetLease implements DBS.GetLease interface.
func (d *dbs) GetLease() time.Duration {
	d.m.RLock()
	lease := d.lease
	d.m.RUnlock()
	return lease
}

// GetSchemaReplicantWithInterceptor gets the schemareplicant bound to d. It's exported for testing.
// Please don't use this function: it exists so that TestParallelDBSBeforeRunDBSJob can intercept
// the calls to d.infoHandle.Get(); use d.infoHandle.Get() directly instead.
// Otherwise, TestParallelDBSBeforeRunDBSJob will hang forever.
func (d *dbs) GetSchemaReplicantWithInterceptor(ctx stochastikctx.Context) schemareplicant.SchemaReplicant {
	is := d.infoHandle.Get()

	d.mu.RLock()
	defer d.mu.RUnlock()
	return d.mu.interceptor.OnGetSchemaReplicant(ctx, is)
}

func (d *dbs) genGlobalIDs(count int) ([]int64, error) {
	var ret []int64
	err := ekv.RunInNewTxn(d.causetstore, true, func(txn ekv.Transaction) error {
		failpoint.Inject("mockGenGlobalIDFail", func(val failpoint.Value) {
			if val.(bool) {
				failpoint.Return(errors.New("gofail genGlobalIDs error"))
			}
		})

		m := spacetime.NewMeta(txn)
		var err error
		ret, err = m.GenGlobalIDs(count)
		return err
	})

	return ret, err
}

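// A test-side sketch (not part of the original file): the failpoint above is
// activated with failpoint.Enable using the fully qualified failpoint name.
// Assuming this package's failpoint path follows the usual convention of
// <import path>/<name>, a test could force genGlobalIDs to fail like this:
//
//	failpoint.Enable("github.com/whtcorpsinc/milevadb/dbs/mockGenGlobalIDFail", `return(true)`)
//	defer failpoint.Disable("github.com/whtcorpsinc/milevadb/dbs/mockGenGlobalIDFail")
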
// SchemaSyncer implements DBS.SchemaSyncer interface.
func (d *dbs) SchemaSyncer() soliton.SchemaSyncer {
	return d.schemaSyncer
}

// TenantManager implements DBS.TenantManager interface.
func (d *dbs) TenantManager() tenant.Manager {
	return d.tenantManager
}

// GetID implements DBS.GetID interface.
func (d *dbs) GetID() string {
	return d.uuid
}

func checkJobMaxInterval(job *perceptron.Job) time.Duration {
	// The job of adding an index takes more time to process,
	// so it uses a longer check interval.
	if job.Type == perceptron.CausetActionAddIndex || job.Type == perceptron.CausetActionAddPrimaryKey {
		return 3 * time.Second
	}
	if job.Type == perceptron.CausetActionCreateBlock || job.Type == perceptron.CausetActionCreateSchema {
		return 500 * time.Millisecond
	}
	return 1 * time.Second
}

func (d *dbs) asyncNotifyWorker(jobTp perceptron.CausetActionType) {
	// If the workers aren't running, there is no need to notify them.
	if !RunWorker {
		return
	}

	if jobTp == perceptron.CausetActionAddIndex || jobTp == perceptron.CausetActionAddPrimaryKey {
		asyncNotify(d.workers[addIdxWorker].dbsJobCh)
	} else {
		asyncNotify(d.workers[generalWorker].dbsJobCh)
	}
}

func (d *dbs) doDBSJob(ctx stochastikctx.Context, job *perceptron.Job) error {
	if isChanClosed(d.ctx.Done()) {
		return d.ctx.Err()
	}

	// Get a global job ID and put the DBS job in the queue.
	job.Query, _ = ctx.Value(stochastikctx.QueryString).(string)
	task := &limitJobTask{job, make(chan error)}
	d.limitJobCh <- task
	err := <-task.err

	ctx.GetStochastikVars().StmtCtx.IsDBSJobInQueue = true

	// Notify the worker that we pushed a new job, and wait for the job to be done.
	d.asyncNotifyWorker(job.Type)
	logutil.BgLogger().Info("[dbs] start DBS job", zap.String("job", job.String()), zap.String("query", job.Query))

	var historyJob *perceptron.Job
	jobID := job.ID
	// From start to end, a job's state moves through none -> delete only -> write only -> reorganization -> public.
	// Every state change waits about 2 * lease, so the ticker here checks at 10 * lease.
	// But we use etcd to speed things up; normally a change takes less than 0.5s now,
	// so we cap the interval at 0.5s, 1s or 3s.
	ticker := time.NewTicker(chooseLeaseTime(10*d.lease, checkJobMaxInterval(job)))
	startTime := time.Now()
	metrics.JobsGauge.WithLabelValues(job.Type.String()).Inc()
	defer func() {
		ticker.Stop()
		metrics.JobsGauge.WithLabelValues(job.Type.String()).Dec()
		metrics.HandleJobHistogram.WithLabelValues(job.Type.String(), metrics.RetLabel(err)).Observe(time.Since(startTime).Seconds())
	}()
	for {
		failpoint.Inject("storeCloseInLoop", func(_ failpoint.Value) {
			d.cancel()
		})

		select {
		case <-d.dbsJobDoneCh:
		case <-ticker.C:
		case <-d.ctx.Done():
			logutil.BgLogger().Error("[dbs] doDBSJob will quit because context done", zap.Error(d.ctx.Err()))
			err := d.ctx.Err()
			return err
		}

		historyJob, err = d.getHistoryDBSJob(jobID)
		if err != nil {
			logutil.BgLogger().Error("[dbs] get history DBS job failed, check again", zap.Error(err))
			continue
		} else if historyJob == nil {
			logutil.BgLogger().Debug("[dbs] DBS job is not in history, maybe not run", zap.Int64("jobID", jobID))
			continue
		}

		// If a job is a history job, its state must be JobStateSynced, JobStateRollbackDone or JobStateCancelled.
		if historyJob.IsSynced() {
			logutil.BgLogger().Info("[dbs] DBS job is finished", zap.Int64("jobID", jobID))
			return nil
		}

		if historyJob.Error != nil {
			return errors.Trace(historyJob.Error)
		}
		// Only for a JobStateCancelled job that is adding or dropping columns.
		if historyJob.IsCancelled() && (historyJob.Type == perceptron.CausetActionAddDeferredCausets || historyJob.Type == perceptron.CausetActionDropDeferredCausets) {
			logutil.BgLogger().Info("[dbs] DBS job is cancelled", zap.Int64("jobID", jobID))
			return nil
		}
		panic("When the state is JobStateRollbackDone or JobStateCancelled, historyJob.Error should never be nil")
	}
}

func (d *dbs) callHookOnChanged(err error) error {
	d.mu.RLock()
	defer d.mu.RUnlock()

	err = d.mu.hook.OnChanged(err)
	return errors.Trace(err)
}

// SetBinlogClient implements DBS.SetBinlogClient interface.
func (d *dbs) SetBinlogClient(binlogCli *pumpcli.PumpsClient) {
	d.binlogCli = binlogCli
}

// GetHook implements DBS.GetHook interface.
func (d *dbs) GetHook() Callback {
	d.mu.Lock()
	defer d.mu.Unlock()

	return d.mu.hook
}

func (d *dbs) startCleanDeadBlockLock() {
	defer func() {
		goutil.Recover(metrics.LabelDBS, "startCleanDeadBlockLock", nil, false)
		d.wg.Done()
	}()

	ticker := time.NewTicker(time.Second * 10)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			if !d.tenantManager.IsTenant() {
				continue
			}
			deadLockBlocks, err := d.blockLockCkr.GetDeadLockedBlocks(d.ctx, d.infoHandle.Get().AllSchemas())
			if err != nil {
				logutil.BgLogger().Info("[dbs] get dead causet dagger failed.", zap.Error(err))
				continue
			}
			for se, blocks := range deadLockBlocks {
				err := d.CleanDeadBlockLock(blocks, se)
				if err != nil {
					logutil.BgLogger().Info("[dbs] clean dead causet dagger failed.", zap.Error(err))
				}
			}
		case <-d.ctx.Done():
			return
		}
	}
}

// RecoverInfo contains information needed by DBS.RecoverBlock.
type RecoverInfo struct {
	SchemaID      int64
	BlockInfo     *perceptron.BlockInfo
	DropJobID     int64
	SnapshotTS    uint64
	CurAutoIncID  int64
	CurAutoRandID int64
}