github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/backfilling.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package dbs
    15  
    16  import (
    17  	"context"
    18  	"math"
    19  	"strconv"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    24  	"github.com/whtcorpsinc/errors"
    25  	"github.com/whtcorpsinc/failpoint"
    26  	"github.com/whtcorpsinc/milevadb/blockcodec"
    27  	"github.com/whtcorpsinc/milevadb/causet"
    28  	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb"
    29  	dbsutil "github.com/whtcorpsinc/milevadb/dbs/soliton"
    30  	"github.com/whtcorpsinc/milevadb/ekv"
    31  	"github.com/whtcorpsinc/milevadb/memex"
    32  	"github.com/whtcorpsinc/milevadb/metrics"
    33  	"github.com/whtcorpsinc/milevadb/soliton"
    34  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    35  	causetDecoder "github.com/whtcorpsinc/milevadb/soliton/rowCausetDecoder"
    36  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    37  	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
    38  	"go.uber.org/zap"
    39  )
    40  
    41  type backfillWorkerType byte
    42  
    43  const (
    44  	typeAddIndexWorker                backfillWorkerType = 0
    45  	typeUFIDelateDeferredCausetWorker backfillWorkerType = 1
    46  )
    47  
    48  func (bWT backfillWorkerType) String() string {
    49  	switch bWT {
    50  	case typeAddIndexWorker:
    51  		return "add index"
    52  	case typeUFIDelateDeferredCausetWorker:
    53  		return "uFIDelate column"
    54  	default:
    55  		return "unknown"
    56  	}
    57  }
    58  
// backfiller is the per-job strategy a backfillWorker drives while processing a task.
type backfiller interface {
	// BackfillDataInTxn processes one batch of handleRange and reports the batch
	// outcome in taskCtx. A non-nil errInTxn makes the worker stop the current task
	// and return the error in its result.
	BackfillDataInTxn(handleRange reorgBackfillTask) (taskCtx backfillTaskContext, errInTxn error)
	// AddMetricInfo is called after every successful batch with the number of rows
	// that batch added.
	AddMetricInfo(float64)
}
    63  
    64  type backfillResult struct {
    65  	addedCount int
    66  	scanCount  int
    67  	nextHandle ekv.Handle
    68  	err        error
    69  }
    70  
    71  // backfillTaskContext is the context of the batch adding indices or uFIDelating column values.
    72  // After finishing the batch adding indices or uFIDelating column values, result in backfillTaskContext will be merged into backfillResult.
    73  type backfillTaskContext struct {
    74  	nextHandle ekv.Handle
    75  	done       bool
    76  	addedCount int
    77  	scanCount  int
    78  }
    79  
// backfillWorker is a goroutine-backed worker that receives reorg tasks on taskCh,
// processes them batch by batch through a backfiller, and reports one
// backfillResult per task on resultCh.
type backfillWorker struct {
	// id is logged as "workerID".
	id int
	// dbsWorker is the owning worker; it is consulted for job runnability and
	// receives row-count updates.
	dbsWorker *worker
	// batchCnt is loaded from variable.GetDBSReorgBatchSize() and refreshed before every task.
	batchCnt int
	sessCtx  stochastikctx.Context
	// taskCh delivers tasks to run; closing it (see Close) makes run exit.
	taskCh chan *reorgBackfillTask
	// resultCh carries the result of each task back to the dispatcher.
	resultCh chan *backfillResult
	causet   causet.Block
	// closed records that Close has already closed taskCh.
	closed bool
	// priority defaults to ekv.PriorityLow in newBackfillWorker.
	priority int
}
    91  
    92  func newBackfillWorker(sessCtx stochastikctx.Context, worker *worker, id int, t causet.PhysicalTable) *backfillWorker {
    93  	return &backfillWorker{
    94  		id:        id,
    95  		causet:    t,
    96  		dbsWorker: worker,
    97  		batchCnt:  int(variable.GetDBSReorgBatchSize()),
    98  		sessCtx:   sessCtx,
    99  		taskCh:    make(chan *reorgBackfillTask, 1),
   100  		resultCh:  make(chan *backfillResult, 1),
   101  		priority:  ekv.PriorityLow,
   102  	}
   103  }
   104  
   105  func (w *backfillWorker) Close() {
   106  	if !w.closed {
   107  		w.closed = true
   108  		close(w.taskCh)
   109  	}
   110  }
   111  
   112  func closeBackfillWorkers(workers []*backfillWorker) {
   113  	for _, worker := range workers {
   114  		worker.Close()
   115  	}
   116  }
   117  
   118  type reorgBackfillTask struct {
   119  	physicalTableID int64
   120  	startHandle     ekv.Handle
   121  	endHandle       ekv.Handle
   122  	// endIncluded indicates whether the range include the endHandle.
   123  	// When the last handle is math.MaxInt64, set endIncluded to true to
   124  	// tell worker backfilling index of endHandle.
   125  	endIncluded bool
   126  }
   127  
   128  func (r *reorgBackfillTask) String() string {
   129  	rightParenthesis := ")"
   130  	if r.endIncluded {
   131  		rightParenthesis = "]"
   132  	}
   133  	return "physicalTableID" + strconv.FormatInt(r.physicalTableID, 10) + "_" + "[" + r.startHandle.String() + "," + r.endHandle.String() + rightParenthesis
   134  }
   135  
   136  func logSlowOperations(elapsed time.Duration, slowMsg string, threshold uint32) {
   137  	if threshold == 0 {
   138  		threshold = atomic.LoadUint32(&variable.DBSSlowOprThreshold)
   139  	}
   140  
   141  	if elapsed >= time.Duration(threshold)*time.Millisecond {
   142  		logutil.BgLogger().Info("[dbs] slow operations", zap.Duration("takeTimes", elapsed), zap.String("msg", slowMsg))
   143  	}
   144  }
   145  
   146  // mergeBackfillCtxToResult merge partial result in taskCtx into result.
   147  func mergeBackfillCtxToResult(taskCtx *backfillTaskContext, result *backfillResult) {
   148  	result.nextHandle = taskCtx.nextHandle
   149  	result.addedCount += taskCtx.addedCount
   150  	result.scanCount += taskCtx.scanCount
   151  }
   152  
   153  // handleBackfillTask backfills range [task.startHandle, task.endHandle) handle's index to causet.
   154  func (w *backfillWorker) handleBackfillTask(d *dbsCtx, task *reorgBackfillTask, bf backfiller) *backfillResult {
   155  	handleRange := *task
   156  	result := &backfillResult{addedCount: 0, nextHandle: handleRange.startHandle, err: nil}
   157  	lastLogCount := 0
   158  	lastLogTime := time.Now()
   159  	startTime := lastLogTime
   160  
   161  	for {
   162  		// Give job chance to be canceled, if we not check it here,
   163  		// if there is panic in bf.BackfillDataInTxn we will never cancel the job.
   164  		// Because reorgRecordTask may run a long time,
   165  		// we should check whether this dbs job is still runnable.
   166  		err := w.dbsWorker.isReorgRunnable(d)
   167  		if err != nil {
   168  			result.err = err
   169  			return result
   170  		}
   171  
   172  		taskCtx, err := bf.BackfillDataInTxn(handleRange)
   173  		if err != nil {
   174  			result.err = err
   175  			return result
   176  		}
   177  
   178  		bf.AddMetricInfo(float64(taskCtx.addedCount))
   179  		mergeBackfillCtxToResult(&taskCtx, result)
   180  		w.dbsWorker.reorgCtx.increaseRowCount(int64(taskCtx.addedCount))
   181  
   182  		if num := result.scanCount - lastLogCount; num >= 30000 {
   183  			lastLogCount = result.scanCount
   184  			logutil.BgLogger().Info("[dbs] backfill worker back fill index", zap.Int("workerID", w.id), zap.Int("addedCount", result.addedCount),
   185  				zap.Int("scanCount", result.scanCount), zap.String("nextHandle", toString(taskCtx.nextHandle)), zap.Float64("speed(rows/s)", float64(num)/time.Since(lastLogTime).Seconds()))
   186  			lastLogTime = time.Now()
   187  		}
   188  
   189  		handleRange.startHandle = taskCtx.nextHandle
   190  		if taskCtx.done {
   191  			break
   192  		}
   193  	}
   194  	logutil.BgLogger().Info("[dbs] backfill worker finish task", zap.Int("workerID", w.id),
   195  		zap.String("task", task.String()), zap.Int("addedCount", result.addedCount),
   196  		zap.Int("scanCount", result.scanCount), zap.String("nextHandle", toString(result.nextHandle)),
   197  		zap.String("takeTime", time.Since(startTime).String()))
   198  	return result
   199  }
   200  
   201  func (w *backfillWorker) run(d *dbsCtx, bf backfiller) {
   202  	logutil.BgLogger().Info("[dbs] backfill worker start", zap.Int("workerID", w.id))
   203  	defer func() {
   204  		w.resultCh <- &backfillResult{err: errReorgPanic}
   205  	}()
   206  	defer soliton.Recover(metrics.LabelDBS, "backfillWorker.run", nil, false)
   207  	for {
   208  		task, more := <-w.taskCh
   209  		if !more {
   210  			break
   211  		}
   212  
   213  		logutil.BgLogger().Debug("[dbs] backfill worker got task", zap.Int("workerID", w.id), zap.String("task", task.String()))
   214  		failpoint.Inject("mockBackfillRunErr", func() {
   215  			if w.id == 0 {
   216  				result := &backfillResult{addedCount: 0, nextHandle: nil, err: errors.Errorf("mock backfill error")}
   217  				w.resultCh <- result
   218  				failpoint.Continue()
   219  			}
   220  		})
   221  
   222  		// Dynamic change batch size.
   223  		w.batchCnt = int(variable.GetDBSReorgBatchSize())
   224  		result := w.handleBackfillTask(d, task, bf)
   225  		w.resultCh <- result
   226  	}
   227  	logutil.BgLogger().Info("[dbs] backfill worker exit", zap.Int("workerID", w.id))
   228  }
   229  
   230  // splitTableRanges uses FIDel region's key ranges to split the backfilling causet key range space,
   231  // to speed up backfilling data in causet with disperse handle.
   232  // The `t` should be a non-partitioned causet or a partition.
   233  func splitTableRanges(t causet.PhysicalTable, causetstore ekv.CausetStorage, startHandle, endHandle ekv.Handle) ([]ekv.KeyRange, error) {
   234  	startRecordKey := t.RecordKey(startHandle)
   235  	endRecordKey := t.RecordKey(endHandle)
   236  
   237  	logutil.BgLogger().Info("[dbs] split causet range from FIDel", zap.Int64("physicalTableID", t.GetPhysicalID()),
   238  		zap.String("startHandle", toString(startHandle)), zap.String("endHandle", toString(endHandle)))
   239  	ekvRange := ekv.KeyRange{StartKey: startRecordKey, EndKey: endRecordKey}
   240  	s, ok := causetstore.(einsteindb.CausetStorage)
   241  	if !ok {
   242  		// Only support split ranges in einsteindb.CausetStorage now.
   243  		return []ekv.KeyRange{ekvRange}, nil
   244  	}
   245  
   246  	maxSleep := 10000 // ms
   247  	bo := einsteindb.NewBackofferWithVars(context.Background(), maxSleep, nil)
   248  	ranges, err := einsteindb.SplitRegionRanges(bo, s.GetRegionCache(), []ekv.KeyRange{ekvRange})
   249  	if err != nil {
   250  		return nil, errors.Trace(err)
   251  	}
   252  	if len(ranges) == 0 {
   253  		return nil, errors.Trace(errInvalidSplitRegionRanges)
   254  	}
   255  	return ranges, nil
   256  }
   257  
   258  func (w *worker) waitTaskResults(workers []*backfillWorker, taskCnt int, totalAddedCount *int64, startHandle ekv.Handle) (ekv.Handle, int64, error) {
   259  	var (
   260  		addedCount int64
   261  		nextHandle = startHandle
   262  		firstErr   error
   263  	)
   264  	for i := 0; i < taskCnt; i++ {
   265  		worker := workers[i]
   266  		result := <-worker.resultCh
   267  		if firstErr == nil && result.err != nil {
   268  			firstErr = result.err
   269  			// We should wait all working workers exits, any way.
   270  			continue
   271  		}
   272  
   273  		if result.err != nil {
   274  			logutil.BgLogger().Warn("[dbs] backfill worker failed", zap.Int("workerID", worker.id),
   275  				zap.Error(result.err))
   276  		}
   277  
   278  		if firstErr == nil {
   279  			*totalAddedCount += int64(result.addedCount)
   280  			addedCount += int64(result.addedCount)
   281  			nextHandle = result.nextHandle
   282  		}
   283  	}
   284  
   285  	return nextHandle, addedCount, errors.Trace(firstErr)
   286  }
   287  
   288  // handleReorgTasks sends tasks to workers, and waits for all the running workers to return results,
   289  // there are taskCnt running workers.
   290  func (w *worker) handleReorgTasks(reorgInfo *reorgInfo, totalAddedCount *int64, workers []*backfillWorker, batchTasks []*reorgBackfillTask) error {
   291  	for i, task := range batchTasks {
   292  		workers[i].taskCh <- task
   293  	}
   294  
   295  	startHandle := batchTasks[0].startHandle
   296  	taskCnt := len(batchTasks)
   297  	startTime := time.Now()
   298  	nextHandle, taskAddedCount, err := w.waitTaskResults(workers, taskCnt, totalAddedCount, startHandle)
   299  	elapsedTime := time.Since(startTime)
   300  	if err == nil {
   301  		err = w.isReorgRunnable(reorgInfo.d)
   302  	}
   303  
   304  	if err != nil {
   305  		// UFIDelate the reorg handle that has been processed.
   306  		err1 := ekv.RunInNewTxn(reorgInfo.d.causetstore, true, func(txn ekv.Transaction) error {
   307  			return errors.Trace(reorgInfo.UFIDelateReorgMeta(txn, nextHandle, reorgInfo.EndHandle, reorgInfo.PhysicalTableID))
   308  		})
   309  		metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblError).Observe(elapsedTime.Seconds())
   310  		logutil.BgLogger().Warn("[dbs] backfill worker handle batch tasks failed",
   311  			zap.Int64("totalAddedCount", *totalAddedCount), zap.String("startHandle", toString(startHandle)),
   312  			zap.String("nextHandle", toString(nextHandle)), zap.Int64("batchAddedCount", taskAddedCount),
   313  			zap.String("taskFailedError", err.Error()), zap.String("takeTime", elapsedTime.String()),
   314  			zap.NamedError("uFIDelateHandleError", err1))
   315  		return errors.Trace(err)
   316  	}
   317  
   318  	// nextHandle will be uFIDelated periodically in runReorgJob, so no need to uFIDelate it here.
   319  	w.reorgCtx.setNextHandle(nextHandle)
   320  	metrics.BatchAddIdxHistogram.WithLabelValues(metrics.LblOK).Observe(elapsedTime.Seconds())
   321  	logutil.BgLogger().Info("[dbs] backfill worker handle batch tasks successful", zap.Int64("totalAddedCount", *totalAddedCount), zap.String("startHandle", toString(startHandle)),
   322  		zap.String("nextHandle", toString(nextHandle)), zap.Int64("batchAddedCount", taskAddedCount), zap.String("takeTime", elapsedTime.String()))
   323  	return nil
   324  }
   325  
   326  func decodeHandleRange(keyRange ekv.KeyRange) (ekv.Handle, ekv.Handle, error) {
   327  	startHandle, err := blockcodec.DecodeRowKey(keyRange.StartKey)
   328  	if err != nil {
   329  		return nil, nil, errors.Trace(err)
   330  	}
   331  	endHandle, err := blockcodec.DecodeRowKey(keyRange.EndKey)
   332  	if err != nil {
   333  		return nil, nil, errors.Trace(err)
   334  	}
   335  
   336  	return startHandle, endHandle, nil
   337  }
   338  
   339  // sendRangeTaskToWorkers sends tasks to workers, and returns remaining ekvRanges that is not handled.
   340  func (w *worker) sendRangeTaskToWorkers(workers []*backfillWorker, reorgInfo *reorgInfo,
   341  	totalAddedCount *int64, ekvRanges []ekv.KeyRange, globalEndHandle ekv.Handle) ([]ekv.KeyRange, error) {
   342  	batchTasks := make([]*reorgBackfillTask, 0, len(workers))
   343  	physicalTableID := reorgInfo.PhysicalTableID
   344  
   345  	// Build reorg tasks.
   346  	for _, keyRange := range ekvRanges {
   347  		startHandle, endHandle, err := decodeHandleRange(keyRange)
   348  		if err != nil {
   349  			return nil, errors.Trace(err)
   350  		}
   351  
   352  		endIncluded := false
   353  		if endHandle.Equal(globalEndHandle) {
   354  			endIncluded = true
   355  		}
   356  		task := &reorgBackfillTask{physicalTableID, startHandle, endHandle, endIncluded}
   357  		batchTasks = append(batchTasks, task)
   358  
   359  		if len(batchTasks) >= len(workers) {
   360  			break
   361  		}
   362  	}
   363  
   364  	if len(batchTasks) == 0 {
   365  		return nil, nil
   366  	}
   367  
   368  	// Wait tasks finish.
   369  	err := w.handleReorgTasks(reorgInfo, totalAddedCount, workers, batchTasks)
   370  	if err != nil {
   371  		return nil, errors.Trace(err)
   372  	}
   373  
   374  	if len(batchTasks) < len(ekvRanges) {
   375  		// There are ekvRanges not handled.
   376  		remains := ekvRanges[len(batchTasks):]
   377  		return remains, nil
   378  	}
   379  
   380  	return nil, nil
   381  }
   382  
   383  var (
   384  	// TestCheckWorkerNumCh use for test adjust backfill worker.
   385  	TestCheckWorkerNumCh = make(chan struct{})
   386  	// TestCheckWorkerNumber use for test adjust backfill worker.
   387  	TestCheckWorkerNumber = int32(16)
   388  )
   389  
   390  func loadDBSReorgVars(w *worker) error {
   391  	// Get stochastikctx from context resource pool.
   392  	var ctx stochastikctx.Context
   393  	ctx, err := w.sessPool.get()
   394  	if err != nil {
   395  		return errors.Trace(err)
   396  	}
   397  	defer w.sessPool.put(ctx)
   398  	return dbsutil.LoadDBSReorgVars(ctx)
   399  }
   400  
   401  func makeuFIDelecodeDefCausMap(sessCtx stochastikctx.Context, t causet.Block) (map[int64]causetDecoder.DeferredCauset, error) {
   402  	dbName := perceptron.NewCIStr(sessCtx.GetStochastikVars().CurrentDB)
   403  	wriblockDefCausInfos := make([]*perceptron.DeferredCausetInfo, 0, len(t.WriblockDefCauss()))
   404  	for _, col := range t.WriblockDefCauss() {
   405  		wriblockDefCausInfos = append(wriblockDefCausInfos, col.DeferredCausetInfo)
   406  	}
   407  	exprDefCauss, _, err := memex.DeferredCausetInfos2DeferredCausetsAndNames(sessCtx, dbName, t.Meta().Name, wriblockDefCausInfos, t.Meta())
   408  	if err != nil {
   409  		return nil, err
   410  	}
   411  	mockSchema := memex.NewSchema(exprDefCauss...)
   412  
   413  	decodeDefCausMap := causetDecoder.BuildFullDecodeDefCausMap(t.WriblockDefCauss(), mockSchema)
   414  
   415  	return decodeDefCausMap, nil
   416  }
   417  
// writePhysicalTableRecord handles the "add index" or "modify/change column" reorganization state
// for a non-partitioned causet or a single partition.
// A partitioned causet must be handled partition by partition.
//
// How is "add index" or "uFIDelate column value" performed in the reorganization state?
// Up to @@milevadb_dbs_reorg_worker_cnt tasks are processed concurrently. Each task covers one
// handle range of the index/event records; the handle ranges are obtained by splitting the
// causet key range along FIDel regions, so each worker handles one region key range at a time.
// The operation flow is as follows:
//	1. Start the backfill worker goroutines (the count is re-evaluated on every round).
//	2. Split the causet key range along FIDel regions.
//	3. Send tasks to the running workers through their task channels, one region key range per task.
//	4. Wait for all running tasks to finish, then go back to step 2 with the first unhandled
//	   range as the new start, until no range remains.
// Each batch is written by a worker through backfiller.BackfillDataInTxn; row counts and the
// next handle are recorded by the helpers as batches complete.
//
// bfWorkerType selects the worker kind: typeAddIndexWorker uses indexInfo, any other value
// uses oldDefCausInfo and colInfo. All workers started here are closed when the function returns.
func (w *worker) writePhysicalTableRecord(t causet.PhysicalTable, bfWorkerType backfillWorkerType, indexInfo *perceptron.IndexInfo, oldDefCausInfo, colInfo *perceptron.DeferredCausetInfo, reorgInfo *reorgInfo) error {
	job := reorgInfo.Job
	totalAddedCount := job.GetRowCount()

	startHandle, endHandle := reorgInfo.StartHandle, reorgInfo.EndHandle
	sessCtx := newContext(reorgInfo.d.causetstore)
	decodeDefCausMap, err := makeuFIDelecodeDefCausMap(sessCtx, t)
	if err != nil {
		return errors.Trace(err)
	}

	if err := w.isReorgRunnable(reorgInfo.d); err != nil {
		return errors.Trace(err)
	}
	// Nothing to backfill when the reorg info carries no handle range at all.
	if startHandle == nil && endHandle == nil {
		return nil
	}

	failpoint.Inject("MockCaseWhenParseFailure", func(val failpoint.Value) {
		if val.(bool) {
			failpoint.Return(errors.New("job.ErrCount:" + strconv.Itoa(int(job.ErrorCount)) + ", mock unknown type: ast.whenClause."))
		}
	})

	// variable.dbsReorgWorkerCounter can be modified by system variable "milevadb_dbs_reorg_worker_cnt".
	workerCnt := variable.GetDBSReorgWorkerCounter()
	backfillWorkers := make([]*backfillWorker, 0, workerCnt)
	// The closure reads backfillWorkers at return time, so workers appended later are closed too.
	defer func() {
		closeBackfillWorkers(backfillWorkers)
	}()

	for {
		ekvRanges, err := splitTableRanges(t, reorgInfo.d.causetstore, startHandle, endHandle)
		if err != nil {
			return errors.Trace(err)
		}

		// Reload the reorg variables so the number of backfill workers can be adjusted dynamically.
		// A load failure is only logged; the previously loaded values stay in effect.
		if err := loadDBSReorgVars(w); err != nil {
			logutil.BgLogger().Error("[dbs] load DBS reorganization variable failed", zap.Error(err))
		}
		workerCnt = variable.GetDBSReorgWorkerCounter()
		// Never run more workers than there are ranges: with a single range only one worker is needed.
		if len(ekvRanges) < int(workerCnt) {
			workerCnt = int32(len(ekvRanges))
		}
		// Enlarge the worker pool up to workerCnt.
		for i := len(backfillWorkers); i < int(workerCnt); i++ {
			// Every worker gets its own stochastik context.
			sessCtx := newContext(reorgInfo.d.causetstore)
			sessCtx.GetStochastikVars().StmtCtx.IsDBSJobInQueue = true

			if bfWorkerType == typeAddIndexWorker {
				idxWorker := newAddIndexWorker(sessCtx, w, i, t, indexInfo, decodeDefCausMap)
				idxWorker.priority = job.Priority
				backfillWorkers = append(backfillWorkers, idxWorker.backfillWorker)
				go idxWorker.backfillWorker.run(reorgInfo.d, idxWorker)
			} else {
				uFIDelateWorker := newUFIDelateDeferredCausetWorker(sessCtx, w, i, t, oldDefCausInfo, colInfo, decodeDefCausMap)
				uFIDelateWorker.priority = job.Priority
				backfillWorkers = append(backfillWorkers, uFIDelateWorker.backfillWorker)
				go uFIDelateWorker.backfillWorker.run(reorgInfo.d, uFIDelateWorker)
			}
		}
		// Shrink the worker pool: close the surplus workers and drop them from the slice.
		if len(backfillWorkers) > int(workerCnt) {
			workers := backfillWorkers[workerCnt:]
			backfillWorkers = backfillWorkers[:workerCnt]
			closeBackfillWorkers(workers)
		}

		// Test hook: verifies the number of running workers against TestCheckWorkerNumber
		// (capped by the number of ranges) and then signals TestCheckWorkerNumCh.
		failpoint.Inject("checkBackfillWorkerNum", func(val failpoint.Value) {
			if val.(bool) {
				num := int(atomic.LoadInt32(&TestCheckWorkerNumber))
				if num != 0 {
					if num > len(ekvRanges) {
						if len(backfillWorkers) != len(ekvRanges) {
							failpoint.Return(errors.Errorf("check backfill worker num error, len ekv ranges is: %v, check backfill worker num is: %v, actual record num is: %v", len(ekvRanges), num, len(backfillWorkers)))
						}
					} else if num != len(backfillWorkers) {
						failpoint.Return(errors.Errorf("check backfill worker num error, len ekv ranges is: %v, check backfill worker num is: %v, actual record num is: %v", len(ekvRanges), num, len(backfillWorkers)))
					}
					TestCheckWorkerNumCh <- struct{}{}
				}
			}
		})

		logutil.BgLogger().Info("[dbs] start backfill workers to reorg record", zap.Int("workerCnt", len(backfillWorkers)),
			zap.Int("regionCnt", len(ekvRanges)), zap.String("startHandle", toString(startHandle)), zap.String("endHandle", toString(endHandle)))
		remains, err := w.sendRangeTaskToWorkers(backfillWorkers, reorgInfo, &totalAddedCount, ekvRanges, endHandle)
		if err != nil {
			return errors.Trace(err)
		}

		if len(remains) == 0 {
			break
		}
		// Continue the next round from the start of the first range that was not handled.
		startHandle, _, err = decodeHandleRange(remains[0])
		if err != nil {
			return errors.Trace(err)
		}
	}
	return nil
}
   535  
// recordIterFunc is the callback used for low-level record iteration.
// It receives the decoded handle h, the record key rowKey and the raw value
// rawRecord of one row. iterateSnapshotRows stops iterating when more is false
// or err is non-nil, and returns err to its caller.
type recordIterFunc func(h ekv.Handle, rowKey ekv.Key, rawRecord []byte) (more bool, err error)
   538  
   539  func iterateSnapshotRows(causetstore ekv.CausetStorage, priority int, t causet.Block, version uint64, startHandle ekv.Handle, endHandle ekv.Handle, endIncluded bool, fn recordIterFunc) error {
   540  	var firstKey ekv.Key
   541  	if startHandle == nil {
   542  		firstKey = t.RecordPrefix()
   543  	} else {
   544  		firstKey = t.RecordKey(startHandle)
   545  	}
   546  
   547  	var upperBound ekv.Key
   548  	if endHandle == nil {
   549  		upperBound = t.RecordPrefix().PrefixNext()
   550  	} else {
   551  		if endIncluded {
   552  			if endHandle.IsInt() && endHandle.IntValue() == math.MaxInt64 {
   553  				upperBound = t.RecordKey(endHandle).PrefixNext()
   554  			} else {
   555  				upperBound = t.RecordKey(endHandle.Next())
   556  			}
   557  		} else {
   558  			upperBound = t.RecordKey(endHandle)
   559  		}
   560  	}
   561  
   562  	ver := ekv.Version{Ver: version}
   563  	snap, err := causetstore.GetSnapshot(ver)
   564  	snap.SetOption(ekv.Priority, priority)
   565  	if err != nil {
   566  		return errors.Trace(err)
   567  	}
   568  
   569  	it, err := snap.Iter(firstKey, upperBound)
   570  	if err != nil {
   571  		return errors.Trace(err)
   572  	}
   573  	defer it.Close()
   574  
   575  	for it.Valid() {
   576  		if !it.Key().HasPrefix(t.RecordPrefix()) {
   577  			break
   578  		}
   579  
   580  		var handle ekv.Handle
   581  		handle, err = blockcodec.DecodeRowKey(it.Key())
   582  		if err != nil {
   583  			return errors.Trace(err)
   584  		}
   585  		rk := t.RecordKey(handle)
   586  
   587  		more, err := fn(handle, rk, it.Value())
   588  		if !more || err != nil {
   589  			return errors.Trace(err)
   590  		}
   591  
   592  		err = ekv.NextUntil(it, soliton.RowKeyPrefixFilter(rk))
   593  		if err != nil {
   594  			if ekv.ErrNotExist.Equal(err) {
   595  				break
   596  			}
   597  			return errors.Trace(err)
   598  		}
   599  	}
   600  
   601  	return nil
   602  }