github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/einsteindb/range_task.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package einsteindb

import (
	"bytes"
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"go.uber.org/zap"
)

const (
	rangeTaskDefaultStatLogInterval = time.Minute * 10
	defaultRegionsPerTask           = 128

	lblCompletedRegions = "completed-regions"
	lblFailedRegions    = "failed-regions"
)

// RangeTaskRunner splits a range into many sub-ranges that are processed concurrently, which makes it convenient to
// send requests to all regions in the range. Because of region merging and splitting, it's possible that multiple
// requests for disjoint ranges are sent to the same region.
type RangeTaskRunner struct {
	name            string
	causetstore     CausetStorage
	concurrency     int
	handler         RangeTaskHandler
	statLogInterval time.Duration
	regionsPerTask  int

	completedRegions int32
	failedRegions    int32
}

// RangeTaskStat is used to count Regions that completed or failed to do the task.
type RangeTaskStat struct {
	CompletedRegions int
	FailedRegions    int
}

// RangeTaskHandler is the type of functions that process a task covering a key range.
// The function should count the Regions that succeeded or failed to do the task.
// Returning an error from the handler means the whole task should be stopped.
type RangeTaskHandler = func(ctx context.Context, r ekv.KeyRange) (RangeTaskStat, error)
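
// The handler below is an illustrative sketch, not part of the original file. It shows the
// RangeTaskHandler contract with a hypothetical no-op handler that checks for cancellation, treats
// the whole sub-range as a single completed region, and never fails. A real handler would send
// requests for [r.StartKey, r.EndKey) and count the regions it actually processed.
func exampleNoopHandler(ctx context.Context, r ekv.KeyRange) (RangeTaskStat, error) {
	// Respect cancellation from the runner before doing any work.
	select {
	case <-ctx.Done():
		return RangeTaskStat{}, ctx.Err()
	default:
	}
	// Pretend the work on this sub-range succeeded on exactly one region.
	return RangeTaskStat{CompletedRegions: 1}, nil
}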

// NewRangeTaskRunner creates a RangeTaskRunner.
//
// `handler` is the function used to process each divided key range. If `handler` returns an error,
// the whole job will be canceled.
func NewRangeTaskRunner(
	name string,
	causetstore CausetStorage,
	concurrency int,
	handler RangeTaskHandler,
) *RangeTaskRunner {
	return &RangeTaskRunner{
		name:            name,
		causetstore:     causetstore,
		concurrency:     concurrency,
		handler:         handler,
		statLogInterval: rangeTaskDefaultStatLogInterval,
		regionsPerTask:  defaultRegionsPerTask,
	}
}
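
// The function below is an illustrative usage sketch, not part of the original file. It assumes a
// CausetStorage value is already available from the caller and shows the typical wiring: build a
// runner with a name (used in logs and metrics labels), a storage, a concurrency level and a
// handler, optionally tune the sub-task size, then run it over a key range. `exampleNoopHandler`
// is the hypothetical handler sketched above.
func exampleRunOnRange(ctx context.Context, causetstore CausetStorage, startKey, endKey ekv.Key) error {
	runner := NewRangeTaskRunner(
		"example-range-task", // task name, shows up in logs and metrics
		causetstore,
		4, // number of concurrent workers
		exampleNoopHandler,
	)
	// Optionally shrink the batch so each sub task covers fewer regions than the default 128.
	runner.SetRegionsPerTask(64)
	// RunOnRange blocks until the whole [startKey, endKey) range has been dispatched and processed,
	// or until a handler returns an error.
	return runner.RunOnRange(ctx, startKey, endKey)
}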

// SetRegionsPerTask sets how many regions are included in one divided sub task. Since regions may
// split and merge, a sub task may not contain exactly the specified number of regions.
func (s *RangeTaskRunner) SetRegionsPerTask(regionsPerTask int) {
	if regionsPerTask < 1 {
		panic("RangeTaskRunner: regionsPerTask should be at least 1")
	}
	s.regionsPerTask = regionsPerTask
}

// RunOnRange runs the task on the given range.
// Empty startKey or endKey means unbounded.
func (s *RangeTaskRunner) RunOnRange(ctx context.Context, startKey, endKey ekv.Key) error {
	// Reset the per-run counters and metrics so CompletedRegions/FailedRegions report this run only.
	s.completedRegions = 0
	s.failedRegions = 0
	metrics.EinsteinDBRangeTaskStats.WithLabelValues(s.name, lblCompletedRegions).Set(0)
	metrics.EinsteinDBRangeTaskStats.WithLabelValues(s.name, lblFailedRegions).Set(0)

	if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 {
		logutil.Logger(ctx).Info("empty range task executed. ignored",
			zap.String("name", s.name),
			zap.Stringer("startKey", startKey),
			zap.Stringer("endKey", endKey))
		return nil
	}

	logutil.Logger(ctx).Info("range task started",
		zap.String("name", s.name),
		zap.Stringer("startKey", startKey),
		zap.Stringer("endKey", endKey),
		zap.Int("concurrency", s.concurrency))

	// Periodically log the progress
	statLogTicker := time.NewTicker(s.statLogInterval)

	ctx, cancel := context.WithCancel(ctx)
	taskCh := make(chan *ekv.KeyRange, s.concurrency)
	var wg sync.WaitGroup

	// Create workers that concurrently process the whole range.
	workers := make([]*rangeTaskWorker, 0, s.concurrency)
	for i := 0; i < s.concurrency; i++ {
		w := s.createWorker(taskCh, &wg)
		workers = append(workers, w)
		wg.Add(1)
		go w.run(ctx, cancel)
	}

	startTime := time.Now()

	// Make sure taskCh is closed exactly once
	isClosed := false
	defer func() {
		if !isClosed {
			close(taskCh)
			wg.Wait()
		}
		statLogTicker.Stop()
		cancel()
		metrics.EinsteinDBRangeTaskStats.WithLabelValues(s.name, lblCompletedRegions).Set(0)
	}()

	// Iterate all regions and send each region's range as a task to the workers.
	key := startKey
Loop:
	for {
		select {
		case <-statLogTicker.C:
			logutil.Logger(ctx).Info("range task in progress",
				zap.String("name", s.name),
				zap.Stringer("startKey", startKey),
				zap.Stringer("endKey", endKey),
				zap.Int("concurrency", s.concurrency),
				zap.Duration("cost time", time.Since(startTime)),
				zap.Int("completed regions", s.CompletedRegions()))
		default:
		}

		bo := NewBackofferWithVars(ctx, locateRegionMaxBackoff, nil)

		rangeEndKey, err := s.causetstore.GetRegionCache().BatchLoadRegionsFromKey(bo, key, s.regionsPerTask)
		if err != nil {
			logutil.Logger(ctx).Info("range task failed",
				zap.String("name", s.name),
				zap.Stringer("startKey", startKey),
				zap.Stringer("endKey", endKey),
				zap.Duration("cost time", time.Since(startTime)),
				zap.Error(err))
			return errors.Trace(err)
		}
		task := &ekv.KeyRange{
			StartKey: key,
			EndKey:   rangeEndKey,
		}

		isLast := len(task.EndKey) == 0 || (len(endKey) > 0 && bytes.Compare(task.EndKey, endKey) >= 0)
		// Let task.EndKey = min(endKey, rangeEndKey)
		if isLast {
			task.EndKey = endKey
		}

		pushTaskStartTime := time.Now()

		select {
		case taskCh <- task:
		case <-ctx.Done():
			break Loop
		}
		metrics.EinsteinDBRangeTaskPushDuration.WithLabelValues(s.name).Observe(time.Since(pushTaskStartTime).Seconds())

		if isLast {
			break
		}

		key = task.EndKey
	}

	isClosed = true
	close(taskCh)
	wg.Wait()
	for _, w := range workers {
		if w.err != nil {
			logutil.Logger(ctx).Info("range task failed",
				zap.String("name", s.name),
				zap.Stringer("startKey", startKey),
				zap.Stringer("endKey", endKey),
				zap.Duration("cost time", time.Since(startTime)),
				zap.Error(w.err))
			return errors.Trace(w.err)
		}
	}

	logutil.Logger(ctx).Info("range task finished",
		zap.String("name", s.name),
		zap.Stringer("startKey", startKey),
		zap.Stringer("endKey", endKey),
		zap.Duration("cost time", time.Since(startTime)),
		zap.Int("completed regions", s.CompletedRegions()))

	return nil
}
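
// The helper below is an illustrative sketch, not part of the original file. It restates the
// end-key clamping rule used in the dispatch loop above: a sub task normally ends at the end key of
// the loaded region batch, but when that key is empty (unbounded) or reaches past the overall
// endKey, the task is the last one and its end is clamped to endKey.
func exampleClampTaskEndKey(batchEndKey, endKey ekv.Key) (taskEndKey ekv.Key, isLast bool) {
	isLast = len(batchEndKey) == 0 || (len(endKey) > 0 && bytes.Compare(batchEndKey, endKey) >= 0)
	if isLast {
		// task.EndKey = min(endKey, batchEndKey) collapses to endKey for the last task.
		return endKey, true
	}
	return batchEndKey, false
}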

// createWorker creates a worker that can process tasks from the given channel.
func (s *RangeTaskRunner) createWorker(taskCh chan *ekv.KeyRange, wg *sync.WaitGroup) *rangeTaskWorker {
	return &rangeTaskWorker{
		name:        s.name,
		causetstore: s.causetstore,
		handler:     s.handler,
		taskCh:      taskCh,
		wg:          wg,

		completedRegions: &s.completedRegions,
		failedRegions:    &s.failedRegions,
	}
}

// CompletedRegions returns how many regions have been sent requests so far.
func (s *RangeTaskRunner) CompletedRegions() int {
	return int(atomic.LoadInt32(&s.completedRegions))
}

// FailedRegions returns how many regions have failed to do the task so far.
func (s *RangeTaskRunner) FailedRegions() int {
	return int(atomic.LoadInt32(&s.failedRegions))
}

// rangeTaskWorker is used by RangeTaskRunner to process tasks concurrently.
type rangeTaskWorker struct {
	name        string
	causetstore CausetStorage
	handler     RangeTaskHandler
	taskCh      chan *ekv.KeyRange
	wg          *sync.WaitGroup

	err error

	completedRegions *int32
	failedRegions    *int32
}

// run starts the worker. It collects all tasks from `w.taskCh` and processes them one by one.
func (w *rangeTaskWorker) run(ctx context.Context, cancel context.CancelFunc) {
	defer w.wg.Done()
	for r := range w.taskCh {
		select {
		case <-ctx.Done():
			w.err = ctx.Err()
			return
		default:
		}

		stat, err := w.handler(ctx, *r)

		atomic.AddInt32(w.completedRegions, int32(stat.CompletedRegions))
		atomic.AddInt32(w.failedRegions, int32(stat.FailedRegions))
		metrics.EinsteinDBRangeTaskStats.WithLabelValues(w.name, lblCompletedRegions).Add(float64(stat.CompletedRegions))
		metrics.EinsteinDBRangeTaskStats.WithLabelValues(w.name, lblFailedRegions).Add(float64(stat.FailedRegions))

		if err != nil {
			logutil.Logger(ctx).Info("canceling range task because of error",
				zap.String("name", w.name),
				zap.Stringer("failed startKey", r.StartKey),
				zap.Stringer("failed endKey", r.EndKey),
				zap.Error(err))
			w.err = err
			cancel()
			break
		}
	}
}