github.com/KinWaiYuen/client-go/v2@v2.5.4/txnkv/rangetask/range_task.go (about)

     1  // Copyright 2021 TiKV Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // NOTE: The code in this file is based on code from the
    16  // TiDB project, licensed under the Apache License v 2.0
    17  //
    18  // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/range_task.go
    19  //
    20  
    21  // Copyright 2019 PingCAP, Inc.
    22  //
    23  // Licensed under the Apache License, Version 2.0 (the "License");
    24  // you may not use this file except in compliance with the License.
    25  // You may obtain a copy of the License at
    26  //
    27  //     http://www.apache.org/licenses/LICENSE-2.0
    28  //
    29  // Unless required by applicable law or agreed to in writing, software
    30  // distributed under the License is distributed on an "AS IS" BASIS,
    31  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    32  // See the License for the specific language governing permissions and
    33  // limitations under the License.
    34  
    35  package rangetask
    36  
    37  import (
    38  	"bytes"
    39  	"context"
    40  	"sync"
    41  	"sync/atomic"
    42  	"time"
    43  
    44  	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
    45  	"github.com/KinWaiYuen/client-go/v2/internal/retry"
    46  	"github.com/KinWaiYuen/client-go/v2/kv"
    47  	"github.com/KinWaiYuen/client-go/v2/metrics"
    48  	"github.com/pingcap/errors"
    49  	"go.uber.org/zap"
    50  )
    51  
    52  const (
    53  	rangeTaskDefaultStatLogInterval = time.Minute * 10
    54  	defaultRegionsPerTask           = 128
    55  
    56  	lblCompletedRegions = "completed-regions"
    57  	lblFailedRegions    = "failed-regions"
    58  )
    59  
    60  // Runner splits a range into many ranges to process concurrently, and convenient to send requests to all
    61  // regions in the range. Because of merging and splitting, it's possible that multiple requests for disjoint ranges are
    62  // sent to the same region.
    63  type Runner struct {
    64  	name            string
    65  	store           storage
    66  	concurrency     int
    67  	handler         TaskHandler
    68  	statLogInterval time.Duration
    69  	regionsPerTask  int
    70  
    71  	completedRegions int32
    72  	failedRegions    int32
    73  }
    74  
    75  // TaskStat is used to count Regions that completed or failed to do the task.
    76  type TaskStat struct {
    77  	CompletedRegions int
    78  	FailedRegions    int
    79  }
    80  
// TaskHandler is the type of functions that process the task for one key range.
// The function should report, in the returned TaskStat, how many Regions succeeded and how many failed.
// Returning a non-nil error from the handler means the whole task should be stopped.
type TaskHandler = func(ctx context.Context, r kv.KeyRange) (TaskStat, error)
    85  
    86  // NewRangeTaskRunner creates a RangeTaskRunner.
    87  //
    88  // `requestCreator` is the function used to create RPC request according to the given range.
    89  // `responseHandler` is the function to process responses of errors. If `responseHandler` returns error, the whole job
    90  // will be canceled.
    91  func NewRangeTaskRunner(
    92  	name string,
    93  	store storage,
    94  	concurrency int,
    95  	handler TaskHandler,
    96  ) *Runner {
    97  	return &Runner{
    98  		name:            name,
    99  		store:           store,
   100  		concurrency:     concurrency,
   101  		handler:         handler,
   102  		statLogInterval: rangeTaskDefaultStatLogInterval,
   103  		regionsPerTask:  defaultRegionsPerTask,
   104  	}
   105  }
   106  
   107  // SetRegionsPerTask sets how many regions is in a divided task. Since regions may split and merge, it's possible that
   108  // a sub task contains not exactly specified number of regions.
   109  func (s *Runner) SetRegionsPerTask(regionsPerTask int) {
   110  	if regionsPerTask < 1 {
   111  		panic("RangeTaskRunner: regionsPerTask should be at least 1")
   112  	}
   113  	s.regionsPerTask = regionsPerTask
   114  }
   115  
   116  const locateRegionMaxBackoff = 20000
   117  
   118  // NewLocateRegionBackoffer creates the backoofer for LocateRegion request.
   119  func NewLocateRegionBackoffer(ctx context.Context) *retry.Backoffer {
   120  	return retry.NewBackofferWithVars(ctx, locateRegionMaxBackoff, nil)
   121  }
   122  
   123  // RunOnRange runs the task on the given range.
   124  // Empty startKey or endKey means unbounded.
   125  func (s *Runner) RunOnRange(ctx context.Context, startKey, endKey []byte) error {
   126  	s.completedRegions = 0
   127  	metrics.TiKVRangeTaskStats.WithLabelValues(s.name, lblCompletedRegions).Set(0)
   128  
   129  	if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 {
   130  		logutil.Logger(ctx).Info("empty range task executed. ignored",
   131  			zap.String("name", s.name),
   132  			zap.String("startKey", kv.StrKey(startKey)),
   133  			zap.String("endKey", kv.StrKey(endKey)))
   134  		return nil
   135  	}
   136  
   137  	logutil.Logger(ctx).Info("range task started",
   138  		zap.String("name", s.name),
   139  		zap.String("startKey", kv.StrKey(startKey)),
   140  		zap.String("endKey", kv.StrKey(endKey)),
   141  		zap.Int("concurrency", s.concurrency))
   142  
   143  	// Periodically log the progress
   144  	statLogTicker := time.NewTicker(s.statLogInterval)
   145  
   146  	ctx, cancel := context.WithCancel(ctx)
   147  	taskCh := make(chan *kv.KeyRange, s.concurrency)
   148  	var wg sync.WaitGroup
   149  
   150  	// Create workers that concurrently process the whole range.
   151  	workers := make([]*rangeTaskWorker, 0, s.concurrency)
   152  	for i := 0; i < s.concurrency; i++ {
   153  		w := s.createWorker(taskCh, &wg)
   154  		workers = append(workers, w)
   155  		wg.Add(1)
   156  		go w.run(ctx, cancel)
   157  	}
   158  
   159  	startTime := time.Now()
   160  
   161  	// Make sure taskCh is closed exactly once
   162  	isClosed := false
   163  	defer func() {
   164  		if !isClosed {
   165  			close(taskCh)
   166  			wg.Wait()
   167  		}
   168  		statLogTicker.Stop()
   169  		cancel()
   170  		metrics.TiKVRangeTaskStats.WithLabelValues(s.name, lblCompletedRegions).Set(0)
   171  	}()
   172  
   173  	// Iterate all regions and send each region's range as a task to the workers.
   174  	key := startKey
   175  Loop:
   176  	for {
   177  		select {
   178  		case <-statLogTicker.C:
   179  			logutil.Logger(ctx).Info("range task in progress",
   180  				zap.String("name", s.name),
   181  				zap.String("startKey", kv.StrKey(startKey)),
   182  				zap.String("endKey", kv.StrKey(endKey)),
   183  				zap.Int("concurrency", s.concurrency),
   184  				zap.Duration("cost time", time.Since(startTime)),
   185  				zap.Int("completed regions", s.CompletedRegions()))
   186  		default:
   187  		}
   188  
   189  		bo := NewLocateRegionBackoffer(ctx)
   190  
   191  		rangeEndKey, err := s.store.GetRegionCache().BatchLoadRegionsFromKey(bo, key, s.regionsPerTask)
   192  		if err != nil {
   193  			logutil.Logger(ctx).Info("range task failed",
   194  				zap.String("name", s.name),
   195  				zap.String("startKey", kv.StrKey(startKey)),
   196  				zap.String("endKey", kv.StrKey(endKey)),
   197  				zap.Duration("cost time", time.Since(startTime)),
   198  				zap.Error(err))
   199  			return errors.Trace(err)
   200  		}
   201  		task := &kv.KeyRange{
   202  			StartKey: key,
   203  			EndKey:   rangeEndKey,
   204  		}
   205  
   206  		isLast := len(task.EndKey) == 0 || (len(endKey) > 0 && bytes.Compare(task.EndKey, endKey) >= 0)
   207  		// Let task.EndKey = min(endKey, loc.EndKey)
   208  		if isLast {
   209  			task.EndKey = endKey
   210  		}
   211  
   212  		pushTaskStartTime := time.Now()
   213  
   214  		select {
   215  		case taskCh <- task:
   216  		case <-ctx.Done():
   217  			break Loop
   218  		}
   219  		metrics.TiKVRangeTaskPushDuration.WithLabelValues(s.name).Observe(time.Since(pushTaskStartTime).Seconds())
   220  
   221  		if isLast {
   222  			break
   223  		}
   224  
   225  		key = task.EndKey
   226  	}
   227  
   228  	isClosed = true
   229  	close(taskCh)
   230  	wg.Wait()
   231  	for _, w := range workers {
   232  		if w.err != nil {
   233  			logutil.Logger(ctx).Info("range task failed",
   234  				zap.String("name", s.name),
   235  				zap.String("startKey", kv.StrKey(startKey)),
   236  				zap.String("endKey", kv.StrKey(endKey)),
   237  				zap.Duration("cost time", time.Since(startTime)),
   238  				zap.Error(w.err))
   239  			return errors.Trace(w.err)
   240  		}
   241  	}
   242  
   243  	logutil.Logger(ctx).Info("range task finished",
   244  		zap.String("name", s.name),
   245  		zap.String("startKey", kv.StrKey(startKey)),
   246  		zap.String("endKey", kv.StrKey(endKey)),
   247  		zap.Duration("cost time", time.Since(startTime)),
   248  		zap.Int("completed regions", s.CompletedRegions()))
   249  
   250  	return nil
   251  }
   252  
   253  // createWorker creates a worker that can process tasks from the given channel.
   254  func (s *Runner) createWorker(taskCh chan *kv.KeyRange, wg *sync.WaitGroup) *rangeTaskWorker {
   255  	return &rangeTaskWorker{
   256  		name:    s.name,
   257  		store:   s.store,
   258  		handler: s.handler,
   259  		taskCh:  taskCh,
   260  		wg:      wg,
   261  
   262  		completedRegions: &s.completedRegions,
   263  		failedRegions:    &s.failedRegions,
   264  	}
   265  }
   266  
   267  // CompletedRegions returns how many regions has been sent requests.
   268  func (s *Runner) CompletedRegions() int {
   269  	return int(atomic.LoadInt32(&s.completedRegions))
   270  }
   271  
   272  // FailedRegions returns how many regions has failed to do the task.
   273  func (s *Runner) FailedRegions() int {
   274  	return int(atomic.LoadInt32(&s.failedRegions))
   275  }
   276  
   277  // rangeTaskWorker is used by RangeTaskRunner to process tasks concurrently.
   278  type rangeTaskWorker struct {
   279  	name    string
   280  	store   storage
   281  	handler TaskHandler
   282  	taskCh  chan *kv.KeyRange
   283  	wg      *sync.WaitGroup
   284  
   285  	err error
   286  
   287  	completedRegions *int32
   288  	failedRegions    *int32
   289  }
   290  
   291  // run starts the worker. It collects all objects from `w.taskCh` and process them one by one.
   292  func (w *rangeTaskWorker) run(ctx context.Context, cancel context.CancelFunc) {
   293  	defer w.wg.Done()
   294  	for r := range w.taskCh {
   295  		select {
   296  		case <-ctx.Done():
   297  			w.err = ctx.Err()
   298  			return
   299  		default:
   300  		}
   301  
   302  		stat, err := w.handler(ctx, *r)
   303  
   304  		atomic.AddInt32(w.completedRegions, int32(stat.CompletedRegions))
   305  		atomic.AddInt32(w.failedRegions, int32(stat.FailedRegions))
   306  		metrics.TiKVRangeTaskStats.WithLabelValues(w.name, lblCompletedRegions).Add(float64(stat.CompletedRegions))
   307  		metrics.TiKVRangeTaskStats.WithLabelValues(w.name, lblFailedRegions).Add(float64(stat.FailedRegions))
   308  
   309  		if err != nil {
   310  			logutil.Logger(ctx).Info("canceling range task because of error",
   311  				zap.String("name", w.name),
   312  				zap.String("startKey", kv.StrKey(r.StartKey)),
   313  				zap.String("endKey", kv.StrKey(r.EndKey)),
   314  				zap.Error(err))
   315  			w.err = err
   316  			cancel()
   317  			break
   318  		}
   319  	}
   320  }