github.com/KinWaiYuen/client-go/v2@v2.5.4/tikv/split_region.go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/split_region.go
//

// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tikv

import (
	"bytes"
	"context"
	"fmt"
	"math"

	tikverr "github.com/KinWaiYuen/client-go/v2/error"
	"github.com/KinWaiYuen/client-go/v2/internal/client"
	"github.com/KinWaiYuen/client-go/v2/internal/kvrpc"
	"github.com/KinWaiYuen/client-go/v2/internal/locate"
	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
	"github.com/KinWaiYuen/client-go/v2/internal/retry"
	"github.com/KinWaiYuen/client-go/v2/kv"
	"github.com/KinWaiYuen/client-go/v2/tikvrpc"
	"github.com/KinWaiYuen/client-go/v2/txnkv/rangetask"
	"github.com/KinWaiYuen/client-go/v2/util"
	"github.com/pingcap/errors"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/kvproto/pkg/pdpb"
	pd "github.com/tikv/pd/client"
	"go.uber.org/zap"
)

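// splitBatchRegionLimit caps how many split keys are carried by a single
// batched SplitRegion request to one region.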
const splitBatchRegionLimit = 2048

func equalRegionStartKey(key, regionStartKey []byte) bool {
	return bytes.Equal(key, regionStartKey)
}

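// splitBatchRegionsReq groups the split keys by the regions they currently
// fall into, batches each group (at most splitBatchRegionLimit keys per
// batch), and sends the batches, concurrently when there is more than one.
// The per-batch responses are merged into a single SplitRegionResponse, and
// the first error encountered is returned alongside whatever did split.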
func (s *KVStore) splitBatchRegionsReq(bo *Backoffer, keys [][]byte, scatter bool, tableID *int64) (*tikvrpc.Response, error) {
	// equalRegionStartKey is used to filter out split keys that are no-ops:
	// if a split key equals a region's start key, the region has already been
	// split at that key, so the key is skipped.
	groups, _, err := s.regionCache.GroupKeysByRegion(bo, keys, equalRegionStartKey)
	if err != nil {
		return nil, errors.Trace(err)
	}

	var batches []kvrpc.Batch
	for regionID, groupKeys := range groups {
		batches = kvrpc.AppendKeyBatches(batches, regionID, groupKeys, splitBatchRegionLimit)
	}

	if len(batches) == 0 {
		return nil, nil
	}
	// Log the request details only on the first entry into this function,
	// i.e. before any backoff has been performed.
	if bo.GetTotalSleep() == 0 {
		logutil.BgLogger().Info("split batch regions request",
			zap.Int("split key count", len(keys)),
			zap.Int("batch count", len(batches)),
			zap.Uint64("first batch, region ID", batches[0].RegionID.GetID()),
			zap.String("first split key", kv.StrKey(batches[0].Keys[0])))
	}
	if len(batches) == 1 {
		resp := s.batchSendSingleRegion(bo, batches[0], scatter, tableID)
		return resp.Response, errors.Trace(resp.Error)
	}
	// Fan out: each batch is sent in its own goroutine with a forked
	// backoffer, so sibling batches do not share a sleep budget.
	ch := make(chan kvrpc.BatchResult, len(batches))
	for _, batch := range batches {
		go func(b kvrpc.Batch) {
			backoffer, cancel := bo.Fork()
			defer cancel()

			util.WithRecovery(func() {
				select {
				case ch <- s.batchSendSingleRegion(backoffer, b, scatter, tableID):
				case <-bo.GetCtx().Done():
					ch <- kvrpc.BatchResult{Error: bo.GetCtx().Err()}
				}
			}, func(r interface{}) {
				if r != nil {
					ch <- kvrpc.BatchResult{Error: errors.Errorf("%v", r)}
				}
			})
		}(batch)
	}

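	// Merge the per-batch responses. Region IDs from successful batches are
	// collected even when another batch failed, so callers still learn which
	// regions were actually split.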
	srResp := &kvrpcpb.SplitRegionResponse{Regions: make([]*metapb.Region, 0, len(keys)*2)}
	for i := 0; i < len(batches); i++ {
		batchResp := <-ch
		if batchResp.Error != nil {
			logutil.BgLogger().Info("batch split regions failed", zap.Error(batchResp.Error))
			// Keep only the first error.
			if err == nil {
				err = batchResp.Error
			}
		}

		// Even if the split succeeded but the scatter failed, the region IDs
		// still need to be added to the merged response.
		if batchResp.Response != nil {
			spResp := batchResp.Resp.(*kvrpcpb.SplitRegionResponse)
			regions := spResp.GetRegions()
			srResp.Regions = append(srResp.Regions, regions...)
		}
	}
	return &tikvrpc.Response{Resp: srResp}, errors.Trace(err)
}

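// batchSendSingleRegion sends one SplitRegion RPC for a single batch of keys.
// On a region error it backs off and re-enters splitBatchRegionsReq so the
// keys are regrouped against the refreshed region cache. When scatter is
// true, it also scatters the newly created regions (all but the last region
// in the response).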
func (s *KVStore) batchSendSingleRegion(bo *Backoffer, batch kvrpc.Batch, scatter bool, tableID *int64) kvrpc.BatchResult {
	// Under the mockSplitRegionTimeout failpoint, block until the backoffer's
	// context deadline is reached to simulate a timed-out split.
	if val, err := util.EvalFailpoint("mockSplitRegionTimeout"); err == nil {
		if val.(bool) {
			if _, ok := bo.GetCtx().Deadline(); ok {
				<-bo.GetCtx().Done()
			}
		}
	}

	req := tikvrpc.NewRequest(tikvrpc.CmdSplitRegion, &kvrpcpb.SplitRegionRequest{
		SplitKeys: batch.Keys,
	}, kvrpcpb.Context{
		Priority: kvrpcpb.CommandPri_Normal,
	})

	sender := locate.NewRegionRequestSender(s.regionCache, s.GetTiKVClient())
	resp, err := sender.SendReq(bo, req, batch.RegionID, client.ReadTimeoutShort)

	batchResp := kvrpc.BatchResult{Response: resp}
	if err != nil {
		batchResp.Error = errors.Trace(err)
		return batchResp
	}
	regionErr, err := resp.GetRegionError()
	if err != nil {
		batchResp.Error = errors.Trace(err)
		return batchResp
	}
	if regionErr != nil {
		err := bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
		if err != nil {
			batchResp.Error = errors.Trace(err)
			return batchResp
		}
		// The region layout changed under us; regroup the keys and retry.
		resp, err = s.splitBatchRegionsReq(bo, batch.Keys, scatter, tableID)
		batchResp.Response = resp
		batchResp.Error = err
		return batchResp
	}

	spResp := resp.Resp.(*kvrpcpb.SplitRegionResponse)
	regions := spResp.GetRegions()
	if len(regions) > 0 {
		// Splitting one region into n yields n regions; one of them (the
		// last) may not need to be scattered, so only the first n-1 are
		// scattered to other stores.
		spResp.Regions = regions[:len(regions)-1]
	}
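	// For example, splitting a region at keys ["b", "d"] yields three regions
	// covering [..., "b"), ["b", "d"), and ["d", ...); the last one may keep
	// the original region's placement, so only the first two are scattered.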
	var newRegionLeft string
	if len(spResp.Regions) > 0 {
		newRegionLeft = logutil.Hex(spResp.Regions[0]).String()
	}
	logutil.BgLogger().Info("batch split regions complete",
		zap.Uint64("batch region ID", batch.RegionID.GetID()),
		zap.String("first at", kv.StrKey(batch.Keys[0])),
		zap.String("first new region left", newRegionLeft),
		zap.Int("new region count", len(spResp.Regions)))

	if !scatter {
		return batchResp
	}

	for i, r := range spResp.Regions {
		if err = s.scatterRegion(bo, r.Id, tableID); err == nil {
			logutil.BgLogger().Info("batch split regions, scatter region complete",
				zap.Uint64("batch region ID", batch.RegionID.GetID()),
				zap.String("at", kv.StrKey(batch.Keys[i])),
				zap.Stringer("new region left", logutil.Hex(r)))
			continue
		}

		logutil.BgLogger().Info("batch split regions, scatter region failed",
			zap.Uint64("batch region ID", batch.RegionID.GetID()),
			zap.String("at", kv.StrKey(batch.Keys[i])),
			zap.Stringer("new region left", logutil.Hex(r)),
			zap.Error(err))
		if batchResp.Error == nil {
			batchResp.Error = err
		}
		// PD timed out; scattering the remaining regions would likely time
		// out as well, so stop early.
		if _, ok := err.(*tikverr.ErrPDServerTimeout); ok {
			break
		}
	}
	return batchResp
}

const (
	splitRegionBackoff     = 20000
	maxSplitRegionsBackoff = 120000
)
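
// The backoff budget for SplitRegions scales with the number of split keys
// but is capped: for example, 3 keys give min(3*20000, 120000) = 60000 ms,
// while 6 or more keys hit the 120000 ms cap.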

// SplitRegions splits regions by splitKeys. If scatter is true, the newly
// split regions are also scattered. A non-nil tableID is passed to PD as the
// scatter group.
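//
// A minimal usage sketch (hypothetical keys; error handling elided):
//
//	ids, err := store.SplitRegions(ctx, [][]byte{[]byte("k1"), []byte("k2")}, true, nil)
//	if err != nil {
//		// Some regions may still have been split; ids lists them.
//	}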
func (s *KVStore) SplitRegions(ctx context.Context, splitKeys [][]byte, scatter bool, tableID *int64) (regionIDs []uint64, err error) {
	bo := retry.NewBackofferWithVars(ctx, int(math.Min(float64(len(splitKeys))*splitRegionBackoff, maxSplitRegionsBackoff)), nil)
	resp, err := s.splitBatchRegionsReq(bo, splitKeys, scatter, tableID)
	regionIDs = make([]uint64, 0, len(splitKeys))
	if resp != nil && resp.Resp != nil {
		spResp := resp.Resp.(*kvrpcpb.SplitRegionResponse)
		for _, r := range spResp.Regions {
			regionIDs = append(regionIDs, r.Id)
		}
		logutil.BgLogger().Info("split regions complete", zap.Int("region count", len(regionIDs)), zap.Uint64s("region IDs", regionIDs))
	}
	return regionIDs, errors.Trace(err)
}

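// scatterRegion asks PD to scatter one region, retrying with backoff until PD
// accepts the request or the backoff budget is exhausted. A non-nil tableID
// is passed to PD as the scatter group.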
func (s *KVStore) scatterRegion(bo *Backoffer, regionID uint64, tableID *int64) error {
	logutil.BgLogger().Info("start scatter region",
		zap.Uint64("regionID", regionID))
	for {
		opts := make([]pd.RegionsOption, 0, 1)
		if tableID != nil {
			opts = append(opts, pd.WithGroup(fmt.Sprintf("%v", *tableID)))
		}
		_, err := s.pdClient.ScatterRegions(bo.GetCtx(), []uint64{regionID}, opts...)

		// Under the mockScatterRegionTimeout failpoint, pretend PD timed out
		// to exercise the retry and early-stop paths.
		if val, err2 := util.EvalFailpoint("mockScatterRegionTimeout"); err2 == nil {
			if val.(bool) {
				err = tikverr.NewErrPDServerTimeout("")
			}
		}

		if err == nil {
			break
		}
		err = bo.Backoff(retry.BoPDRPC, errors.New(err.Error()))
		if err != nil {
			return errors.Trace(err)
		}
	}
	logutil.BgLogger().Debug("scatter region complete",
		zap.Uint64("regionID", regionID))
	return nil
}

const waitScatterRegionFinishBackoff = 120000

// WaitScatterRegionFinish implements the SplittableStore interface. It blocks
// until the scatter-region operator for regionID finishes or fails, or the
// backoff budget is exhausted.
// backOff is the backoff budget in milliseconds; if backOff <= 0, the
// default (waitScatterRegionFinishBackoff) is used.
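//
// A minimal usage sketch (hypothetical region ID; 0 selects the default budget):
//
//	if err := store.WaitScatterRegionFinish(ctx, regionID, 0); err != nil {
//		// The scatter did not finish within the backoff budget.
//	}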
func (s *KVStore) WaitScatterRegionFinish(ctx context.Context, regionID uint64, backOff int) error {
	if backOff <= 0 {
		backOff = waitScatterRegionFinishBackoff
	}
	logutil.BgLogger().Info("wait scatter region",
		zap.Uint64("regionID", regionID), zap.Int("backoff(ms)", backOff))

	bo := retry.NewBackofferWithVars(ctx, backOff, nil)
	logFreq := 0
	for {
		resp, err := s.pdClient.GetOperator(ctx, regionID)
		if err == nil && resp != nil {
			// The scatter-region operator is gone or no longer running, so
			// the scatter has finished.
			if !bytes.Equal(resp.Desc, []byte("scatter-region")) || resp.Status != pdpb.OperatorStatus_RUNNING {
				logutil.BgLogger().Info("wait scatter region finished",
					zap.Uint64("regionID", regionID))
				return nil
			}
			if resp.GetHeader().GetError() != nil {
				err = errors.AddStack(&tikverr.PDError{
					Err: resp.Header.Error,
				})
				logutil.BgLogger().Warn("wait scatter region error",
					zap.Uint64("regionID", regionID), zap.Error(err))
				return err
			}
			// Log progress on every tenth poll to avoid flooding the log.
			if logFreq%10 == 0 {
				logutil.BgLogger().Info("wait scatter region",
					zap.Uint64("regionID", regionID),
					zap.String("desc", string(resp.Desc)),
					zap.String("status", pdpb.OperatorStatus_name[int32(resp.Status)]))
			}
			logFreq++
		}
		if err != nil {
			err = bo.Backoff(retry.BoRegionMiss, errors.New(err.Error()))
		} else {
			err = bo.Backoff(retry.BoRegionMiss, errors.New("wait scatter region timeout"))
		}
		if err != nil {
			return errors.Trace(err)
		}
	}
}

// CheckRegionInScattering checks whether the given region is still being
// scattered. It returns true while the scatter-region operator is running.
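//
// A minimal usage sketch (hypothetical region ID):
//
//	scattering, err := store.CheckRegionInScattering(regionID)
//	if err == nil && scattering {
//		// The scatter-region operator is still running; check again later.
//	}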
func (s *KVStore) CheckRegionInScattering(regionID uint64) (bool, error) {
	bo := rangetask.NewLocateRegionBackoffer(context.Background())
	for {
		resp, err := s.pdClient.GetOperator(context.Background(), regionID)
		if err == nil && resp != nil {
			// The scatter-region operator is gone or no longer running.
			if !bytes.Equal(resp.Desc, []byte("scatter-region")) || resp.Status != pdpb.OperatorStatus_RUNNING {
				return false, nil
			}
		}
		if err != nil {
			err = bo.Backoff(retry.BoRegionMiss, errors.New(err.Error()))
		} else {
			// GetOperator succeeded and the operator is still running.
			return true, nil
		}
		if err != nil {
			// The backoff budget is exhausted; report the region as still
			// scattering along with the error.
			return true, errors.Trace(err)
		}
	}
}