github.com/KinWaiYuen/client-go/v2@v2.5.4/tikv/split_region.go (about) 1 // Copyright 2021 TiKV Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // NOTE: The code in this file is based on code from the 16 // TiDB project, licensed under the Apache License v 2.0 17 // 18 // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/split_region.go 19 // 20 21 // Copyright 2017 PingCAP, Inc. 22 // 23 // Licensed under the Apache License, Version 2.0 (the "License"); 24 // you may not use this file except in compliance with the License. 25 // You may obtain a copy of the License at 26 // 27 // http://www.apache.org/licenses/LICENSE-2.0 28 // 29 // Unless required by applicable law or agreed to in writing, software 30 // distributed under the License is distributed on an "AS IS" BASIS, 31 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 32 // See the License for the specific language governing permissions and 33 // limitations under the License. 
package tikv

import (
	"bytes"
	"context"
	"fmt"
	"math"

	tikverr "github.com/KinWaiYuen/client-go/v2/error"
	"github.com/KinWaiYuen/client-go/v2/internal/client"
	"github.com/KinWaiYuen/client-go/v2/internal/kvrpc"
	"github.com/KinWaiYuen/client-go/v2/internal/locate"
	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
	"github.com/KinWaiYuen/client-go/v2/internal/retry"
	"github.com/KinWaiYuen/client-go/v2/kv"
	"github.com/KinWaiYuen/client-go/v2/tikvrpc"
	"github.com/KinWaiYuen/client-go/v2/txnkv/rangetask"
	"github.com/KinWaiYuen/client-go/v2/util"
	"github.com/pingcap/errors"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/kvproto/pkg/pdpb"
	pd "github.com/tikv/pd/client"
	"go.uber.org/zap"
)

// splitBatchRegionLimit caps how many split keys are sent to a single region
// in one SplitRegion request; AppendKeyBatches chunks larger key groups into
// multiple batches of at most this size.
const splitBatchRegionLimit = 2048

// equalRegionStartKey reports whether key is exactly the region's start key.
// It is used as the filter predicate for GroupKeysByRegion: a split key that
// already equals a region boundary needs no further splitting.
func equalRegionStartKey(key, regionStartKey []byte) bool {
	return bytes.Equal(key, regionStartKey)
}

// splitBatchRegionsReq groups the split keys by the region that currently
// covers them, then sends one SplitRegion request per batch — directly when
// there is a single batch, otherwise concurrently via forked backoffers.
// Results from all batches are merged into a single SplitRegionResponse;
// the first batch error (if any) is returned alongside the merged response.
func (s *KVStore) splitBatchRegionsReq(bo *Backoffer, keys [][]byte, scatter bool, tableID *int64) (*tikvrpc.Response, error) {
	// equalRegionStartKey is used to filter split keys.
	// If the split key is equal to the start key of the region, then the key has been split, we need to skip the split key.
	groups, _, err := s.regionCache.GroupKeysByRegion(bo, keys, equalRegionStartKey)
	if err != nil {
		return nil, errors.Trace(err)
	}

	var batches []kvrpc.Batch
	for regionID, groupKeys := range groups {
		// Chunk each region's keys into batches of at most splitBatchRegionLimit.
		batches = kvrpc.AppendKeyBatches(batches, regionID, groupKeys, splitBatchRegionLimit)
	}

	if len(batches) == 0 {
		// Every key was already a region boundary; nothing to split.
		return nil, nil
	}
	// The first time it enters this function.
	if bo.GetTotalSleep() == 0 {
		logutil.BgLogger().Info("split batch regions request",
			zap.Int("split key count", len(keys)),
			zap.Int("batch count", len(batches)),
			zap.Uint64("first batch, region ID", batches[0].RegionID.GetID()),
			zap.String("first split key", kv.StrKey(batches[0].Keys[0])))
	}
	if len(batches) == 1 {
		// Single batch: no need to fan out; send synchronously with the caller's backoffer.
		resp := s.batchSendSingleRegion(bo, batches[0], scatter, tableID)
		return resp.Response, errors.Trace(resp.Error)
	}
	// Buffered to len(batches) so every goroutine can deliver its result
	// without blocking, even if the collector below exits early.
	ch := make(chan kvrpc.BatchResult, len(batches))
	for _, batch1 := range batches {
		go func(b kvrpc.Batch) {
			// Each goroutine gets a forked backoffer so its sleeps/cancellation
			// are independent of its siblings.
			backoffer, cancel := bo.Fork()
			defer cancel()

			util.WithRecovery(func() {
				select {
				case ch <- s.batchSendSingleRegion(backoffer, b, scatter, tableID):
				case <-bo.GetCtx().Done():
					// Parent context cancelled; report its error instead of blocking.
					ch <- kvrpc.BatchResult{Error: bo.GetCtx().Err()}
				}
			}, func(r interface{}) {
				// Convert a panic in the sender into a batch error so the
				// collector still receives exactly one result per batch.
				if r != nil {
					ch <- kvrpc.BatchResult{Error: errors.Errorf("%v", r)}
				}
			})
		}(batch1)
	}

	// Merge all batch responses; each split key can yield up to two regions,
	// hence the len(keys)*2 capacity hint.
	srResp := &kvrpcpb.SplitRegionResponse{Regions: make([]*metapb.Region, 0, len(keys)*2)}
	for i := 0; i < len(batches); i++ {
		batchResp := <-ch
		if batchResp.Error != nil {
			logutil.BgLogger().Info("batch split regions failed", zap.Error(batchResp.Error))
			// Keep only the first error; err is nil here on entry because
			// GroupKeysByRegion succeeded above.
			if err == nil {
				err = batchResp.Error
			}
		}

		// If the split succeeds and the scatter fails, we also need to add the region IDs.
		if batchResp.Response != nil {
			spResp := batchResp.Resp.(*kvrpcpb.SplitRegionResponse)
			regions := spResp.GetRegions()
			srResp.Regions = append(srResp.Regions, regions...)
		}
	}
	return &tikvrpc.Response{Resp: srResp}, errors.Trace(err)
}

// batchSendSingleRegion sends one SplitRegion request for a single batch.
// On a region error it backs off and retries the whole batch through
// splitBatchRegionsReq (the region layout may have changed). On success it
// optionally scatters each new region, stopping early on PD server timeout.
func (s *KVStore) batchSendSingleRegion(bo *Backoffer, batch kvrpc.Batch, scatter bool, tableID *int64) kvrpc.BatchResult {
	// Test hook: when the failpoint is armed and the context has a deadline,
	// block until it expires to simulate a split timeout.
	if val, err := util.EvalFailpoint("mockSplitRegionTimeout"); err == nil {
		if val.(bool) {
			if _, ok := bo.GetCtx().Deadline(); ok {
				<-bo.GetCtx().Done()
			}
		}
	}

	req := tikvrpc.NewRequest(tikvrpc.CmdSplitRegion, &kvrpcpb.SplitRegionRequest{
		SplitKeys: batch.Keys,
	}, kvrpcpb.Context{
		Priority: kvrpcpb.CommandPri_Normal,
	})

	sender := locate.NewRegionRequestSender(s.regionCache, s.GetTiKVClient())
	resp, err := sender.SendReq(bo, req, batch.RegionID, client.ReadTimeoutShort)

	batchResp := kvrpc.BatchResult{Response: resp}
	if err != nil {
		batchResp.Error = errors.Trace(err)
		return batchResp
	}
	regionErr, err := resp.GetRegionError()
	if err != nil {
		batchResp.Error = errors.Trace(err)
		return batchResp
	}
	if regionErr != nil {
		// Region epoch changed (split/merge/leader move): back off, then
		// re-resolve the keys to regions and retry via the batch entry point.
		err := bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
		if err != nil {
			batchResp.Error = errors.Trace(err)
			return batchResp
		}
		resp, err = s.splitBatchRegionsReq(bo, batch.Keys, scatter, tableID)
		batchResp.Response = resp
		batchResp.Error = err
		return batchResp
	}

	spResp := resp.Resp.(*kvrpcpb.SplitRegionResponse)
	regions := spResp.GetRegions()
	if len(regions) > 0 {
		// Divide a region into n, one of them may not need to be scattered,
		// so n-1 needs to be scattered to other stores.
		spResp.Regions = regions[:len(regions)-1]
	}
	var newRegionLeft string
	if len(spResp.Regions) > 0 {
		newRegionLeft = logutil.Hex(spResp.Regions[0]).String()
	}
	logutil.BgLogger().Info("batch split regions complete",
		zap.Uint64("batch region ID", batch.RegionID.GetID()),
		zap.String("first at", kv.StrKey(batch.Keys[0])),
		zap.String("first new region left", newRegionLeft),
		zap.Int("new region count", len(spResp.Regions)))

	if !scatter {
		return batchResp
	}

	// Scatter each newly created region. A failed scatter is recorded (first
	// error wins) but does not abort the loop, except for PD server timeout,
	// which likely affects all remaining regions too.
	for i, r := range spResp.Regions {
		if err = s.scatterRegion(bo, r.Id, tableID); err == nil {
			logutil.BgLogger().Info("batch split regions, scatter region complete",
				zap.Uint64("batch region ID", batch.RegionID.GetID()),
				zap.String("at", kv.StrKey(batch.Keys[i])),
				zap.Stringer("new region left", logutil.Hex(r)))
			continue
		}

		logutil.BgLogger().Info("batch split regions, scatter region failed",
			zap.Uint64("batch region ID", batch.RegionID.GetID()),
			zap.String("at", kv.StrKey(batch.Keys[i])),
			zap.Stringer("new region left", logutil.Hex(r)),
			zap.Error(err))
		if batchResp.Error == nil {
			batchResp.Error = err
		}
		if _, ok := err.(*tikverr.ErrPDServerTimeout); ok {
			break
		}
	}
	return batchResp
}

const (
	// splitRegionBackoff is the per-key backoff budget (ms) for SplitRegions.
	splitRegionBackoff = 20000
	// maxSplitRegionsBackoff caps the total backoff budget (ms) for SplitRegions.
	maxSplitRegionsBackoff = 120000
)

// SplitRegions splits regions by splitKeys.
226 func (s *KVStore) SplitRegions(ctx context.Context, splitKeys [][]byte, scatter bool, tableID *int64) (regionIDs []uint64, err error) { 227 bo := retry.NewBackofferWithVars(ctx, int(math.Min(float64(len(splitKeys))*splitRegionBackoff, maxSplitRegionsBackoff)), nil) 228 resp, err := s.splitBatchRegionsReq(bo, splitKeys, scatter, tableID) 229 regionIDs = make([]uint64, 0, len(splitKeys)) 230 if resp != nil && resp.Resp != nil { 231 spResp := resp.Resp.(*kvrpcpb.SplitRegionResponse) 232 for _, r := range spResp.Regions { 233 regionIDs = append(regionIDs, r.Id) 234 } 235 logutil.BgLogger().Info("split regions complete", zap.Int("region count", len(regionIDs)), zap.Uint64s("region IDs", regionIDs)) 236 } 237 return regionIDs, errors.Trace(err) 238 } 239 240 func (s *KVStore) scatterRegion(bo *Backoffer, regionID uint64, tableID *int64) error { 241 logutil.BgLogger().Info("start scatter region", 242 zap.Uint64("regionID", regionID)) 243 for { 244 opts := make([]pd.RegionsOption, 0, 1) 245 if tableID != nil { 246 opts = append(opts, pd.WithGroup(fmt.Sprintf("%v", *tableID))) 247 } 248 _, err := s.pdClient.ScatterRegions(bo.GetCtx(), []uint64{regionID}, opts...) 249 250 if val, err2 := util.EvalFailpoint("mockScatterRegionTimeout"); err2 == nil { 251 if val.(bool) { 252 err = tikverr.NewErrPDServerTimeout("") 253 } 254 } 255 256 if err == nil { 257 break 258 } 259 err = bo.Backoff(retry.BoPDRPC, errors.New(err.Error())) 260 if err != nil { 261 return errors.Trace(err) 262 } 263 } 264 logutil.BgLogger().Debug("scatter region complete", 265 zap.Uint64("regionID", regionID)) 266 return nil 267 } 268 269 const waitScatterRegionFinishBackoff = 120000 270 271 // WaitScatterRegionFinish implements SplittableStore interface. 272 // backOff is the back off time of the wait scatter region.(Milliseconds) 273 // if backOff <= 0, the default wait scatter back off time will be used. 
274 func (s *KVStore) WaitScatterRegionFinish(ctx context.Context, regionID uint64, backOff int) error { 275 if backOff <= 0 { 276 backOff = waitScatterRegionFinishBackoff 277 } 278 logutil.BgLogger().Info("wait scatter region", 279 zap.Uint64("regionID", regionID), zap.Int("backoff(ms)", backOff)) 280 281 bo := retry.NewBackofferWithVars(ctx, backOff, nil) 282 logFreq := 0 283 for { 284 resp, err := s.pdClient.GetOperator(ctx, regionID) 285 if err == nil && resp != nil { 286 if !bytes.Equal(resp.Desc, []byte("scatter-region")) || resp.Status != pdpb.OperatorStatus_RUNNING { 287 logutil.BgLogger().Info("wait scatter region finished", 288 zap.Uint64("regionID", regionID)) 289 return nil 290 } 291 if resp.GetHeader().GetError() != nil { 292 err = errors.AddStack(&tikverr.PDError{ 293 Err: resp.Header.Error, 294 }) 295 logutil.BgLogger().Warn("wait scatter region error", 296 zap.Uint64("regionID", regionID), zap.Error(err)) 297 return err 298 } 299 if logFreq%10 == 0 { 300 logutil.BgLogger().Info("wait scatter region", 301 zap.Uint64("regionID", regionID), 302 zap.String("reverse", string(resp.Desc)), 303 zap.String("status", pdpb.OperatorStatus_name[int32(resp.Status)])) 304 } 305 logFreq++ 306 } 307 if err != nil { 308 err = bo.Backoff(retry.BoRegionMiss, errors.New(err.Error())) 309 } else { 310 err = bo.Backoff(retry.BoRegionMiss, errors.New("wait scatter region timeout")) 311 } 312 if err != nil { 313 return errors.Trace(err) 314 } 315 } 316 } 317 318 // CheckRegionInScattering uses to check whether scatter region finished. 
319 func (s *KVStore) CheckRegionInScattering(regionID uint64) (bool, error) { 320 bo := rangetask.NewLocateRegionBackoffer(context.Background()) 321 for { 322 resp, err := s.pdClient.GetOperator(context.Background(), regionID) 323 if err == nil && resp != nil { 324 if !bytes.Equal(resp.Desc, []byte("scatter-region")) || resp.Status != pdpb.OperatorStatus_RUNNING { 325 return false, nil 326 } 327 } 328 if err != nil { 329 err = bo.Backoff(retry.BoRegionMiss, errors.New(err.Error())) 330 } else { 331 return true, nil 332 } 333 if err != nil { 334 return true, errors.Trace(err) 335 } 336 } 337 }