github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/backend/localhelper.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package backend 15 16 import ( 17 "bytes" 18 "context" 19 "encoding/hex" 20 "regexp" 21 "sort" 22 "strings" 23 "time" 24 25 split "github.com/pingcap/br/pkg/restore" 26 "github.com/pingcap/br/pkg/utils" 27 "github.com/pingcap/errors" 28 sst "github.com/pingcap/kvproto/pkg/import_sstpb" 29 "github.com/pingcap/kvproto/pkg/metapb" 30 "github.com/pingcap/kvproto/pkg/pdpb" 31 "github.com/pingcap/tidb/util/codec" 32 "go.uber.org/zap" 33 34 "github.com/pingcap/tidb-lightning/lightning/log" 35 ) 36 37 const ( 38 SplitRetryTimes = 8 39 retrySplitMaxWaitTime = 4 * time.Second 40 ) 41 42 var ( 43 // the max keys count in a batch to split one region 44 maxBatchSplitKeys = 4096 45 // the base exponential backoff time 46 // the variable is only changed in unit test for running test faster. 47 splitRegionBaseBackOffTime = time.Second 48 ) 49 50 // TODO remove this file and use br internal functions 51 // This File include region split & scatter operation just like br. 52 // we can simply call br function, but we need to change some function signature of br 53 // When the ranges total size is small, we can skip the split to avoid generate empty regions. 54 func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges []Range, needSplit bool) error { 55 if len(ranges) == 0 { 56 return nil 57 } 58 59 minKey := codec.EncodeBytes([]byte{}, ranges[0].start) 60 maxKey := codec.EncodeBytes([]byte{}, ranges[len(ranges)-1].end) 61 62 var err error 63 scatterRegions := make([]*split.RegionInfo, 0) 64 var retryKeys [][]byte 65 waitTime := splitRegionBaseBackOffTime 66 for i := 0; i < SplitRetryTimes; i++ { 67 log.L().Info("split and scatter region", 68 log.ZapRedactBinary("minKey", minKey), 69 log.ZapRedactBinary("maxKey", maxKey), 70 zap.Int("retry", i), 71 ) 72 if i > 0 { 73 select { 74 case <-time.After(waitTime): 75 case <-ctx.Done(): 76 return ctx.Err() 77 } 78 waitTime *= 2 79 if waitTime > retrySplitMaxWaitTime { 80 waitTime = retrySplitMaxWaitTime 81 } 82 } 83 var regions []*split.RegionInfo 84 regions, err = paginateScanRegion(ctx, local.splitCli, minKey, maxKey, 128) 85 if err != nil { 86 log.L().Warn("paginate scan region failed", log.ZapRedactBinary("minKey", minKey), log.ZapRedactBinary("maxKey", maxKey), 87 log.ShortError(err), zap.Int("retry", i)) 88 continue 89 } 90 91 if len(regions) == 0 { 92 log.L().Warn("paginate scan region returns empty result", log.ZapRedactBinary("minKey", minKey), log.ZapRedactBinary("maxKey", maxKey), 93 zap.Int("retry", i)) 94 return errors.New("paginate scan region returns empty result") 95 } 96 97 log.L().Info("paginate scan region finished", log.ZapRedactBinary("minKey", minKey), log.ZapRedactBinary("maxKey", maxKey), 98 zap.Int("regions", len(regions))) 99 100 if !needSplit { 101 scatterRegions = append(scatterRegions, regions...) 102 break 103 } 104 105 regionMap := make(map[uint64]*split.RegionInfo) 106 for _, region := range regions { 107 regionMap[region.Region.GetId()] = region 108 } 109 110 var splitKeyMap map[uint64][][]byte 111 if len(retryKeys) > 0 { 112 firstKeyEnc := codec.EncodeBytes([]byte{}, retryKeys[0]) 113 lastKeyEnc := codec.EncodeBytes([]byte{}, retryKeys[len(retryKeys)-1]) 114 if bytes.Compare(firstKeyEnc, regions[0].Region.StartKey) < 0 || !beforeEnd(lastKeyEnc, regions[len(regions)-1].Region.EndKey) { 115 log.L().Warn("no valid key for split region", 116 log.ZapRedactBinary("firstKey", firstKeyEnc), log.ZapRedactBinary("lastKey", lastKeyEnc), 117 log.ZapRedactBinary("firstRegionStart", regions[0].Region.StartKey), 118 log.ZapRedactBinary("lastRegionEnd", regions[len(regions)-1].Region.EndKey)) 119 return errors.New("check split keys failed") 120 } 121 splitKeyMap = getSplitKeys(retryKeys, regions) 122 retryKeys = retryKeys[:0] 123 } else { 124 splitKeyMap = getSplitKeysByRanges(ranges, regions) 125 } 126 for regionID, keys := range splitKeyMap { 127 var newRegions []*split.RegionInfo 128 region := regionMap[regionID] 129 sort.Slice(keys, func(i, j int) bool { 130 return bytes.Compare(keys[i], keys[j]) < 0 131 }) 132 splitRegion := region 133 for j := 0; j < (len(keys)+maxBatchSplitKeys-1)/maxBatchSplitKeys; j++ { 134 start := j * maxBatchSplitKeys 135 end := utils.MinInt((j+1)*maxBatchSplitKeys, len(keys)) 136 splitRegionStart := codec.EncodeBytes([]byte{}, keys[start]) 137 splitRegionEnd := codec.EncodeBytes([]byte{}, keys[end-1]) 138 if bytes.Compare(splitRegionStart, splitRegion.Region.StartKey) < 0 || !beforeEnd(splitRegionEnd, splitRegion.Region.EndKey) { 139 log.L().Fatal("no valid key in region", 140 log.ZapRedactBinary("startKey", splitRegionStart), log.ZapRedactBinary("endKey", splitRegionEnd), 141 log.ZapRedactBinary("regionStart", splitRegion.Region.StartKey), log.ZapRedactBinary("regionEnd", splitRegion.Region.EndKey), 142 log.ZapRedactReflect("region", splitRegion)) 143 } 144 splitRegion, newRegions, err = local.BatchSplitRegions(ctx, splitRegion, keys[start:end]) 145 if err != nil { 146 if strings.Contains(err.Error(), "no valid key") { 147 for _, key := range keys { 148 log.L().Warn("no valid key", 149 log.ZapRedactBinary("startKey", region.Region.StartKey), 150 log.ZapRedactBinary("endKey", region.Region.EndKey), 151 log.ZapRedactBinary("key", codec.EncodeBytes([]byte{}, key))) 152 } 153 return errors.Trace(err) 154 } 155 log.L().Warn("split regions", log.ShortError(err), zap.Int("retry time", j+1), 156 zap.Uint64("region_id", regionID)) 157 retryKeys = append(retryKeys, keys[start:]...) 158 break 159 } else { 160 log.L().Info("batch split region", zap.Uint64("region_id", splitRegion.Region.Id), 161 zap.Int("keys", end-start), zap.Binary("firstKey", keys[start]), 162 zap.Binary("end", keys[end-1])) 163 sort.Slice(newRegions, func(i, j int) bool { 164 return bytes.Compare(newRegions[i].Region.StartKey, newRegions[j].Region.StartKey) < 0 165 }) 166 scatterRegions = append(scatterRegions, newRegions...) 167 // the region with the max start key is the region need to be further split. 168 if bytes.Compare(splitRegion.Region.StartKey, newRegions[len(newRegions)-1].Region.StartKey) < 0 { 169 splitRegion = newRegions[len(newRegions)-1] 170 } 171 } 172 } 173 } 174 if len(retryKeys) == 0 { 175 break 176 } else { 177 sort.Slice(retryKeys, func(i, j int) bool { 178 return bytes.Compare(retryKeys[i], retryKeys[j]) < 0 179 }) 180 minKey = codec.EncodeBytes([]byte{}, retryKeys[0]) 181 maxKey = codec.EncodeBytes([]byte{}, nextKey(retryKeys[len(retryKeys)-1])) 182 } 183 } 184 if err != nil { 185 return errors.Trace(err) 186 } 187 188 startTime := time.Now() 189 scatterCount := 0 190 for _, region := range scatterRegions { 191 local.waitForScatterRegion(ctx, region) 192 if time.Since(startTime) > split.ScatterWaitUpperInterval { 193 break 194 } 195 scatterCount++ 196 } 197 if scatterCount == len(scatterRegions) { 198 log.L().Info("waiting for scattering regions done", 199 zap.Int("regions", len(scatterRegions)), zap.Duration("take", time.Since(startTime))) 200 } else { 201 log.L().Info("waiting for scattering regions timeout", 202 zap.Int("scatterCount", scatterCount), 203 zap.Int("regions", len(scatterRegions)), 204 zap.Duration("take", time.Since(startTime))) 205 } 206 return nil 207 } 208 209 func paginateScanRegion( 210 ctx context.Context, client split.SplitClient, startKey, endKey []byte, limit int, 211 ) ([]*split.RegionInfo, error) { 212 if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 { 213 log.L().Error("startKey > endKey when paginating scan region", 214 log.ZapRedactString("startKey", hex.EncodeToString(startKey)), 215 log.ZapRedactString("endKey", hex.EncodeToString(endKey))) 216 return nil, errors.Errorf("startKey > endKey when paginating scan region") 217 } 218 219 var regions []*split.RegionInfo 220 for { 221 batch, err := client.ScanRegions(ctx, startKey, endKey, limit) 222 if err != nil { 223 return nil, errors.Trace(err) 224 } 225 regions = append(regions, batch...) 226 if len(batch) < limit { 227 // No more region 228 break 229 } 230 startKey = batch[len(batch)-1].Region.GetEndKey() 231 if len(startKey) == 0 || 232 (len(endKey) > 0 && bytes.Compare(startKey, endKey) >= 0) { 233 // All key space have scanned 234 break 235 } 236 } 237 return regions, nil 238 } 239 240 func (local *local) BatchSplitRegions(ctx context.Context, region *split.RegionInfo, keys [][]byte) (*split.RegionInfo, []*split.RegionInfo, error) { 241 region, newRegions, err := local.splitCli.BatchSplitRegionsWithOrigin(ctx, region, keys) 242 if err != nil { 243 return nil, nil, errors.Annotatef(err, "batch split regions failed") 244 } 245 var failedErr error 246 retryRegions := make([]*split.RegionInfo, 0) 247 scatterRegions := newRegions 248 waitTime := splitRegionBaseBackOffTime 249 for i := 0; i < maxRetryTimes; i++ { 250 for _, region := range scatterRegions { 251 // Wait for a while until the regions successfully splits. 252 local.waitForSplit(ctx, region.Region.Id) 253 if err = local.splitCli.ScatterRegion(ctx, region); err != nil { 254 failedErr = err 255 retryRegions = append(retryRegions, region) 256 } 257 } 258 if len(retryRegions) == 0 { 259 break 260 } 261 // the scatter operation likely fails because region replicate not finish yet 262 // pack them to one log to avoid printing a lot warn logs. 263 log.L().Warn("scatter region failed", zap.Int("regionCount", len(newRegions)), 264 zap.Int("failedCount", len(retryRegions)), zap.Error(failedErr), zap.Int("retry", i)) 265 scatterRegions = retryRegions 266 retryRegions = make([]*split.RegionInfo, 0) 267 select { 268 case <-time.After(waitTime): 269 case <-ctx.Done(): 270 return nil, nil, ctx.Err() 271 } 272 waitTime *= 2 273 } 274 275 return region, newRegions, nil 276 } 277 278 func (local *local) hasRegion(ctx context.Context, regionID uint64) (bool, error) { 279 regionInfo, err := local.splitCli.GetRegionByID(ctx, regionID) 280 if err != nil { 281 return false, err 282 } 283 return regionInfo != nil, nil 284 } 285 286 func (local *local) waitForSplit(ctx context.Context, regionID uint64) { 287 for i := 0; i < split.SplitCheckMaxRetryTimes; i++ { 288 ok, err := local.hasRegion(ctx, regionID) 289 if err != nil { 290 log.L().Info("wait for split failed", log.ShortError(err)) 291 return 292 } 293 if ok { 294 break 295 } 296 select { 297 case <-time.After(time.Second): 298 case <-ctx.Done(): 299 return 300 } 301 } 302 } 303 304 func (local *local) waitForScatterRegion(ctx context.Context, regionInfo *split.RegionInfo) { 305 regionID := regionInfo.Region.GetId() 306 for i := 0; i < split.ScatterWaitMaxRetryTimes; i++ { 307 ok, err := local.isScatterRegionFinished(ctx, regionID) 308 if err != nil { 309 log.L().Warn("scatter region failed: do not have the region", 310 log.ZapRedactStringer("region", regionInfo.Region)) 311 return 312 } 313 if ok { 314 break 315 } 316 select { 317 case <-time.After(time.Second): 318 case <-ctx.Done(): 319 return 320 } 321 } 322 } 323 324 func (local *local) isScatterRegionFinished(ctx context.Context, regionID uint64) (bool, error) { 325 resp, err := local.splitCli.GetOperator(ctx, regionID) 326 if err != nil { 327 return false, err 328 } 329 // Heartbeat may not be sent to PD 330 if respErr := resp.GetHeader().GetError(); respErr != nil { 331 if respErr.GetType() == pdpb.ErrorType_REGION_NOT_FOUND { 332 return true, nil 333 } 334 // don't return error if region replicate not complete 335 // TODO: should add a new error type to avoid this check by string matching 336 matches, _ := regexp.MatchString("region \\d+ is not fully replicated", respErr.Message) 337 if matches { 338 return false, nil 339 } 340 return false, errors.Errorf("get operator error: %s", respErr.GetType()) 341 } 342 // If the current operator of the region is not 'scatter-region', we could assume 343 // that 'scatter-operator' has finished or timeout 344 ok := string(resp.GetDesc()) != "scatter-region" || resp.GetStatus() != pdpb.OperatorStatus_RUNNING 345 return ok, nil 346 } 347 348 func getSplitKeysByRanges(ranges []Range, regions []*split.RegionInfo) map[uint64][][]byte { 349 checkKeys := make([][]byte, 0) 350 var lastEnd []byte 351 for _, rg := range ranges { 352 if !bytes.Equal(lastEnd, rg.start) { 353 checkKeys = append(checkKeys, rg.start) 354 } 355 checkKeys = append(checkKeys, rg.end) 356 lastEnd = rg.end 357 } 358 return getSplitKeys(checkKeys, regions) 359 } 360 361 func getSplitKeys(checkKeys [][]byte, regions []*split.RegionInfo) map[uint64][][]byte { 362 splitKeyMap := make(map[uint64][][]byte) 363 for _, key := range checkKeys { 364 if region := needSplit(key, regions); region != nil { 365 splitKeys, ok := splitKeyMap[region.Region.GetId()] 366 if !ok { 367 splitKeys = make([][]byte, 0, 1) 368 } 369 splitKeyMap[region.Region.GetId()] = append(splitKeys, key) 370 log.L().Debug("get key for split region", 371 zap.Binary("key", key), 372 zap.Binary("startKey", region.Region.StartKey), 373 zap.Binary("endKey", region.Region.EndKey)) 374 } 375 } 376 return splitKeyMap 377 } 378 379 // needSplit checks whether a key is necessary to split, if true returns the split region 380 func needSplit(key []byte, regions []*split.RegionInfo) *split.RegionInfo { 381 // If splitKey is the max key. 382 if len(key) == 0 { 383 return nil 384 } 385 splitKey := codec.EncodeBytes([]byte{}, key) 386 387 for _, region := range regions { 388 // If splitKey is the boundary of the region 389 if bytes.Equal(splitKey, region.Region.GetStartKey()) { 390 return nil 391 } 392 // If splitKey is in a region 393 if bytes.Compare(splitKey, region.Region.GetStartKey()) > 0 && beforeEnd(splitKey, region.Region.GetEndKey()) { 394 log.L().Debug("need split", 395 zap.Binary("splitKey", key), 396 zap.Binary("encodedKey", splitKey), 397 zap.Binary("region start", region.Region.GetStartKey()), 398 zap.Binary("region end", region.Region.GetEndKey()), 399 ) 400 return region 401 } 402 } 403 return nil 404 } 405 406 func beforeEnd(key []byte, end []byte) bool { 407 return bytes.Compare(key, end) < 0 || len(end) == 0 408 } 409 410 func insideRegion(region *metapb.Region, meta *sst.SSTMeta) bool { 411 rg := meta.GetRange() 412 return keyInsideRegion(region, rg.GetStart()) && keyInsideRegion(region, rg.GetEnd()) 413 } 414 415 func keyInsideRegion(region *metapb.Region, key []byte) bool { 416 return bytes.Compare(key, region.GetStartKey()) >= 0 && (beforeEnd(key, region.GetEndKey())) 417 } 418 419 func intersectRange(region *metapb.Region, rg Range) Range { 420 var startKey, endKey []byte 421 if len(region.StartKey) > 0 { 422 _, startKey, _ = codec.DecodeBytes(region.StartKey, []byte{}) 423 } 424 if bytes.Compare(startKey, rg.start) < 0 { 425 startKey = rg.start 426 } 427 if len(region.EndKey) > 0 { 428 _, endKey, _ = codec.DecodeBytes(region.EndKey, []byte{}) 429 } 430 if beforeEnd(rg.end, endKey) { 431 endKey = rg.end 432 } 433 434 return Range{start: startKey, end: endKey} 435 }