github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/keyspan/splitter_region_count.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package keyspan
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"math"
    20  
    21  	"github.com/pingcap/log"
    22  	"github.com/pingcap/tiflow/cdc/model"
    23  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    24  	"github.com/tikv/client-go/v2/tikv"
    25  	"go.uber.org/zap"
    26  )
    27  
// regionCountSplitter splits a table span into several sub-spans based on the
// number of regions the span covers.
type regionCountSplitter struct {
	// changefeedID supplies the namespace and changefeed ID attached to the
	// log entries emitted while splitting.
	changefeedID    model.ChangeFeedID
	// regionCache is used to list the region IDs inside a key range and to
	// locate a region by its ID.
	regionCache     RegionCache
	// regionThreshold is the region count a span must exceed before it is
	// split; spans with at most this many regions are returned unchanged.
	regionThreshold int
}
    33  
    34  func newRegionCountSplitter(
    35  	changefeedID model.ChangeFeedID, regionCache RegionCache, regionThreshold int,
    36  ) *regionCountSplitter {
    37  	return &regionCountSplitter{
    38  		changefeedID:    changefeedID,
    39  		regionCache:     regionCache,
    40  		regionThreshold: regionThreshold,
    41  	}
    42  }
    43  
    44  func (m *regionCountSplitter) split(
    45  	ctx context.Context, span tablepb.Span, captureNum int,
    46  ) []tablepb.Span {
    47  	bo := tikv.NewBackoffer(ctx, 500)
    48  	regions, err := m.regionCache.ListRegionIDsInKeyRange(bo, span.StartKey, span.EndKey)
    49  	if err != nil {
    50  		log.Warn("schedulerv3: list regions failed, skip split span",
    51  			zap.String("namespace", m.changefeedID.Namespace),
    52  			zap.String("changefeed", m.changefeedID.ID),
    53  			zap.String("span", span.String()),
    54  			zap.Error(err))
    55  		return []tablepb.Span{span}
    56  	}
    57  	if len(regions) <= m.regionThreshold || captureNum == 0 {
    58  		log.Info("schedulerv3: skip split span by region count",
    59  			zap.String("namespace", m.changefeedID.Namespace),
    60  			zap.String("changefeed", m.changefeedID.ID),
    61  			zap.String("span", span.String()),
    62  			zap.Int("totalCaptures", captureNum),
    63  			zap.Int("regionCount", len(regions)),
    64  			zap.Int("regionThreshold", m.regionThreshold))
    65  		return []tablepb.Span{span}
    66  	}
    67  
    68  	stepper := newEvenlySplitStepper(
    69  		getSpansNumber(len(regions), captureNum),
    70  		len(regions))
    71  
    72  	spans := make([]tablepb.Span, 0, stepper.SpanCount())
    73  	start, end := 0, stepper.Step()
    74  	for {
    75  		startRegion, err := m.regionCache.LocateRegionByID(bo, regions[start])
    76  		if err != nil {
    77  			log.Warn("schedulerv3: get regions failed, skip split span",
    78  				zap.String("namespace", m.changefeedID.Namespace),
    79  				zap.String("changefeed", m.changefeedID.ID),
    80  				zap.String("span", span.String()),
    81  				zap.Error(err))
    82  			return []tablepb.Span{span}
    83  		}
    84  		endRegion, err := m.regionCache.LocateRegionByID(bo, regions[end-1])
    85  		if err != nil {
    86  			log.Warn("schedulerv3: get regions failed, skip split span",
    87  				zap.String("namespace", m.changefeedID.Namespace),
    88  				zap.String("changefeed", m.changefeedID.ID),
    89  				zap.String("span", span.String()),
    90  				zap.Error(err))
    91  			return []tablepb.Span{span}
    92  		}
    93  		if len(spans) > 0 &&
    94  			bytes.Compare(spans[len(spans)-1].EndKey, startRegion.StartKey) > 0 {
    95  			log.Warn("schedulerv3: list region out of order detected",
    96  				zap.String("namespace", m.changefeedID.Namespace),
    97  				zap.String("changefeed", m.changefeedID.ID),
    98  				zap.String("span", span.String()),
    99  				zap.Stringer("lastSpan", &spans[len(spans)-1]),
   100  				zap.Stringer("region", startRegion))
   101  			return []tablepb.Span{span}
   102  		}
   103  		spans = append(spans, tablepb.Span{
   104  			TableID:  span.TableID,
   105  			StartKey: startRegion.StartKey,
   106  			EndKey:   endRegion.EndKey,
   107  		})
   108  
   109  		if end == len(regions) {
   110  			break
   111  		}
   112  		start = end
   113  		step := stepper.Step()
   114  		if end+step < len(regions) {
   115  			end = end + step
   116  		} else {
   117  			end = len(regions)
   118  		}
   119  	}
   120  	// Make sure spans does not exceed [startKey, endKey).
   121  	spans[0].StartKey = span.StartKey
   122  	spans[len(spans)-1].EndKey = span.EndKey
   123  	log.Info("schedulerv3: split span by region count",
   124  		zap.String("namespace", m.changefeedID.Namespace),
   125  		zap.String("changefeed", m.changefeedID.ID),
   126  		zap.String("span", span.String()),
   127  		zap.Int("spans", len(spans)),
   128  		zap.Int("totalCaptures", captureNum),
   129  		zap.Int("regionCount", len(regions)),
   130  		zap.Int("regionThreshold", m.regionThreshold),
   131  		zap.Int("spanRegionLimit", spanRegionLimit))
   132  	return spans
   133  }
   134  
// evenlySplitStepper yields, one step at a time, how many regions each span
// should contain so that regions are spread across spans as evenly as possible.
type evenlySplitStepper struct {
	// spanCount is the number of spans the regions are divided into.
	spanCount          int
	// regionPerSpan is the base number of regions every step returns.
	regionPerSpan      int
	// extraRegionPerSpan is added to a step while remain is still positive.
	extraRegionPerSpan int
	// remain tracks the leftover regions not yet handed out; Step decreases
	// it by extraRegionPerSpan each time it hands out extra regions.
	remain             int
}
   141  
   142  func newEvenlySplitStepper(pages int, totalRegion int) evenlySplitStepper {
   143  	extraRegionPerSpan := 0
   144  	regionPerSpan, remain := totalRegion/pages, totalRegion%pages
   145  	if regionPerSpan == 0 {
   146  		regionPerSpan = 1
   147  		extraRegionPerSpan = 0
   148  		pages = totalRegion
   149  	} else if remain != 0 {
   150  		// Evenly distributes the remaining regions.
   151  		extraRegionPerSpan = int(math.Ceil(float64(remain) / float64(pages)))
   152  	}
   153  	res := evenlySplitStepper{
   154  		regionPerSpan:      regionPerSpan,
   155  		spanCount:          pages,
   156  		extraRegionPerSpan: extraRegionPerSpan,
   157  		remain:             remain,
   158  	}
   159  	log.Info("schedulerv3: evenly split stepper", zap.Any("evenlySplitStepper", res))
   160  	return res
   161  }
   162  
   163  func (e *evenlySplitStepper) SpanCount() int {
   164  	return e.spanCount
   165  }
   166  
   167  func (e *evenlySplitStepper) Step() int {
   168  	if e.remain <= 0 {
   169  		return e.regionPerSpan
   170  	}
   171  	e.remain = e.remain - e.extraRegionPerSpan
   172  	return e.regionPerSpan + e.extraRegionPerSpan
   173  }