github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/keyspan/reconciler.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package keyspan

import (
	"context"

	"github.com/pingcap/log"
	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/processor/tablepb"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/compat"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/member"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/replication"
	"github.com/pingcap/tiflow/pkg/config"
	"github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/pdutil"
	"github.com/pingcap/tiflow/pkg/spanz"
	"github.com/pingcap/tiflow/pkg/upstream"
	"go.uber.org/zap"
)

const (
	// spanRegionLimit is the maximum number of regions a span can cover.
	spanRegionLimit = 50000
	// baseSpanNumberCoefficient is the base coefficient used to multiply
	// the number of captures to get the number of spans.
	baseSpanNumberCoefficient = 3
)

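// splitter breaks a table's key span into multiple spans according to one
// policy, e.g. write traffic or region count.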
type splitter interface {
	split(
		ctx context.Context, span tablepb.Span, totalCaptures int,
	) []tablepb.Span
}

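// splittedSpans holds the spans of one table and records whether they were
// created by the add-table path (as opposed to being rebuilt from current
// replications).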
type splittedSpans struct {
	byAddTable bool
	spans      []tablepb.Span
}

// Reconciler reconciles the span and table mapping, making sure spans are in
// a desired state and cover all table ranges.
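//
// tableSpans records the desired spans of every replicated table, while
// spanCache caches the flattened span list returned by the last Reconcile.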
type Reconciler struct {
	tableSpans map[model.TableID]splittedSpans
	spanCache  []tablepb.Span

	changefeedID model.ChangeFeedID
	config       *config.ChangefeedSchedulerConfig

	splitter []splitter
}

// NewReconciler returns a Reconciler.
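//
// A minimal wiring sketch; `up`, `schedulerCfg` and the arguments passed to
// Reconcile are assumed to be supplied by the scheduler coordinator, they are
// not defined in this file:
//
//	r, err := NewReconciler(changefeedID, up, schedulerCfg)
//	if err != nil {
//		return errors.Trace(err)
//	}
//	spans := r.Reconcile(ctx, tableRanges, replications, aliveCaptures, compatibility)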
func NewReconciler(
	changefeedID model.ChangeFeedID,
	up *upstream.Upstream,
	config *config.ChangefeedSchedulerConfig,
) (*Reconciler, error) {
	pdapi, err := pdutil.NewPDAPIClient(up.PDClient, up.SecurityConfig)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return &Reconciler{
		tableSpans:   make(map[int64]splittedSpans),
		changefeedID: changefeedID,
		config:       config,
		splitter: []splitter{
			// write splitter has the highest priority.
			newWriteSplitter(changefeedID, pdapi, config.WriteKeyThreshold),
			newRegionCountSplitter(changefeedID, up.RegionCache, config.RegionThreshold),
		},
	}, nil
}

// Reconcile returns the spans that need to be replicated based on the current
// cluster status. It handles the following cases:
// 1. Changefeed initialization.
// 2. Owner switch.
// 3. Owner switch after some captures fail.
// 4. Add table by DDL.
// 5. Drop table by DDL.
// 6. Some captures fail, which does NOT affect spans.
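//
// The returned spans are cached; the cache is rebuilt only when the span set
// changes, i.e. a table is added or dropped, or missing spans are recreated.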
func (m *Reconciler) Reconcile(
	ctx context.Context,
	currentTables *replication.TableRanges,
	replications *spanz.BtreeMap[*replication.ReplicationSet],
	aliveCaptures map[model.CaptureID]*member.CaptureStatus,
	compat *compat.Compat,
) []tablepb.Span {
	tablesLenEqual := currentTables.Len() == len(m.tableSpans)
	allTablesFound := true
	updateCache := false
	currentTables.Iter(func(tableID model.TableID, tableStart, tableEnd tablepb.Span) bool {
		if _, ok := m.tableSpans[tableID]; !ok {
			// Found a new table.
			allTablesFound = false
			updateCache = true
		}

		// Reconcile spans from current replications.
		coveredSpans, holes := replications.FindHoles(tableStart, tableEnd)
		if len(coveredSpans) == 0 {
			// No such spans in replications.
			if _, ok := m.tableSpans[tableID]; ok {
				// And we have seen such spans before; it means these spans
				// have not been scheduled yet due to the basic scheduler's
				// batch add task rate limit.
				return true
			}
			// And we have not seen such spans before, maybe:
			// 1. it's a table being added when starting a changefeed
			//    or after owner switch.
			// 4. it's a new table being created by DDL when a changefeed is running.
			tableSpan := spanz.TableIDToComparableSpan(tableID)
			spans := []tablepb.Span{tableSpan}
			if compat.CheckSpanReplicationEnabled() {
				for _, splitter := range m.splitter {
					spans = splitter.split(ctx, tableSpan, len(aliveCaptures))
					if len(spans) > 1 {
						break
					}
				}
			}
			m.tableSpans[tableID] = splittedSpans{
				byAddTable: true,
				spans:      spans,
			}
			updateCache = true
		} else if len(holes) != 0 {
			// There are some holes in the table span, maybe:
			if spans, ok := m.tableSpans[tableID]; ok && spans.byAddTable {
				// These spans were split when the reconciler added the table.
				// Scheduling may still be in progress because of the basic
				// scheduler's rate limit.
				return true
			}
			// 3. owner switch after some captures failed.
			log.Info("schedulerv3: detect owner switch after captures fail",
				zap.String("changefeed", m.changefeedID.ID),
				zap.String("namespace", m.changefeedID.Namespace),
				zap.Int64("tableID", tableID),
				zap.Int("holes", len(holes)),
				zap.String("spanStart", tableStart.String()),
				zap.String("spanEnd", tableEnd.String()),
				zap.String("foundStart", coveredSpans[0].String()),
				zap.String("foundEnd", coveredSpans[len(coveredSpans)-1].String()))
			spans := make([]tablepb.Span, 0, len(coveredSpans)+len(holes))
			spans = append(spans, coveredSpans...)
			for _, s := range holes {
				spans = append(spans, tablepb.Span{
					TableID:  tableID,
					StartKey: s.StartKey,
					EndKey:   s.EndKey,
				})
				// TODO: maybe we should split holes too.
			}
			m.tableSpans[tableID] = splittedSpans{
				byAddTable: false,
				spans:      spans,
			}
			updateCache = true
		} else {
			// Found and no holes, maybe:
			// 2. owner switch and no captures failed.
			ss := m.tableSpans[tableID]
			ss.byAddTable = false
			ss.spans = ss.spans[:0]
			ss.spans = append(ss.spans, coveredSpans...)
			m.tableSpans[tableID] = ss
		}
		return true
	})

	// 5. Drop table by DDL.
	// Most of the time, removing tables is unlikely to happen.
	//
	// Fast path to check whether the two sets are identical:
	// they are identical if the lengths of currentTables and tableSpans are
	// equal, and every table in currentTables has a record in tableSpans.
	if !tablesLenEqual || !allTablesFound {
		// The two sets are not identical. We need to find removed tables.
		// Build a tableID hash set to improve performance.
		currentTableSet := make(map[model.TableID]struct{}, currentTables.Len())
		currentTables.Iter(func(tableID model.TableID, _, _ tablepb.Span) bool {
			currentTableSet[tableID] = struct{}{}
			return true
		})
		for tableID := range m.tableSpans {
			_, ok := currentTableSet[tableID]
			if !ok {
				// Found dropped table.
				delete(m.tableSpans, tableID)
				updateCache = true
			}
		}
	}

	if updateCache {
		m.spanCache = make([]tablepb.Span, 0)
		for _, ss := range m.tableSpans {
			m.spanCache = append(m.spanCache, ss.spans...)
		}
	}
	return m.spanCache
}

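// maxSpanNumber is the maximum number of spans a single table can be split into.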
const maxSpanNumber = 100

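// getSpansNumber returns the target number of spans for a table, given the
// table's total region count and the number of alive captures.
//
// A worked example with the constants above (illustrative only): for 4
// captures and 250,000 regions, coefficient = max(4-1, baseSpanNumberCoefficient) = 3,
// so spanNum = 4*3 = 12; that is larger than 250,000/spanRegionLimit = 5 and
// below maxSpanNumber, so 12 is returned.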
func getSpansNumber(regionNum, captureNum int) int {
	coefficient := captureNum - 1
	if baseSpanNumberCoefficient > coefficient {
		coefficient = baseSpanNumberCoefficient
	}
	spanNum := 1
	if regionNum > 1 {
		// spanNumber = max(captureNum * coefficient, totalRegions / spanRegionLimit)
		spanNum = captureNum * coefficient
		if regionNum/spanRegionLimit > spanNum {
			spanNum = regionNum / spanRegionLimit
		}
	}
	if spanNum > maxSpanNumber {
		spanNum = maxSpanNumber
	}
	return spanNum
}