github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/processor/sinkmanager/table_sink_worker.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package sinkmanager

import (
	"context"
	"sync/atomic"
	"time"

	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/log"
	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/processor/memquota"
	"github.com/pingcap/tiflow/cdc/processor/sourcemanager"
	"github.com/pingcap/tiflow/cdc/processor/sourcemanager/sorter"
	"github.com/pingcap/tiflow/cdc/sink/tablesink"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/tikv/client-go/v2/oracle"
	"go.uber.org/zap"
)

// batchID is used to advance the table sink with a given CommitTs, even if not
// all transactions with that CommitTs have been collected, regardless of whether
// splitTxn is enabled or not. We split transactions with the same CommitTs even
// if splitTxn is false, and doing so does not break transaction atomicity in the
// downstream.
// NOTICE:
// batchID is used to distinguish different batches of the same transaction.
// We need to use a global variable because the same commit ts event may be
// processed at different times.
// For example:
//  1. The commit ts is 1000, and the start ts is 998.
//  2. Keep fetching events and flush them to the sink with batch ID 1.
//  3. Because we don't have enough memory quota, we need to flush the events
//     and wait for the next round of processing.
//  4. The next round of processing starts at commit ts 1000, and the start ts
//     is 999.
//  5. The batch ID restarts from 1, and the commit ts is still 1000.
//  6. We flush all the events with commit ts 1000 and batch ID 1 to the sink.
//  7. We release the memory quota of the events earlier because the current
//     round of processing is not finished.
//
// Therefore, we must use a global variable to ensure that the batch ID is
// monotonically increasing.
// We share this variable among all workers; it is OK if the batch ID does not
// increase strictly one by one.
var batchID atomic.Uint64

type sinkWorker struct {
	changefeedID  model.ChangeFeedID
	sourceManager *sourcemanager.SourceManager
	sinkMemQuota  *memquota.MemQuota
	// splitTxn indicates whether to split the transaction into multiple batches.
	splitTxn bool

	// Metrics.
	metricOutputEventCountKV prometheus.Counter
}

// newSinkWorker creates a new sink worker.
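// The worker holds no per-task state: each call to handleTask creates its own
// advancer and iterator, so one worker can process tasks for different table
// spans of the same changefeed, one at a time.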
func newSinkWorker(
	changefeedID model.ChangeFeedID,
	sourceManager *sourcemanager.SourceManager,
	sinkQuota *memquota.MemQuota,
	splitTxn bool,
) *sinkWorker {
	return &sinkWorker{
		changefeedID:  changefeedID,
		sourceManager: sourceManager,
		sinkMemQuota:  sinkQuota,
		splitTxn:      splitTxn,

		metricOutputEventCountKV: outputEventCount.WithLabelValues(changefeedID.Namespace, changefeedID.ID, "kv"),
	}
}

func (w *sinkWorker) handleTasks(ctx context.Context, taskChan <-chan *sinkTask) error {
	failpoint.Inject("SinkWorkerTaskHandlePause", func() { <-ctx.Done() })
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case task := <-taskChan:
			err := w.handleTask(ctx, task)
			failpoint.Inject("SinkWorkerTaskError", func() {
				err = errors.New("SinkWorkerTaskError")
			})
			if err != nil {
				return err
			}
		}
	}
}

func (w *sinkWorker) handleTask(ctx context.Context, task *sinkTask) (finalErr error) {
	// We need to use a new batch ID for each task.
	batchID.Add(1)
	advancer := newTableSinkAdvancer(task, w.splitTxn, w.sinkMemQuota, requestMemSize)
	// When the task finishes, some acquired memory may be left unused and needs to be cleaned up.
	defer advancer.cleanup()

	lowerBound, upperBound := validateAndAdjustBound(
		w.changefeedID,
		&task.span,
		task.lowerBound,
		task.getUpperBound(task.tableSink.getUpperBoundTs()))
	advancer.lastPos = lowerBound.Prev()

	allEventCount := 0

	callbackIsPerformed := false
	performCallback := func(pos sorter.Position) {
		if !callbackIsPerformed {
			task.callback(pos)
			callbackIsPerformed = true
		}
	}

	defer func() {
		// Prepare some information for stale table range cleaning.
		task.tableSink.updateRangeEventCounts(newRangeEventCount(advancer.lastPos, allEventCount))

		// Collect metrics.
		w.metricOutputEventCountKV.Add(float64(allEventCount))

		log.Debug("Sink task finished",
			zap.String("namespace", w.changefeedID.Namespace),
			zap.String("changefeed", w.changefeedID.ID),
			zap.Stringer("span", &task.span),
			zap.Any("lowerBound", lowerBound),
			zap.Any("upperBound", upperBound),
			zap.Bool("splitTxn", w.splitTxn),
			zap.Int("receivedEvents", allEventCount),
			zap.Any("lastPos", advancer.lastPos),
			zap.Float64("lag", time.Since(oracle.GetTimeFromTS(advancer.lastPos.CommitTs)).Seconds()),
			zap.Error(finalErr))

		// Only perform the callback on success; otherwise we can't ensure all
		// events before `lastPos` have been emitted.
		if finalErr == nil {
			performCallback(advancer.lastPos)
		} else {
			switch errors.Cause(finalErr).(type) {
			// If it's a warning, close the table sink and wait until all pending
			// events have been reported. Then we can continue the table
			// from the checkpoint position.
			case tablesink.SinkInternalError:
				// After the table sink is cleared, all pending events have been sent out or dropped.
				// So we can re-add the table into sinkMemQuota.
				w.sinkMemQuota.ClearTable(task.tableSink.span)
				performCallback(advancer.lastPos)
				finalErr = nil
			default:
			}
		}
	}()

	// lowerBound and upperBound are both closed intervals.
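	// The iterator pulls sorted events for this span from the source manager;
	// it is closed in the defer below so sorter resources are released even if
	// the task returns early with an error.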
	iter := w.sourceManager.FetchByTable(task.span, lowerBound, upperBound, w.sinkMemQuota)
	defer func() {
		if err := iter.Close(); err != nil {
			log.Error("sink worker fails to close iterator",
				zap.String("namespace", w.changefeedID.Namespace),
				zap.String("changefeed", w.changefeedID.ID),
				zap.Stringer("span", &task.span),
				zap.Error(err))
		}
	}()

	// Keep collecting events as long as:
	// 1. We have enough memory to collect events.
	// 2. The task is not canceled.
	for advancer.hasEnoughMem() && !task.isCanceled() {
		e, pos, err := iter.Next(ctx)
		if err != nil {
			return errors.Trace(err)
		}

		// There is no more data, which means this scan task is finished.
		if e == nil {
			return advancer.finish(upperBound)
		}

		allEventCount += 1

		// Only record the last valid position.
		// If the current txn is not finished, the position is not valid.
		if pos.Valid() {
			advancer.lastPos = pos
		}

		// When we meet a new commit ts, we need to emit the previous events.
		advancer.tryMoveToNextTxn(e.CRTs)

		// NOTICE: The event may have been filtered by the event filter.
		if e.Row != nil {
			// For all rows, we add the table replicate ts, so the MySQL sink can determine safe-mode.
			e.Row.ReplicatingTs = task.tableSink.replicateTs
			x, size := handleRowChangedEvents(w.changefeedID, task.span, e)
			advancer.appendEvents(x, size)
		}

		if err := advancer.tryAdvanceAndAcquireMem(false, pos.Valid()); err != nil {
			return errors.Trace(err)
		}
	}

	return advancer.lastTimeAdvance()
}