github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/sink/tablesink/table_sink_impl.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package tablesink
    15  
    16  import (
    17  	"sort"
    18  	"sync"
    19  
    20  	"github.com/pingcap/log"
    21  	"github.com/pingcap/tiflow/cdc/model"
    22  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    23  	"github.com/pingcap/tiflow/cdc/sink/dmlsink"
    24  	"github.com/pingcap/tiflow/cdc/sink/tablesink/state"
    25  	"github.com/pingcap/tiflow/pkg/pdutil"
    26  	"github.com/prometheus/client_golang/prometheus"
    27  	"github.com/tikv/client-go/v2/oracle"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  // Assert TableSink implementation
    32  var (
    33  	_ TableSink = (*EventTableSink[*model.RowChangedEvent, *dmlsink.RowChangeEventAppender])(nil)
    34  	_ TableSink = (*EventTableSink[*model.SingleTableTxn, *dmlsink.TxnEventAppender])(nil)
    35  )
    36  
    37  // LastSyncedTsRecord is used to record the last synced ts of table sink with lock
    38  // lastSyncedTs means the biggest commits of the events
    39  // that have been flushed to the downstream.
    40  type LastSyncedTsRecord struct {
    41  	sync.Mutex
    42  	lastSyncedTs model.Ts
    43  }
    44  
    45  // getLastSyncedTs get value from LastSyncedTsRecord
    46  func (r *LastSyncedTsRecord) getLastSyncedTs() model.Ts {
    47  	r.Lock()
    48  	defer r.Unlock()
    49  	return r.lastSyncedTs
    50  }
    51  
    52  // EventTableSink is a table sink that can write events.
    53  type EventTableSink[E dmlsink.TableEvent, P dmlsink.Appender[E]] struct {
    54  	changefeedID model.ChangeFeedID
    55  	span         tablepb.Span
    56  	// startTs is the initial checkpointTs of the table sink.
    57  	startTs model.Ts
    58  
    59  	maxResolvedTs   model.ResolvedTs
    60  	backendSink     dmlsink.EventSink[E]
    61  	progressTracker *progressTracker
    62  	eventAppender   P
    63  	pdClock         pdutil.Clock
    64  	// NOTICE: It is ordered by commitTs.
    65  	eventBuffer []E
    66  	state       state.TableSinkState
    67  
    68  	lastSyncedTs LastSyncedTsRecord
    69  
    70  	// For dataflow metrics.
    71  	metricsTableSinkTotalRows prometheus.Counter
    72  
    73  	metricsTableSinkFlushLagDuration prometheus.Observer
    74  }
    75  
    76  // New an eventTableSink with given backendSink and event appender.
    77  func New[E dmlsink.TableEvent, P dmlsink.Appender[E]](
    78  	changefeedID model.ChangeFeedID,
    79  	span tablepb.Span,
    80  	startTs model.Ts,
    81  	backendSink dmlsink.EventSink[E],
    82  	appender P,
    83  	pdClock pdutil.Clock,
    84  	totalRowsCounter prometheus.Counter,
    85  	flushLagDuration prometheus.Observer,
    86  ) *EventTableSink[E, P] {
    87  	return &EventTableSink[E, P]{
    88  		changefeedID:                     changefeedID,
    89  		span:                             span,
    90  		startTs:                          startTs,
    91  		maxResolvedTs:                    model.NewResolvedTs(0),
    92  		backendSink:                      backendSink,
    93  		progressTracker:                  newProgressTracker(span, defaultBufferSize),
    94  		eventAppender:                    appender,
    95  		pdClock:                          pdClock,
    96  		eventBuffer:                      make([]E, 0, 1024),
    97  		state:                            state.TableSinkSinking,
    98  		lastSyncedTs:                     LastSyncedTsRecord{lastSyncedTs: startTs},
    99  		metricsTableSinkTotalRows:        totalRowsCounter,
   100  		metricsTableSinkFlushLagDuration: flushLagDuration,
   101  	}
   102  }
   103  
   104  // AppendRowChangedEvents appends row changed or txn events to the table sink.
   105  func (e *EventTableSink[E, P]) AppendRowChangedEvents(rows ...*model.RowChangedEvent) {
   106  	e.eventBuffer = e.eventAppender.Append(e.eventBuffer, rows...)
   107  	e.metricsTableSinkTotalRows.Add(float64(len(rows)))
   108  }
   109  
   110  // UpdateResolvedTs advances the resolved ts of the table sink.
   111  func (e *EventTableSink[E, P]) UpdateResolvedTs(resolvedTs model.ResolvedTs) error {
   112  	// If resolvedTs is not greater than maxResolvedTs,
   113  	// the flush is unnecessary.
   114  	if e.maxResolvedTs.EqualOrGreater(resolvedTs) {
   115  		return nil
   116  	}
   117  	e.maxResolvedTs = resolvedTs
   118  
   119  	i := sort.Search(len(e.eventBuffer), func(i int) bool {
   120  		return e.eventBuffer[i].GetCommitTs() > resolvedTs.Ts
   121  	})
   122  	// Despite the lack of data, we have to move forward with progress.
   123  	if i == 0 {
   124  		// WriteEvents must be called to check whether the backend sink is dead
   125  		// or not, even if there is no more events. So if the backend is dead
   126  		// and re-initialized, we can know it and re-build a table sink.
   127  		e.progressTracker.addResolvedTs(resolvedTs)
   128  		if err := e.backendSink.WriteEvents(); err != nil {
   129  			return SinkInternalError{err}
   130  		}
   131  		return nil
   132  	}
   133  	resolvedEvents := e.eventBuffer[:i]
   134  
   135  	// We have to create a new slice for the rest of the elements,
   136  	// otherwise we cannot GC the flushed values as soon as possible.
   137  	e.eventBuffer = append(make([]E, 0, len(e.eventBuffer[i:])), e.eventBuffer[i:]...)
   138  
   139  	resolvedCallbackableEvents := make([]*dmlsink.CallbackableEvent[E], 0, len(resolvedEvents))
   140  	for _, ev := range resolvedEvents {
   141  		if err := ev.TrySplitAndSortUpdateEvent(e.backendSink.Scheme()); err != nil {
   142  			return SinkInternalError{err}
   143  		}
   144  		// We have to record the event ID for the callback.
   145  		postEventFlushFunc := e.progressTracker.addEvent()
   146  		evCommitTs := ev.GetCommitTs()
   147  		phyCommitTs := oracle.ExtractPhysical(evCommitTs)
   148  		ce := &dmlsink.CallbackableEvent[E]{
   149  			Event: ev,
   150  			Callback: func() {
   151  				// Due to multi workers will call this callback concurrently,
   152  				// we need to add lock to protect lastSyncedTs
   153  				// we need make a performance test for it
   154  				{
   155  					e.lastSyncedTs.Lock()
   156  					defer e.lastSyncedTs.Unlock()
   157  
   158  					if e.lastSyncedTs.lastSyncedTs < evCommitTs {
   159  						e.lastSyncedTs.lastSyncedTs = evCommitTs
   160  					}
   161  				}
   162  				pdTime := e.pdClock.CurrentTime()
   163  				currentTs := oracle.GetPhysical(pdTime)
   164  				flushLag := float64(currentTs-phyCommitTs) / 1e3
   165  				e.metricsTableSinkFlushLagDuration.Observe(flushLag)
   166  				postEventFlushFunc()
   167  			},
   168  			SinkState: &e.state,
   169  		}
   170  		resolvedCallbackableEvents = append(resolvedCallbackableEvents, ce)
   171  	}
   172  
   173  	// Do not forget to add the resolvedTs to progressTracker.
   174  	e.progressTracker.addResolvedTs(resolvedTs)
   175  	if err := e.backendSink.WriteEvents(resolvedCallbackableEvents...); err != nil {
   176  		return SinkInternalError{err}
   177  	}
   178  	return nil
   179  }
   180  
   181  // GetCheckpointTs returns the checkpoint ts of the table sink.
   182  func (e *EventTableSink[E, P]) GetCheckpointTs() model.ResolvedTs {
   183  	if e.state.Load() == state.TableSinkStopping {
   184  		if e.progressTracker.checkClosed(e.backendSink.Dead()) {
   185  			e.markAsClosed()
   186  		}
   187  	}
   188  	return e.progressTracker.advance()
   189  }
   190  
   191  // GetLastSyncedTs returns the last synced ts of table sink.
   192  // lastSyncedTs means the biggest commits of all the events
   193  // that have been flushed to the downstream.
   194  func (e *EventTableSink[E, P]) GetLastSyncedTs() model.Ts {
   195  	return e.lastSyncedTs.getLastSyncedTs()
   196  }
   197  
   198  // Close closes the table sink.
   199  // After it returns, no more events will be sent out from this capture.
   200  func (e *EventTableSink[E, P]) Close() {
   201  	e.freeze()
   202  	e.progressTracker.waitClosed(e.backendSink.Dead())
   203  	e.markAsClosed()
   204  }
   205  
   206  // AsyncClose closes the table sink asynchronously. Returns true if it's closed.
   207  func (e *EventTableSink[E, P]) AsyncClose() bool {
   208  	e.freeze()
   209  	if e.progressTracker.checkClosed(e.backendSink.Dead()) {
   210  		e.markAsClosed()
   211  		return true
   212  	}
   213  	return false
   214  }
   215  
   216  // CheckHealth checks whether the associated sink backend is healthy or not.
   217  func (e *EventTableSink[E, P]) CheckHealth() error {
   218  	if err := e.backendSink.WriteEvents(); err != nil {
   219  		return SinkInternalError{err}
   220  	}
   221  	return nil
   222  }
   223  
   224  func (e *EventTableSink[E, P]) freeze() {
   225  	// Notice: We have to set the state to stopping first,
   226  	// otherwise the progressTracker may be advanced incorrectly.
   227  	// For example, if we do not freeze it and set the state to stooping
   228  	// then the progressTracker may be advanced to the checkpointTs
   229  	// because backend sink drops some events.
   230  	e.progressTracker.freezeProcess()
   231  
   232  	for {
   233  		currentState := e.state.Load()
   234  		if currentState == state.TableSinkStopping || currentState == state.TableSinkStopped {
   235  			break
   236  		}
   237  		if e.state.CompareAndSwap(currentState, state.TableSinkStopping) {
   238  			stoppingCheckpointTs := e.GetCheckpointTs()
   239  			log.Info("Stopping table sink",
   240  				zap.String("namespace", e.changefeedID.Namespace),
   241  				zap.String("changefeed", e.changefeedID.ID),
   242  				zap.Stringer("span", &e.span),
   243  				zap.Uint64("checkpointTs", stoppingCheckpointTs.Ts))
   244  			break
   245  		}
   246  	}
   247  }
   248  
   249  func (e *EventTableSink[E, P]) markAsClosed() (modified bool) {
   250  	for {
   251  		currentState := e.state.Load()
   252  		if currentState == state.TableSinkStopped {
   253  			return
   254  		}
   255  		if e.state.CompareAndSwap(currentState, state.TableSinkStopped) {
   256  			stoppedCheckpointTs := e.GetCheckpointTs()
   257  			log.Info("Table sink stopped",
   258  				zap.String("namespace", e.changefeedID.Namespace),
   259  				zap.String("changefeed", e.changefeedID.ID),
   260  				zap.Stringer("span", &e.span),
   261  				zap.Uint64("checkpointTs", stoppedCheckpointTs.Ts))
   262  			return true
   263  		}
   264  	}
   265  }