github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/sink/dmlsink/event_appender.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License
    13  
    14  package dmlsink
    15  
    16  import (
    17  	"github.com/pingcap/log"
    18  	"github.com/pingcap/tiflow/cdc/model"
    19  	"go.uber.org/zap"
    20  )
    21  
// Appender appends row changed events to a buffer whose element type E
// satisfies TableEvent.
type Appender[E TableEvent] interface {
	// Append adds rows to buffer and returns the resulting buffer.
	// The implementations in this file grow the slice with the built-in
	// append, so callers should keep using the returned slice instead of
	// the one they passed in.
	Append(buffer []E, rows ...*model.RowChangedEvent) []E
}
    27  
// Compile-time check that *RowChangeEventAppender implements
// Appender[*model.RowChangedEvent].
var _ Appender[*model.RowChangedEvent] = (*RowChangeEventAppender)(nil)

// RowChangeEventAppender is the Appender for buffers of RowChangedEvent.
// It has no fields.
type RowChangeEventAppender struct{}
    33  
    34  // Append appends the given rows to the given buffer.
    35  func (r *RowChangeEventAppender) Append(
    36  	buffer []*model.RowChangedEvent,
    37  	rows ...*model.RowChangedEvent,
    38  ) []*model.RowChangedEvent {
    39  	return append(buffer, rows...)
    40  }
    41  
// Compile-time check that *TxnEventAppender implements
// Appender[*model.SingleTableTxn].
var _ Appender[*model.SingleTableTxn] = (*TxnEventAppender)(nil)

// TxnEventAppender is the Appender that groups rows into SingleTableTxn.
type TxnEventAppender struct {
	// TableSinkStartTs is the startTs of the table sink. It is used as the
	// lower bound of TableInfoVersion for every txn this appender creates.
	TableSinkStartTs model.Ts
	// IgnoreStartTs indicates whether to ignore the startTs of the row.
	// When set, a row whose CommitTs differs from the commit ts of the last
	// buffered txn also starts a new txn.
	// This is used by consumer to keep compatibility with the old version.
	// Most of our protocols are ignoring the startTs of the row, so we
	// can not use the startTs to identify a transaction.
	IgnoreStartTs bool
}
    55  
    56  // Append appends the given rows to the given txn buffer.
    57  // The callers of this function should **make sure** that
    58  // the commitTs and startTs of rows is **strictly increasing**.
    59  // 1. Txns ordered by commitTs and startTs.
    60  // 2. Rows are grouped into SingleTableTxn by startTs and big txn batch,
    61  // since the startTs is the unique identifier of a transaction.
    62  // After Append, the structure of the buffer is:
    63  // buffer = [Txn1[row11, row12...], Txn2[row21,row22...]...], in which:
    64  //  1. If Txn1.CommitTs < Txn2.CommitTs, then Txn1.startTs can be
    65  //     either less or larger than Txn2.startTs.
    66  //  2. If Txn1.CommitTs == Txn2.CommitTs, then Txn1.startTs must be
    67  //     **less than** Txn2.startTs.
    68  func (t *TxnEventAppender) Append(
    69  	buffer []*model.SingleTableTxn,
    70  	rows ...*model.RowChangedEvent,
    71  ) []*model.SingleTableTxn {
    72  	for _, row := range rows {
    73  		// This means no txn is in the buffer.
    74  		if len(buffer) == 0 {
    75  			txn := t.createSingleTableTxn(row)
    76  			txn.Append(row)
    77  			buffer = append(buffer, txn)
    78  			continue
    79  		}
    80  
    81  		lastTxn := buffer[len(buffer)-1]
    82  
    83  		lastCommitTs := lastTxn.GetCommitTs()
    84  		if lastCommitTs > row.CommitTs {
    85  			log.Panic("The commitTs of the emit row is less than the received row",
    86  				zap.Uint64("lastReceivedCommitTs", lastCommitTs),
    87  				zap.Any("row", row))
    88  		} else if lastCommitTs == row.CommitTs && lastTxn.StartTs > row.StartTs {
    89  			log.Panic("The startTs of the emit row is less than the received row with same CommitTs",
    90  				zap.Uint64("lastReceivedCommitTs", lastCommitTs),
    91  				zap.Uint64("lastReceivedStartTs", lastTxn.StartTs),
    92  				zap.Any("row", row))
    93  		}
    94  
    95  		// Split on big transactions or a new one. For 2 transactions,
    96  		// their commitTs can be same but startTs will be never same.
    97  		normalBoundary := row.SplitTxn || lastTxn.StartTs != row.StartTs
    98  		// NOTICE: This is a special case for compatibility with old version.
    99  		// In our lots of protocols, we are ignoring the startTs of the row,
   100  		// so we can not use the startTs to identify a transaction.
   101  		ignoreStartTsBoundary := t.IgnoreStartTs && lastCommitTs != row.CommitTs
   102  		if normalBoundary || ignoreStartTsBoundary {
   103  			buffer = append(buffer, t.createSingleTableTxn(row))
   104  		}
   105  
   106  		buffer[len(buffer)-1].Append(row)
   107  	}
   108  
   109  	return buffer
   110  }
   111  
   112  func (t *TxnEventAppender) createSingleTableTxn(
   113  	row *model.RowChangedEvent,
   114  ) *model.SingleTableTxn {
   115  	txn := &model.SingleTableTxn{
   116  		StartTs:         row.StartTs,
   117  		CommitTs:        row.CommitTs,
   118  		PhysicalTableID: row.PhysicalTableID,
   119  		TableInfo:       row.TableInfo,
   120  	}
   121  	if row.TableInfo != nil {
   122  		txn.TableInfoVersion = row.TableInfo.Version
   123  	}
   124  	// If one table is just scheduled to a new processor, the txn.TableInfoVersion should be
   125  	// greater than or equal to the startTs of table sink.
   126  	if txn.TableInfoVersion < t.TableSinkStartTs {
   127  		txn.TableInfoVersion = t.TableSinkStartTs
   128  	}
   129  	return txn
   130  }