github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/pkg/cyclic/filter.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package cyclic
    15  
    16  import (
    17  	"github.com/pingcap/log"
    18  	"github.com/pingcap/ticdc/cdc/model"
    19  	"github.com/pingcap/ticdc/pkg/cyclic/mark"
    20  	"go.uber.org/zap"
    21  )
    22  
    23  // ExtractReplicaID extracts replica ID from the given mark row.
    24  func ExtractReplicaID(markRow *model.RowChangedEvent) uint64 {
    25  	for _, c := range markRow.Columns {
    26  		if c == nil {
    27  			continue
    28  		}
    29  		if c.Name == mark.CyclicReplicaIDCol {
    30  			return c.Value.(uint64)
    31  		}
    32  	}
    33  	log.Panic("bad mark table, " + mark.CyclicReplicaIDCol + " not found")
    34  	return 0
    35  }
    36  
// TxnMap maps a transaction's start ts to the rows it changed, grouped by
// table name, because a single transaction may cross multiple tables.
type TxnMap map[uint64]map[model.TableName][]*model.RowChangedEvent
    39  
// MarkMap maps a transaction's start ts to its mark table row.
// Each transaction modifies at most one mark table row, so a single row per
// start ts is enough.
type MarkMap map[uint64]*model.RowChangedEvent
    43  
    44  func (m MarkMap) shouldFilterTxn(startTs uint64, filterReplicaIDs []uint64, replicaID uint64) (*model.RowChangedEvent, bool) {
    45  	markRow, markFound := m[startTs]
    46  	if !markFound {
    47  		return nil, false
    48  	}
    49  	from := ExtractReplicaID(markRow)
    50  	if from == replicaID {
    51  		log.Panic("cyclic replication loopback detected",
    52  			zap.Any("markRow", markRow),
    53  			zap.Uint64("replicaID", replicaID))
    54  	}
    55  	for i := range filterReplicaIDs {
    56  		if filterReplicaIDs[i] == from {
    57  			return markRow, true
    58  		}
    59  	}
    60  	return markRow, false
    61  }
    62  
    63  // FilterAndReduceTxns filters duplicate txns bases on filterReplicaIDs and
    64  // if the mark table dml is exist in the txn, this functiong will set the replicaID by mark table dml
    65  // if the mark table dml is not exist, this function will set the replicaID by config
    66  func FilterAndReduceTxns(
    67  	txnsMap map[model.TableID][]*model.SingleTableTxn, filterReplicaIDs []uint64, replicaID uint64,
    68  ) (skippedRowCount int) {
    69  	markMap := make(MarkMap)
    70  	for _, txns := range txnsMap {
    71  		if !mark.IsMarkTable(txns[0].Table.Schema, txns[0].Table.Table) {
    72  			continue
    73  		}
    74  		for _, txn := range txns {
    75  			for _, event := range txn.Rows {
    76  				first, ok := markMap[txn.StartTs]
    77  				if ok {
    78  					// TiKV may emit the same row multiple times.
    79  					if event.CommitTs != first.CommitTs ||
    80  						event.RowID != first.RowID {
    81  						log.Panic(
    82  							"there should be at most one mark row for each txn",
    83  							zap.Uint64("start-ts", event.StartTs),
    84  							zap.Any("first", first),
    85  							zap.Any("second", event))
    86  					}
    87  				}
    88  				markMap[event.StartTs] = event
    89  			}
    90  		}
    91  	}
    92  	for table, txns := range txnsMap {
    93  		if mark.IsMarkTable(txns[0].Table.Schema, txns[0].Table.Table) {
    94  			delete(txnsMap, table)
    95  			for i := range txns {
    96  				// For simplicity, we do not count mark table rows in statistics.
    97  				skippedRowCount += len(txns[i].Rows)
    98  			}
    99  			continue
   100  		}
   101  		filteredTxns := make([]*model.SingleTableTxn, 0, len(txns))
   102  		for _, txn := range txns {
   103  			// Check if we should skip this event
   104  			markRow, needSkip := markMap.shouldFilterTxn(txn.StartTs, filterReplicaIDs, replicaID)
   105  			if needSkip {
   106  				// Found cyclic mark, skip this event as it originly created from
   107  				// downstream.
   108  				skippedRowCount += len(txn.Rows)
   109  				continue
   110  			}
   111  			txn.ReplicaID = replicaID
   112  			if markRow != nil {
   113  				txn.ReplicaID = ExtractReplicaID(markRow)
   114  			}
   115  			filteredTxns = append(filteredTxns, txn)
   116  		}
   117  		if len(filteredTxns) == 0 {
   118  			delete(txnsMap, table)
   119  		} else {
   120  			txnsMap[table] = filteredTxns
   121  		}
   122  	}
   123  	return
   124  }