github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/pkg/cyclic/filter.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package cyclic 15 16 import ( 17 "github.com/pingcap/log" 18 "github.com/pingcap/ticdc/cdc/model" 19 "github.com/pingcap/ticdc/pkg/cyclic/mark" 20 "go.uber.org/zap" 21 ) 22 23 // ExtractReplicaID extracts replica ID from the given mark row. 24 func ExtractReplicaID(markRow *model.RowChangedEvent) uint64 { 25 for _, c := range markRow.Columns { 26 if c == nil { 27 continue 28 } 29 if c.Name == mark.CyclicReplicaIDCol { 30 return c.Value.(uint64) 31 } 32 } 33 log.Panic("bad mark table, " + mark.CyclicReplicaIDCol + " not found") 34 return 0 35 } 36 37 // TxnMap maps start ts to txn may cross multiple tables. 38 type TxnMap map[uint64]map[model.TableName][]*model.RowChangedEvent 39 40 // MarkMap maps start ts to mark table rows. 41 // There is at most one mark table row that is modified for each transaction. 42 type MarkMap map[uint64]*model.RowChangedEvent 43 44 func (m MarkMap) shouldFilterTxn(startTs uint64, filterReplicaIDs []uint64, replicaID uint64) (*model.RowChangedEvent, bool) { 45 markRow, markFound := m[startTs] 46 if !markFound { 47 return nil, false 48 } 49 from := ExtractReplicaID(markRow) 50 if from == replicaID { 51 log.Panic("cyclic replication loopback detected", 52 zap.Any("markRow", markRow), 53 zap.Uint64("replicaID", replicaID)) 54 } 55 for i := range filterReplicaIDs { 56 if filterReplicaIDs[i] == from { 57 return markRow, true 58 } 59 } 60 return markRow, false 61 } 62 63 // FilterAndReduceTxns filters duplicate txns bases on filterReplicaIDs and 64 // if the mark table dml is exist in the txn, this functiong will set the replicaID by mark table dml 65 // if the mark table dml is not exist, this function will set the replicaID by config 66 func FilterAndReduceTxns( 67 txnsMap map[model.TableID][]*model.SingleTableTxn, filterReplicaIDs []uint64, replicaID uint64, 68 ) (skippedRowCount int) { 69 markMap := make(MarkMap) 70 for _, txns := range txnsMap { 71 if !mark.IsMarkTable(txns[0].Table.Schema, txns[0].Table.Table) { 72 continue 73 } 74 for _, txn := range txns { 75 for _, event := range txn.Rows { 76 first, ok := markMap[txn.StartTs] 77 if ok { 78 // TiKV may emit the same row multiple times. 79 if event.CommitTs != first.CommitTs || 80 event.RowID != first.RowID { 81 log.Panic( 82 "there should be at most one mark row for each txn", 83 zap.Uint64("start-ts", event.StartTs), 84 zap.Any("first", first), 85 zap.Any("second", event)) 86 } 87 } 88 markMap[event.StartTs] = event 89 } 90 } 91 } 92 for table, txns := range txnsMap { 93 if mark.IsMarkTable(txns[0].Table.Schema, txns[0].Table.Table) { 94 delete(txnsMap, table) 95 for i := range txns { 96 // For simplicity, we do not count mark table rows in statistics. 97 skippedRowCount += len(txns[i].Rows) 98 } 99 continue 100 } 101 filteredTxns := make([]*model.SingleTableTxn, 0, len(txns)) 102 for _, txn := range txns { 103 // Check if we should skip this event 104 markRow, needSkip := markMap.shouldFilterTxn(txn.StartTs, filterReplicaIDs, replicaID) 105 if needSkip { 106 // Found cyclic mark, skip this event as it originly created from 107 // downstream. 108 skippedRowCount += len(txn.Rows) 109 continue 110 } 111 txn.ReplicaID = replicaID 112 if markRow != nil { 113 txn.ReplicaID = ExtractReplicaID(markRow) 114 } 115 filteredTxns = append(filteredTxns, txn) 116 } 117 if len(filteredTxns) == 0 { 118 delete(txnsMap, table) 119 } else { 120 txnsMap[table] = filteredTxns 121 } 122 } 123 return 124 }