// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package filter

import (
	timodel "github.com/pingcap/tidb/pkg/parser/model"
	tfilter "github.com/pingcap/tidb/pkg/util/table-filter"
	"github.com/pingcap/tiflow/cdc/model"
	bf "github.com/pingcap/tiflow/pkg/binlog-filter"
	"github.com/pingcap/tiflow/pkg/config"
)

const (
	// SyncPointTable is the name of the table used to write the ts-map when
	// sync-point is enabled.
	SyncPointTable = "syncpoint_v1"
	// TiCDCSystemSchema is the schema used only by TiCDC.
	TiCDCSystemSchema = "tidb_cdc"
)

// ddlWhiteListMap is a map of all DDL types that can be applied to cdc's schema storage.
// Each allowed TiDB DDL action type is mapped to its binlog-filter event type.
32 var ddlWhiteListMap = map[timodel.ActionType]bf.EventType{ 33 // schema related DDLs 34 timodel.ActionCreateSchema: bf.CreateDatabase, 35 timodel.ActionDropSchema: bf.DropDatabase, 36 timodel.ActionModifySchemaCharsetAndCollate: bf.ModifySchemaCharsetAndCollate, 37 38 // table related DDLs 39 timodel.ActionCreateTable: bf.CreateTable, 40 timodel.ActionDropTable: bf.DropTable, 41 timodel.ActionTruncateTable: bf.TruncateTable, 42 timodel.ActionRenameTable: bf.RenameTable, 43 timodel.ActionRenameTables: bf.RenameTable, 44 timodel.ActionRecoverTable: bf.RecoverTable, 45 timodel.ActionModifyTableComment: bf.ModifyTableComment, 46 timodel.ActionModifyTableCharsetAndCollate: bf.ModifyTableCharsetAndCollate, 47 48 // view related DDLs 49 timodel.ActionCreateView: bf.CreateView, 50 timodel.ActionDropView: bf.DropView, 51 52 // partition related DDLs 53 timodel.ActionAddTablePartition: bf.AddTablePartition, 54 timodel.ActionDropTablePartition: bf.DropTablePartition, 55 timodel.ActionTruncateTablePartition: bf.TruncateTablePartition, 56 timodel.ActionExchangeTablePartition: bf.ExchangePartition, 57 timodel.ActionReorganizePartition: bf.ReorganizePartition, 58 timodel.ActionAlterTablePartitioning: bf.AlterTablePartitioning, 59 timodel.ActionRemovePartitioning: bf.RemovePartitioning, 60 61 // column related DDLs 62 timodel.ActionAddColumn: bf.AddColumn, 63 timodel.ActionDropColumn: bf.DropColumn, 64 timodel.ActionModifyColumn: bf.ModifyColumn, 65 timodel.ActionSetDefaultValue: bf.SetDefaultValue, 66 67 // index related DDLs 68 timodel.ActionRebaseAutoID: bf.RebaseAutoID, 69 timodel.ActionAddPrimaryKey: bf.AddPrimaryKey, 70 timodel.ActionDropPrimaryKey: bf.DropPrimaryKey, 71 timodel.ActionAddIndex: bf.CreateIndex, 72 timodel.ActionDropIndex: bf.DropIndex, 73 timodel.ActionRenameIndex: bf.RenameIndex, 74 timodel.ActionAlterIndexVisibility: bf.AlterIndexVisibility, 75 76 // TTL related DDLs 77 timodel.ActionAlterTTLInfo: bf.AlterTTLInfo, 78 timodel.ActionAlterTTLRemove: 
bf.AlterTTLRemove, 79 80 // difficult to classify DDLs 81 timodel.ActionMultiSchemaChange: bf.MultiSchemaChange, 82 83 // deprecated DDLs,see https://github.com/pingcap/tidb/pull/35862. 84 // DDL types below are deprecated in TiDB v6.2.0, but we still keep them here 85 // In case that some users will use TiCDC to replicate data from TiDB v6.1.x. 86 timodel.ActionAddColumns: bf.AddColumn, 87 timodel.ActionDropColumns: bf.DropColumn, 88 } 89 90 // Filter are safe for concurrent use. 91 // TODO: find a better way to abstract this interface. 92 type Filter interface { 93 // ShouldIgnoreDMLEvent returns true if the DML event should be ignored. 94 ShouldIgnoreDMLEvent(dml *model.RowChangedEvent, rawRow model.RowChangedDatums, tableInfo *model.TableInfo) (bool, error) 95 // ShouldIgnoreDDLEvent returns true if the DDL event should be ignored. 96 // If a ddl is ignored, it will be applied to cdc's schema storage, 97 // but will not be sent to downstream. 98 ShouldIgnoreDDLEvent(ddl *model.DDLEvent) (bool, error) 99 // ShouldDiscardDDL returns true if this DDL should be discarded. 100 // If a ddl is discarded, it will neither be applied to cdc's schema storage 101 // nor sent to downstream. 102 ShouldDiscardDDL(ddlType timodel.ActionType, schema, table string) bool 103 // ShouldIgnoreTable returns true if the table should be ignored. 104 ShouldIgnoreTable(schema, table string) bool 105 // ShouldIgnoreSchema returns true if the schema should be ignored. 106 ShouldIgnoreSchema(schema string) bool 107 // Verify should only be called by create changefeed OpenAPI. 108 // Its purpose is to verify the expression filter config. 109 Verify(tableInfos []*model.TableInfo) error 110 } 111 112 // filter implements Filter. 113 type filter struct { 114 // tableFilter is used to filter in dml/ddl event by table name. 115 tableFilter tfilter.Filter 116 // dmlExprFilter is used to filter out dml event by its columns value. 
117 dmlExprFilter *dmlExprFilter 118 // sqlEventFilter is used to filter out dml/ddl event by its type or query. 119 sqlEventFilter *sqlEventFilter 120 // ignoreTxnStartTs is used to filter out dml/ddl event by its starsTs. 121 ignoreTxnStartTs []uint64 122 } 123 124 // NewFilter creates a filter. 125 func NewFilter(cfg *config.ReplicaConfig, tz string) (Filter, error) { 126 f, err := VerifyTableRules(cfg.Filter) 127 if err != nil { 128 return nil, err 129 } 130 131 if !cfg.CaseSensitive { 132 f = tfilter.CaseInsensitive(f) 133 } 134 135 dmlExprFilter, err := newExprFilter(tz, cfg.Filter) 136 if err != nil { 137 return nil, err 138 } 139 sqlEventFilter, err := newSQLEventFilter(cfg.Filter) 140 if err != nil { 141 return nil, err 142 } 143 return &filter{ 144 tableFilter: f, 145 dmlExprFilter: dmlExprFilter, 146 sqlEventFilter: sqlEventFilter, 147 ignoreTxnStartTs: cfg.Filter.IgnoreTxnStartTs, 148 }, nil 149 } 150 151 // ShouldIgnoreDMLEvent checks if a DML event should be ignore by conditions below: 152 // 0. By startTs. 153 // 1. By table name. 154 // 2. By type. 155 // 3. By columns value. 156 func (f *filter) ShouldIgnoreDMLEvent( 157 dml *model.RowChangedEvent, 158 rawRow model.RowChangedDatums, 159 ti *model.TableInfo, 160 ) (bool, error) { 161 if f.shouldIgnoreStartTs(dml.StartTs) { 162 return true, nil 163 } 164 165 if f.ShouldIgnoreTable(dml.TableInfo.GetSchemaName(), dml.TableInfo.GetTableName()) { 166 return true, nil 167 } 168 169 ignoreByEventType, err := f.sqlEventFilter.shouldSkipDML(dml) 170 if err != nil { 171 return false, err 172 } 173 if ignoreByEventType { 174 return true, nil 175 } 176 return f.dmlExprFilter.shouldSkipDML(dml, rawRow, ti) 177 } 178 179 // ShouldDiscardDDL checks if a DDL should be discarded by conditions below: 180 // 0. By allow list. 181 // 1. By schema name. 182 // 2. By table name. 
183 func (f *filter) ShouldDiscardDDL(ddlType timodel.ActionType, schema, table string) bool { 184 if !isAllowedDDL(ddlType) { 185 return true 186 } 187 188 if IsSchemaDDL(ddlType) { 189 return f.ShouldIgnoreSchema(schema) 190 } 191 return f.ShouldIgnoreTable(schema, table) 192 } 193 194 // ShouldIgnoreDDLEvent checks if a DDL event should be ignore by conditions below: 195 // 0. By startTs. 196 // 1. By ddl type. 197 // 2. By ddl query. 198 // 199 // If a ddl is ignored, it will be applied to cdc's schema storage, 200 // but will not be sent to downstream. 201 // Note that a ignored ddl is different from a discarded ddl. For example, suppose 202 // we have a changefeed-test with the following config: 203 // - table filter: rules = ['test.*'] 204 // - event-filters: matcher = ["test.worker"] ignore-event = ["create table"] 205 // 206 // Then, for the following DDLs: 207 // 1. `CREATE TABLE test.worker` will be ignored, but the table will be replicated by changefeed-test. 208 // 2. `CREATE TABLE other.worker` will be discarded, and the table will not be replicated by changefeed-test. 209 func (f *filter) ShouldIgnoreDDLEvent(ddl *model.DDLEvent) (bool, error) { 210 if f.shouldIgnoreStartTs(ddl.StartTs) { 211 return true, nil 212 } 213 return f.sqlEventFilter.shouldSkipDDL(ddl) 214 } 215 216 // ShouldIgnoreTable returns true if the specified table should be ignored by this changefeed. 217 // NOTICE: Set `tbl` to an empty string to test against the whole database. 218 func (f *filter) ShouldIgnoreTable(db, tbl string) bool { 219 if isSysSchema(db) { 220 return true 221 } 222 return !f.tableFilter.MatchTable(db, tbl) 223 } 224 225 // ShouldIgnoreSchema returns true if the specified schema should be ignored by this changefeed. 
226 func (f *filter) ShouldIgnoreSchema(schema string) bool { 227 return isSysSchema(schema) || !f.tableFilter.MatchSchema(schema) 228 } 229 230 func (f *filter) Verify(tableInfos []*model.TableInfo) error { 231 return f.dmlExprFilter.verify(tableInfos) 232 } 233 234 func (f *filter) shouldIgnoreStartTs(ts uint64) bool { 235 for _, ignoreTs := range f.ignoreTxnStartTs { 236 if ignoreTs == ts { 237 return true 238 } 239 } 240 return false 241 } 242 243 func isAllowedDDL(actionType timodel.ActionType) bool { 244 _, ok := ddlWhiteListMap[actionType] 245 return ok 246 } 247 248 // IsSchemaDDL returns true if the action type is a schema DDL. 249 func IsSchemaDDL(actionType timodel.ActionType) bool { 250 switch actionType { 251 case timodel.ActionCreateSchema, timodel.ActionDropSchema, 252 timodel.ActionModifySchemaCharsetAndCollate: 253 return true 254 default: 255 return false 256 } 257 }