github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/filter/filter.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package filter
    15  
    16  import (
    17  	timodel "github.com/pingcap/tidb/pkg/parser/model"
    18  	tfilter "github.com/pingcap/tidb/pkg/util/table-filter"
    19  	"github.com/pingcap/tiflow/cdc/model"
    20  	bf "github.com/pingcap/tiflow/pkg/binlog-filter"
    21  	"github.com/pingcap/tiflow/pkg/config"
    22  )
    23  
const (
	// SyncPointTable is the name of the table used to write the ts-map when sync-point is enabled.
	SyncPointTable = "syncpoint_v1"
	// TiCDCSystemSchema is the schema used only by TiCDC.
	TiCDCSystemSchema = "tidb_cdc"
)
    30  
// ddlWhiteListMap is a map of all DDL types that can be applied to cdc's schema storage.
// Each key is a TiDB DDL action type and each value is the binlog-filter event
// type it is associated with. isAllowedDDL only checks key membership, so any
// action type that is absent from this map is reported as not allowed.
var ddlWhiteListMap = map[timodel.ActionType]bf.EventType{
	// schema related DDLs
	timodel.ActionCreateSchema:                  bf.CreateDatabase,
	timodel.ActionDropSchema:                    bf.DropDatabase,
	timodel.ActionModifySchemaCharsetAndCollate: bf.ModifySchemaCharsetAndCollate,

	// table related DDLs
	// (both ActionRenameTable and ActionRenameTables map to bf.RenameTable)
	timodel.ActionCreateTable:                  bf.CreateTable,
	timodel.ActionDropTable:                    bf.DropTable,
	timodel.ActionTruncateTable:                bf.TruncateTable,
	timodel.ActionRenameTable:                  bf.RenameTable,
	timodel.ActionRenameTables:                 bf.RenameTable,
	timodel.ActionRecoverTable:                 bf.RecoverTable,
	timodel.ActionModifyTableComment:           bf.ModifyTableComment,
	timodel.ActionModifyTableCharsetAndCollate: bf.ModifyTableCharsetAndCollate,

	// view related DDLs
	timodel.ActionCreateView: bf.CreateView,
	timodel.ActionDropView:   bf.DropView,

	// partition related DDLs
	timodel.ActionAddTablePartition:      bf.AddTablePartition,
	timodel.ActionDropTablePartition:     bf.DropTablePartition,
	timodel.ActionTruncateTablePartition: bf.TruncateTablePartition,
	timodel.ActionExchangeTablePartition: bf.ExchangePartition,
	timodel.ActionReorganizePartition:    bf.ReorganizePartition,
	timodel.ActionAlterTablePartitioning: bf.AlterTablePartitioning,
	timodel.ActionRemovePartitioning:     bf.RemovePartitioning,

	// column related DDLs
	timodel.ActionAddColumn:       bf.AddColumn,
	timodel.ActionDropColumn:      bf.DropColumn,
	timodel.ActionModifyColumn:    bf.ModifyColumn,
	timodel.ActionSetDefaultValue: bf.SetDefaultValue,

	// index related DDLs
	timodel.ActionRebaseAutoID:         bf.RebaseAutoID,
	timodel.ActionAddPrimaryKey:        bf.AddPrimaryKey,
	timodel.ActionDropPrimaryKey:       bf.DropPrimaryKey,
	timodel.ActionAddIndex:             bf.CreateIndex,
	timodel.ActionDropIndex:            bf.DropIndex,
	timodel.ActionRenameIndex:          bf.RenameIndex,
	timodel.ActionAlterIndexVisibility: bf.AlterIndexVisibility,

	// TTL related DDLs
	timodel.ActionAlterTTLInfo:   bf.AlterTTLInfo,
	timodel.ActionAlterTTLRemove: bf.AlterTTLRemove,

	// DDLs that are difficult to classify
	timodel.ActionMultiSchemaChange: bf.MultiSchemaChange,

	// Deprecated DDLs, see https://github.com/pingcap/tidb/pull/35862.
	// The DDL types below are deprecated in TiDB v6.2.0, but they are kept here
	// in case some users use TiCDC to replicate data from TiDB v6.1.x.
	// They reuse the event types of their single-column counterparts.
	timodel.ActionAddColumns:  bf.AddColumn,
	timodel.ActionDropColumns: bf.DropColumn,
}
    89  
// Filter decides which DML events, DDL events, tables and schemas are ignored
// or discarded. Filter implementations are safe for concurrent use.
// TODO: find a better way to abstract this interface.
type Filter interface {
	// ShouldIgnoreDMLEvent returns true if the DML event should be ignored.
	ShouldIgnoreDMLEvent(dml *model.RowChangedEvent, rawRow model.RowChangedDatums, tableInfo *model.TableInfo) (bool, error)
	// ShouldIgnoreDDLEvent returns true if the DDL event should be ignored.
	// If a DDL is ignored, it will be applied to cdc's schema storage,
	// but will not be sent to downstream.
	ShouldIgnoreDDLEvent(ddl *model.DDLEvent) (bool, error)
	// ShouldDiscardDDL returns true if this DDL should be discarded.
	// If a DDL is discarded, it will neither be applied to cdc's schema storage
	// nor sent to downstream.
	ShouldDiscardDDL(ddlType timodel.ActionType, schema, table string) bool
	// ShouldIgnoreTable returns true if the table should be ignored.
	ShouldIgnoreTable(schema, table string) bool
	// ShouldIgnoreSchema returns true if the schema should be ignored.
	ShouldIgnoreSchema(schema string) bool
	// Verify should only be called by the create changefeed OpenAPI.
	// Its purpose is to verify the expression filter config.
	Verify(tableInfos []*model.TableInfo) error
}
   111  
// filter implements Filter.
type filter struct {
	// tableFilter is used to filter dml/ddl events by schema and table name.
	tableFilter tfilter.Filter
	// dmlExprFilter is used to filter out dml events by their column values.
	dmlExprFilter *dmlExprFilter
	// sqlEventFilter is used to filter out dml/ddl events by their type or query.
	sqlEventFilter *sqlEventFilter
	// ignoreTxnStartTs is used to filter out dml/ddl events by their startTs.
	ignoreTxnStartTs []uint64
}
   123  
   124  // NewFilter creates a filter.
   125  func NewFilter(cfg *config.ReplicaConfig, tz string) (Filter, error) {
   126  	f, err := VerifyTableRules(cfg.Filter)
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  
   131  	if !cfg.CaseSensitive {
   132  		f = tfilter.CaseInsensitive(f)
   133  	}
   134  
   135  	dmlExprFilter, err := newExprFilter(tz, cfg.Filter)
   136  	if err != nil {
   137  		return nil, err
   138  	}
   139  	sqlEventFilter, err := newSQLEventFilter(cfg.Filter)
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  	return &filter{
   144  		tableFilter:      f,
   145  		dmlExprFilter:    dmlExprFilter,
   146  		sqlEventFilter:   sqlEventFilter,
   147  		ignoreTxnStartTs: cfg.Filter.IgnoreTxnStartTs,
   148  	}, nil
   149  }
   150  
   151  // ShouldIgnoreDMLEvent checks if a DML event should be ignore by conditions below:
   152  // 0. By startTs.
   153  // 1. By table name.
   154  // 2. By type.
   155  // 3. By columns value.
   156  func (f *filter) ShouldIgnoreDMLEvent(
   157  	dml *model.RowChangedEvent,
   158  	rawRow model.RowChangedDatums,
   159  	ti *model.TableInfo,
   160  ) (bool, error) {
   161  	if f.shouldIgnoreStartTs(dml.StartTs) {
   162  		return true, nil
   163  	}
   164  
   165  	if f.ShouldIgnoreTable(dml.TableInfo.GetSchemaName(), dml.TableInfo.GetTableName()) {
   166  		return true, nil
   167  	}
   168  
   169  	ignoreByEventType, err := f.sqlEventFilter.shouldSkipDML(dml)
   170  	if err != nil {
   171  		return false, err
   172  	}
   173  	if ignoreByEventType {
   174  		return true, nil
   175  	}
   176  	return f.dmlExprFilter.shouldSkipDML(dml, rawRow, ti)
   177  }
   178  
   179  // ShouldDiscardDDL checks if a DDL should be discarded by conditions below:
   180  // 0. By allow list.
   181  // 1. By schema name.
   182  // 2. By table name.
   183  func (f *filter) ShouldDiscardDDL(ddlType timodel.ActionType, schema, table string) bool {
   184  	if !isAllowedDDL(ddlType) {
   185  		return true
   186  	}
   187  
   188  	if IsSchemaDDL(ddlType) {
   189  		return f.ShouldIgnoreSchema(schema)
   190  	}
   191  	return f.ShouldIgnoreTable(schema, table)
   192  }
   193  
   194  // ShouldIgnoreDDLEvent checks if a DDL event should be ignore by conditions below:
   195  // 0. By startTs.
   196  // 1. By ddl type.
   197  // 2. By ddl query.
   198  //
   199  // If a ddl is ignored, it will be applied to cdc's schema storage,
   200  // but will not be sent to downstream.
   201  // Note that a ignored ddl is different from a discarded ddl. For example, suppose
   202  // we have a changefeed-test with the following config:
   203  //   - table filter: rules = ['test.*']
   204  //   - event-filters: matcher = ["test.worker"] ignore-event = ["create table"]
   205  //
   206  // Then, for the following DDLs:
   207  //  1. `CREATE TABLE test.worker` will be ignored, but the table will be replicated by changefeed-test.
   208  //  2. `CREATE TABLE other.worker` will be discarded, and the table will not be replicated by changefeed-test.
   209  func (f *filter) ShouldIgnoreDDLEvent(ddl *model.DDLEvent) (bool, error) {
   210  	if f.shouldIgnoreStartTs(ddl.StartTs) {
   211  		return true, nil
   212  	}
   213  	return f.sqlEventFilter.shouldSkipDDL(ddl)
   214  }
   215  
   216  // ShouldIgnoreTable returns true if the specified table should be ignored by this changefeed.
   217  // NOTICE: Set `tbl` to an empty string to test against the whole database.
   218  func (f *filter) ShouldIgnoreTable(db, tbl string) bool {
   219  	if isSysSchema(db) {
   220  		return true
   221  	}
   222  	return !f.tableFilter.MatchTable(db, tbl)
   223  }
   224  
   225  // ShouldIgnoreSchema returns true if the specified schema should be ignored by this changefeed.
   226  func (f *filter) ShouldIgnoreSchema(schema string) bool {
   227  	return isSysSchema(schema) || !f.tableFilter.MatchSchema(schema)
   228  }
   229  
   230  func (f *filter) Verify(tableInfos []*model.TableInfo) error {
   231  	return f.dmlExprFilter.verify(tableInfos)
   232  }
   233  
   234  func (f *filter) shouldIgnoreStartTs(ts uint64) bool {
   235  	for _, ignoreTs := range f.ignoreTxnStartTs {
   236  		if ignoreTs == ts {
   237  			return true
   238  		}
   239  	}
   240  	return false
   241  }
   242  
   243  func isAllowedDDL(actionType timodel.ActionType) bool {
   244  	_, ok := ddlWhiteListMap[actionType]
   245  	return ok
   246  }
   247  
   248  // IsSchemaDDL returns true if the action type is a schema DDL.
   249  func IsSchemaDDL(actionType timodel.ActionType) bool {
   250  	switch actionType {
   251  	case timodel.ActionCreateSchema, timodel.ActionDropSchema,
   252  		timodel.ActionModifySchemaCharsetAndCollate:
   253  		return true
   254  	default:
   255  		return false
   256  	}
   257  }