github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/binlogstream/binlog_locations.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package binlogstream
    15  
    16  import (
    17  	"fmt"
    18  	"strings"
    19  
    20  	"github.com/go-mysql-org/go-mysql/replication"
    21  	"github.com/pingcap/tiflow/dm/pkg/binlog"
    22  	"github.com/pingcap/tiflow/dm/pkg/binlog/event"
    23  	"github.com/pingcap/tiflow/dm/pkg/gtid"
    24  	"github.com/pingcap/tiflow/dm/pkg/log"
    25  	"go.uber.org/zap"
    26  )
    27  
    28  func isDataEvent(e *replication.BinlogEvent) bool {
    29  	switch e.Event.(type) {
    30  	case *replication.TableMapEvent,
    31  		*replication.RowsEvent,
    32  		*replication.QueryEvent,
    33  		*replication.TransactionPayloadEvent:
    34  		return true
    35  	}
    36  	return false
    37  }
    38  
    39  // locations provides curStartLocation, curEndLocation, txnEndLocation for binlog
    40  // events.
    41  //
    42  //   - for the event which isDataEvent:
    43  //     |          +-------------+
    44  //     |      ... |current event| ...
    45  //     |     ^    +-------------+    ^
    46  //     |     |                       |
    47  //     | curStartLocation        curEndLocation
    48  //
    49  //     there may be more events between curStartLocation and curEndLocation due
    50  //     to the limitation of binlog or implementation of DM, but in such scenario,
    51  //     those events should always belong to one transaction.
    52  //
    53  //   - for RotateEvent:
    54  //     the binlog filename of curEndLocation and txnEndLocation will be updated
    55  //     to the new NextLogName in RotateEvent.
    56  //
    57  //   - else:
    58  //     we do not guarantee the behaviour of 3 locations of this struct.
    59  type locations struct {
    60  	// curStartLocation is used when
    61  	// - display a meaningful location
    62  	// - match the injected location by handle-error
    63  	// - save table checkpoint of DML
    64  	// curEndLocation is used when
    65  	// - handle end location of DDL, when save table checkpoint or shard-resync
    66  	curStartLocation binlog.Location
    67  	curEndLocation   binlog.Location
    68  
    69  	// txnEndLocation is the end location of last seen transaction. If current event is the last event of a txn,
    70  	// txnEndLocation will be assigned from curEndLocation
    71  	// it is used when
    72  	// - reset binlog replication for a finer granularity
    73  	// - save global checkpoint
    74  	txnEndLocation binlog.Location
    75  }
    76  
    77  func (l *locations) reset(loc binlog.Location) {
    78  	// need to clone location to avoid the modification leaking outside
    79  	clone := loc.Clone()
    80  	l.curStartLocation = clone
    81  	l.curEndLocation = clone
    82  	l.txnEndLocation = clone
    83  }
    84  
    85  // String implements fmt.Stringer.
    86  func (l *locations) String() string {
    87  	return fmt.Sprintf("curStartLocation: %s, curEndLocation: %s, txnEndLocation: %s",
    88  		l.curStartLocation.String(), l.curEndLocation.String(), l.txnEndLocation.String())
    89  }
    90  
    91  // updateHookFunc is used to run some logic before locationRecorder.update.
    92  type updateHookFunc func()
    93  
    94  // locationRecorder can maintain locations along with update(BinlogEvent). For the
    95  // properties of locations see comments of locations struct.
    96  // locationRecorder is not concurrent-safe.
    97  type locationRecorder struct {
    98  	*locations
    99  
   100  	// DML will also generate a query event if user set session binlog_format='statement', we use this field to
   101  	// distinguish DML query event.
   102  	inDMLQuery bool
   103  
   104  	preUpdateHook []updateHookFunc
   105  }
   106  
   107  func newLocationRecorder() *locationRecorder {
   108  	return &locationRecorder{
   109  		locations: &locations{},
   110  	}
   111  }
   112  
   113  func (l *locationRecorder) saveTxnEndLocation() {
   114  	l.txnEndLocation = l.curEndLocation.Clone()
   115  }
   116  
   117  // shouldUpdatePos returns true when the given event is from a real upstream writing, returns false when the event is
   118  // header, heartbeat, etc.
   119  func shouldUpdatePos(e *replication.BinlogEvent) bool {
   120  	switch e.Header.EventType {
   121  	case replication.FORMAT_DESCRIPTION_EVENT, replication.HEARTBEAT_EVENT, replication.IGNORABLE_EVENT,
   122  		replication.PREVIOUS_GTIDS_EVENT, replication.MARIADB_GTID_LIST_EVENT:
   123  		return false
   124  	}
   125  	//nolint:gosimple
   126  	if e.Header.Flags&replication.LOG_EVENT_ARTIFICIAL_F != 0 {
   127  		// ignore events with LOG_EVENT_ARTIFICIAL_F flag(0x0020) set
   128  		// ref: https://dev.mysql.com/doc/internals/en/binlog-event-flag.html
   129  		return false
   130  	}
   131  
   132  	return true
   133  }
   134  
   135  func (l *locationRecorder) updateCurStartGTID() {
   136  	gset := l.curEndLocation.GetGTID()
   137  	if gset == nil {
   138  		return
   139  	}
   140  	err := l.curStartLocation.SetGTID(gset)
   141  	if err != nil {
   142  		log.L().DPanic("failed to set GTID set",
   143  			zap.Any("GTID set", gset),
   144  			zap.Error(err))
   145  	}
   146  }
   147  
   148  func (l *locationRecorder) setCurEndGTID(gtidStr string) {
   149  	gset := l.curEndLocation.GetGTID()
   150  
   151  	if gset == nil {
   152  		gset, _ = gtid.ParserGTID("", gtidStr)
   153  		_ = l.curEndLocation.SetGTID(gset)
   154  		return
   155  	}
   156  
   157  	clone := gset.Clone()
   158  	err := clone.Update(gtidStr)
   159  	if err != nil {
   160  		log.L().DPanic("failed to update GTID set",
   161  			zap.String("GTID", gtidStr),
   162  			zap.Error(err))
   163  		return
   164  	}
   165  
   166  	err = l.curEndLocation.SetGTID(clone)
   167  	if err != nil {
   168  		log.L().DPanic("failed to set GTID set",
   169  			zap.String("GTID", gtidStr),
   170  			zap.Error(err))
   171  	}
   172  }
   173  
   174  // update maintains the member of locationRecorder as their definitions.
   175  // - curStartLocation is assigned to curEndLocation
   176  // - curEndLocation is tried to be updated in-place
   177  // - txnEndLocation is assigned to curEndLocation when `e` is the last event of a transaction.
   178  func (l *locationRecorder) update(e *replication.BinlogEvent) {
   179  	for _, f := range l.preUpdateHook {
   180  		f()
   181  	}
   182  	// reset to zero value of slice after executed
   183  	l.preUpdateHook = nil
   184  
   185  	// GTID part is maintained separately
   186  	l.curStartLocation.Position = l.curEndLocation.Position
   187  	l.curStartLocation.Suffix = l.curEndLocation.Suffix
   188  
   189  	if event, ok := e.Event.(*replication.RotateEvent); ok {
   190  		nextName := string(event.NextLogName)
   191  		if l.curEndLocation.Position.Name != nextName {
   192  			l.curEndLocation.Position.Name = nextName
   193  			l.curEndLocation.Position.Pos = binlog.FileHeaderLen
   194  			l.saveTxnEndLocation()
   195  		}
   196  		return
   197  	}
   198  
   199  	if !shouldUpdatePos(e) {
   200  		return
   201  	}
   202  
   203  	l.curEndLocation.Position.Pos = e.Header.LogPos
   204  
   205  	switch ev := e.Event.(type) {
   206  	case *replication.GTIDEvent:
   207  		// following event should have new GTID set as end location
   208  		gtidStr, err := event.GetGTIDStr(e)
   209  		if err != nil {
   210  			log.L().DPanic("failed to get GTID from event",
   211  				zap.Any("event", e),
   212  				zap.Error(err))
   213  			break
   214  		}
   215  		l.preUpdateHook = append(l.preUpdateHook, func() {
   216  			l.setCurEndGTID(gtidStr)
   217  		})
   218  	case *replication.MariadbGTIDEvent:
   219  		// following event should have new GTID set as end location
   220  		gtidStr, err := event.GetGTIDStr(e)
   221  		if err != nil {
   222  			log.L().DPanic("failed to get GTID from event",
   223  				zap.Any("event", e),
   224  				zap.Error(err))
   225  			break
   226  		}
   227  		l.preUpdateHook = append(l.preUpdateHook, func() {
   228  			l.setCurEndGTID(gtidStr)
   229  		})
   230  
   231  		if !ev.IsDDL() {
   232  			l.inDMLQuery = true
   233  		}
   234  	case *replication.XIDEvent:
   235  		// for transactional engines like InnoDB, COMMIT is xid event
   236  		l.saveTxnEndLocation()
   237  		l.inDMLQuery = false
   238  
   239  		// next event can update its GTID set of start location
   240  		l.preUpdateHook = append(l.preUpdateHook, l.updateCurStartGTID)
   241  	case *replication.QueryEvent:
   242  		query := strings.TrimSpace(string(ev.Query))
   243  		switch query {
   244  		case "BEGIN":
   245  			// MySQL will write a "BEGIN" query event when it starts a DML transaction, we use this event to distinguish
   246  			// DML query event which comes from a session binlog_format = STATEMENT.
   247  			// But MariaDB will not write "BEGIN" query event, we simply hope user should not do that.
   248  			l.inDMLQuery = true
   249  		case "COMMIT":
   250  			// for non-transactional engines like MyISAM, COMMIT is query event
   251  			l.inDMLQuery = false
   252  		}
   253  
   254  		if l.inDMLQuery {
   255  			return
   256  		}
   257  
   258  		// next event can update its GTID set of start location
   259  		l.preUpdateHook = append(l.preUpdateHook, l.updateCurStartGTID)
   260  
   261  		l.saveTxnEndLocation()
   262  	}
   263  }