github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/binlogstream/binlog_locations.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package binlogstream 15 16 import ( 17 "fmt" 18 "strings" 19 20 "github.com/go-mysql-org/go-mysql/replication" 21 "github.com/pingcap/tiflow/dm/pkg/binlog" 22 "github.com/pingcap/tiflow/dm/pkg/binlog/event" 23 "github.com/pingcap/tiflow/dm/pkg/gtid" 24 "github.com/pingcap/tiflow/dm/pkg/log" 25 "go.uber.org/zap" 26 ) 27 28 func isDataEvent(e *replication.BinlogEvent) bool { 29 switch e.Event.(type) { 30 case *replication.TableMapEvent, 31 *replication.RowsEvent, 32 *replication.QueryEvent, 33 *replication.TransactionPayloadEvent: 34 return true 35 } 36 return false 37 } 38 39 // locations provides curStartLocation, curEndLocation, txnEndLocation for binlog 40 // events. 41 // 42 // - for the event which isDataEvent: 43 // | +-------------+ 44 // | ... |current event| ... 45 // | ^ +-------------+ ^ 46 // | | | 47 // | curStartLocation curEndLocation 48 // 49 // there may be more events between curStartLocation and curEndLocation due 50 // to the limitation of binlog or implementation of DM, but in such scenario, 51 // those events should always belong to one transaction. 52 // 53 // - for RotateEvent: 54 // the binlog filename of curEndLocation and txnEndLocation will be updated 55 // to the new NextLogName in RotateEvent. 56 // 57 // - else: 58 // we do not guarantee the behaviour of 3 locations of this struct. 59 type locations struct { 60 // curStartLocation is used when 61 // - display a meaningful location 62 // - match the injected location by handle-error 63 // - save table checkpoint of DML 64 // curEndLocation is used when 65 // - handle end location of DDL, when save table checkpoint or shard-resync 66 curStartLocation binlog.Location 67 curEndLocation binlog.Location 68 69 // txnEndLocation is the end location of last seen transaction. If current event is the last event of a txn, 70 // txnEndLocation will be assigned from curEndLocation 71 // it is used when 72 // - reset binlog replication for a finer granularity 73 // - save global checkpoint 74 txnEndLocation binlog.Location 75 } 76 77 func (l *locations) reset(loc binlog.Location) { 78 // need to clone location to avoid the modification leaking outside 79 clone := loc.Clone() 80 l.curStartLocation = clone 81 l.curEndLocation = clone 82 l.txnEndLocation = clone 83 } 84 85 // String implements fmt.Stringer. 86 func (l *locations) String() string { 87 return fmt.Sprintf("curStartLocation: %s, curEndLocation: %s, txnEndLocation: %s", 88 l.curStartLocation.String(), l.curEndLocation.String(), l.txnEndLocation.String()) 89 } 90 91 // updateHookFunc is used to run some logic before locationRecorder.update. 92 type updateHookFunc func() 93 94 // locationRecorder can maintain locations along with update(BinlogEvent). For the 95 // properties of locations see comments of locations struct. 96 // locationRecorder is not concurrent-safe. 97 type locationRecorder struct { 98 *locations 99 100 // DML will also generate a query event if user set session binlog_format='statement', we use this field to 101 // distinguish DML query event. 102 inDMLQuery bool 103 104 preUpdateHook []updateHookFunc 105 } 106 107 func newLocationRecorder() *locationRecorder { 108 return &locationRecorder{ 109 locations: &locations{}, 110 } 111 } 112 113 func (l *locationRecorder) saveTxnEndLocation() { 114 l.txnEndLocation = l.curEndLocation.Clone() 115 } 116 117 // shouldUpdatePos returns true when the given event is from a real upstream writing, returns false when the event is 118 // header, heartbeat, etc. 119 func shouldUpdatePos(e *replication.BinlogEvent) bool { 120 switch e.Header.EventType { 121 case replication.FORMAT_DESCRIPTION_EVENT, replication.HEARTBEAT_EVENT, replication.IGNORABLE_EVENT, 122 replication.PREVIOUS_GTIDS_EVENT, replication.MARIADB_GTID_LIST_EVENT: 123 return false 124 } 125 //nolint:gosimple 126 if e.Header.Flags&replication.LOG_EVENT_ARTIFICIAL_F != 0 { 127 // ignore events with LOG_EVENT_ARTIFICIAL_F flag(0x0020) set 128 // ref: https://dev.mysql.com/doc/internals/en/binlog-event-flag.html 129 return false 130 } 131 132 return true 133 } 134 135 func (l *locationRecorder) updateCurStartGTID() { 136 gset := l.curEndLocation.GetGTID() 137 if gset == nil { 138 return 139 } 140 err := l.curStartLocation.SetGTID(gset) 141 if err != nil { 142 log.L().DPanic("failed to set GTID set", 143 zap.Any("GTID set", gset), 144 zap.Error(err)) 145 } 146 } 147 148 func (l *locationRecorder) setCurEndGTID(gtidStr string) { 149 gset := l.curEndLocation.GetGTID() 150 151 if gset == nil { 152 gset, _ = gtid.ParserGTID("", gtidStr) 153 _ = l.curEndLocation.SetGTID(gset) 154 return 155 } 156 157 clone := gset.Clone() 158 err := clone.Update(gtidStr) 159 if err != nil { 160 log.L().DPanic("failed to update GTID set", 161 zap.String("GTID", gtidStr), 162 zap.Error(err)) 163 return 164 } 165 166 err = l.curEndLocation.SetGTID(clone) 167 if err != nil { 168 log.L().DPanic("failed to set GTID set", 169 zap.String("GTID", gtidStr), 170 zap.Error(err)) 171 } 172 } 173 174 // update maintains the member of locationRecorder as their definitions. 175 // - curStartLocation is assigned to curEndLocation 176 // - curEndLocation is tried to be updated in-place 177 // - txnEndLocation is assigned to curEndLocation when `e` is the last event of a transaction. 178 func (l *locationRecorder) update(e *replication.BinlogEvent) { 179 for _, f := range l.preUpdateHook { 180 f() 181 } 182 // reset to zero value of slice after executed 183 l.preUpdateHook = nil 184 185 // GTID part is maintained separately 186 l.curStartLocation.Position = l.curEndLocation.Position 187 l.curStartLocation.Suffix = l.curEndLocation.Suffix 188 189 if event, ok := e.Event.(*replication.RotateEvent); ok { 190 nextName := string(event.NextLogName) 191 if l.curEndLocation.Position.Name != nextName { 192 l.curEndLocation.Position.Name = nextName 193 l.curEndLocation.Position.Pos = binlog.FileHeaderLen 194 l.saveTxnEndLocation() 195 } 196 return 197 } 198 199 if !shouldUpdatePos(e) { 200 return 201 } 202 203 l.curEndLocation.Position.Pos = e.Header.LogPos 204 205 switch ev := e.Event.(type) { 206 case *replication.GTIDEvent: 207 // following event should have new GTID set as end location 208 gtidStr, err := event.GetGTIDStr(e) 209 if err != nil { 210 log.L().DPanic("failed to get GTID from event", 211 zap.Any("event", e), 212 zap.Error(err)) 213 break 214 } 215 l.preUpdateHook = append(l.preUpdateHook, func() { 216 l.setCurEndGTID(gtidStr) 217 }) 218 case *replication.MariadbGTIDEvent: 219 // following event should have new GTID set as end location 220 gtidStr, err := event.GetGTIDStr(e) 221 if err != nil { 222 log.L().DPanic("failed to get GTID from event", 223 zap.Any("event", e), 224 zap.Error(err)) 225 break 226 } 227 l.preUpdateHook = append(l.preUpdateHook, func() { 228 l.setCurEndGTID(gtidStr) 229 }) 230 231 if !ev.IsDDL() { 232 l.inDMLQuery = true 233 } 234 case *replication.XIDEvent: 235 // for transactional engines like InnoDB, COMMIT is xid event 236 l.saveTxnEndLocation() 237 l.inDMLQuery = false 238 239 // next event can update its GTID set of start location 240 l.preUpdateHook = append(l.preUpdateHook, l.updateCurStartGTID) 241 case *replication.QueryEvent: 242 query := strings.TrimSpace(string(ev.Query)) 243 switch query { 244 case "BEGIN": 245 // MySQL will write a "BEGIN" query event when it starts a DML transaction, we use this event to distinguish 246 // DML query event which comes from a session binlog_format = STATEMENT. 247 // But MariaDB will not write "BEGIN" query event, we simply hope user should not do that. 248 l.inDMLQuery = true 249 case "COMMIT": 250 // for non-transactional engines like MyISAM, COMMIT is query event 251 l.inDMLQuery = false 252 } 253 254 if l.inDMLQuery { 255 return 256 } 257 258 // next event can update its GTID set of start location 259 l.preUpdateHook = append(l.preUpdateHook, l.updateCurStartGTID) 260 261 l.saveTxnEndLocation() 262 } 263 }