github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/relay/file_util.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package relay 15 16 import ( 17 "bytes" 18 "context" 19 "io" 20 "os" 21 "strings" 22 "time" 23 24 gmysql "github.com/go-mysql-org/go-mysql/mysql" 25 "github.com/go-mysql-org/go-mysql/replication" 26 "github.com/pingcap/tidb/pkg/parser" 27 "github.com/pingcap/tiflow/dm/pkg/binlog/event" 28 "github.com/pingcap/tiflow/dm/pkg/binlog/reader" 29 "github.com/pingcap/tiflow/dm/pkg/gtid" 30 parserpkg "github.com/pingcap/tiflow/dm/pkg/parser" 31 "github.com/pingcap/tiflow/dm/pkg/terror" 32 ) 33 34 // checkBinlogHeaderExist checks if the file has a binlog file header. 35 // It is not safe if there other routine is writing the file. 36 func checkBinlogHeaderExist(filename string) (bool, error) { 37 f, err := os.Open(filename) 38 if err != nil { 39 return false, terror.Annotatef(terror.ErrRelayWriterFileOperate.New(err.Error()), "open file %s", filename) 40 } 41 defer f.Close() 42 43 return checkBinlogHeaderExistFd(f) 44 } 45 46 // checkBinlogHeaderExistFd checks if the file has a binlog file header. 47 // It is not safe if there other routine is writing the file. 48 func checkBinlogHeaderExistFd(fd *os.File) (bool, error) { 49 fileHeaderLen := len(replication.BinLogFileHeader) 50 buff := make([]byte, fileHeaderLen) 51 n, err := fd.Read(buff) 52 if err != nil { 53 if n == 0 && err == io.EOF { 54 return false, nil // empty file 55 } 56 return false, terror.Annotate(terror.ErrRelayCheckBinlogFileHeaderExist.New(err.Error()), "read binlog header") 57 } else if n != fileHeaderLen { 58 return false, terror.ErrRelayCheckBinlogFileHeaderExist.Generatef("binlog file %s has no enough data, only got % X", fd.Name(), buff[:n]) 59 } 60 61 if !bytes.Equal(buff, replication.BinLogFileHeader) { 62 return false, terror.ErrRelayCheckBinlogFileHeaderExist.Generatef("binlog file %s header not valid, got % X, expect % X", fd.Name(), buff, replication.BinLogFileHeader) 63 } 64 return true, nil 65 } 66 67 // checkFormatDescriptionEventExist checks if the file has a valid FormatDescriptionEvent. 68 // It is not safe if there other routine is writing the file. 69 func checkFormatDescriptionEventExist(filename string) (bool, error) { 70 f, err := os.Open(filename) 71 if err != nil { 72 return false, terror.Annotatef(terror.ErrRelayCheckFormatDescEventExist.New(err.Error()), "open file %s", filename) 73 } 74 defer f.Close() 75 76 // FormatDescriptionEvent always follows the binlog file header 77 exist, err := checkBinlogHeaderExistFd(f) 78 if err != nil { 79 return false, terror.Annotatef(err, "check binlog file header for %s", filename) 80 } else if !exist { 81 return false, terror.ErrRelayCheckFormatDescEventExist.Generatef("no binlog file header at the beginning for %s", filename) 82 } 83 84 // check whether only the file header 85 fileHeaderLen := len(replication.BinLogFileHeader) 86 fs, err := f.Stat() 87 if err != nil { 88 return false, terror.Annotatef(terror.ErrRelayCheckFormatDescEventExist.New(err.Error()), "get stat for %s", filename) 89 } else if fs.Size() == int64(fileHeaderLen) { 90 return false, nil // only the file header 91 } 92 93 // seek to the beginning of the FormatDescriptionEvent 94 _, err = f.Seek(int64(fileHeaderLen), io.SeekStart) 95 if err != nil { 96 return false, terror.Annotatef(terror.ErrRelayCheckFormatDescEventExist.New(err.Error()), "seek to %d for %s", fileHeaderLen, filename) 97 } 98 99 // parse a FormatDescriptionEvent 100 var found bool 101 onEventFunc := func(e *replication.BinlogEvent) error { 102 if e.Header.EventType != replication.FORMAT_DESCRIPTION_EVENT { 103 return terror.ErrRelayCheckFormatDescEventExist.Generatef("got %+v, expect FormatDescriptionEvent", e.Header) 104 } else if (e.Header.LogPos - e.Header.EventSize) != uint32(fileHeaderLen) { 105 return terror.ErrRelayCheckFormatDescEventExist.Generatef("wrong offset %d for FormatDescriptionEvent, should be %d", e.Header.LogPos, fileHeaderLen) 106 } 107 found = true 108 return nil 109 } 110 111 // only parse single event 112 eof, err := replication.NewBinlogParser().ParseSingleEvent(f, onEventFunc) 113 switch { 114 case found: 115 return found, nil // if found is true, we return `true` even meet an error, because FormatDescriptionEvent exists. 116 case err != nil: 117 return false, terror.ErrRelayCheckFormatDescEventParseEv.Delegate(err, filename) 118 case eof: 119 return false, terror.ErrRelayCheckFormatDescEventParseEv.Delegate(io.EOF, filename) 120 } 121 return found, nil 122 } 123 124 // checkIsDuplicateEvent checks if the event is a duplicate event in the file. 125 // It is not safe if there other routine is writing the file. 126 // NOTE: handle cases when file size > 4GB. 127 func checkIsDuplicateEvent(filename string, ev *replication.BinlogEvent) (bool, error) { 128 // 1. check event start/end pos with the file size, and it's enough for most cases 129 fs, err := os.Stat(filename) 130 if err != nil { 131 return false, terror.Annotatef(terror.ErrRelayCheckIsDuplicateEvent.New(err.Error()), "get stat for %s", filename) 132 } 133 evStartPos := int64(ev.Header.LogPos - ev.Header.EventSize) 134 evEndPos := int64(ev.Header.LogPos) 135 if fs.Size() <= evStartPos { 136 return false, nil // the event not in the file 137 } else if fs.Size() < evEndPos { 138 // the file can not hold the whole event, often because the file is corrupt 139 return false, terror.ErrRelayCheckIsDuplicateEvent.Generatef( 140 "file size %d is between event's start pos (%d) and end pos (%d)", 141 fs.Size(), evStartPos, evEndPos) 142 } 143 144 // 2. compare the file data with the raw data of the event 145 f, err := os.Open(filename) 146 if err != nil { 147 return false, terror.Annotate(terror.ErrRelayCheckIsDuplicateEvent.New(err.Error()), "open binlog file") 148 } 149 defer f.Close() 150 buf := make([]byte, ev.Header.EventSize) 151 _, err = f.ReadAt(buf, evStartPos) 152 if err != nil { 153 return false, terror.Annotatef(terror.ErrRelayCheckIsDuplicateEvent.New(err.Error()), "read data from %d in %s with length %d", evStartPos, filename, len(buf)) 154 } else if !bytes.Equal(buf, ev.RawData) { 155 return false, terror.ErrRelayCheckIsDuplicateEvent.Generatef("event from %d in %s diff from passed-in event %+v", evStartPos, filename, ev.Header) 156 } 157 158 // duplicate in the file 159 return true, nil 160 } 161 162 // getTxnPosGTIDs gets position/GTID set for all completed transactions from a binlog file. 163 // It is not safe if there other routine is writing the file. 164 // NOTE: we use a int64 rather than a uint32 to represent the latest transaction's end log pos. 165 func getTxnPosGTIDs(ctx context.Context, filename string, p *parser.Parser) (int64, gmysql.GTIDSet, error) { 166 // use a FileReader to parse the binlog file. 167 rCfg := &reader.FileReaderConfig{ 168 EnableRawMode: false, // in order to get GTID set, we always disable RawMode. 169 } 170 startPos := gmysql.Position{Name: filename, Pos: 0} // always start from the file header 171 r := reader.NewFileReader(rCfg) 172 defer r.Close() 173 err := r.StartSyncByPos(startPos) // we always parse the file by pos 174 if err != nil { 175 return 0, nil, terror.Annotatef(err, "start sync by pos %s for %s", startPos, filename) 176 } 177 178 var ( 179 latestPos int64 180 latestGSet gmysql.GTIDSet 181 nextGTIDStr string // can be recorded if the coming transaction completed 182 ) 183 for { 184 var e *replication.BinlogEvent 185 ctx2, cancel2 := context.WithTimeout(ctx, time.Second) 186 e, err = r.GetEvent(ctx2) 187 cancel2() 188 if err != nil { 189 break // now, we stop to parse for any errors even is context done 190 } 191 192 // NOTE: only update pos/GTID set for DDL/XID to get an complete transaction. 193 switch ev := e.Event.(type) { 194 case *replication.FormatDescriptionEvent: 195 latestPos = int64(e.Header.LogPos) 196 case *replication.QueryEvent: 197 isDDL := parserpkg.CheckIsDDL(string(ev.Query), p) 198 originSQL := strings.TrimSpace(string(ev.Query)) 199 if isDDL || originSQL == "COMMIT" { 200 if latestGSet != nil { // GTID may not be enabled in the binlog 201 err = latestGSet.Update(nextGTIDStr) 202 if err != nil { 203 return 0, nil, terror.ErrRelayUpdateGTID.Delegate(err, latestGSet, nextGTIDStr) 204 } 205 } 206 latestPos = int64(e.Header.LogPos) 207 } 208 case *replication.XIDEvent: 209 if latestGSet != nil { // GTID may not be enabled in the binlog 210 err = latestGSet.Update(nextGTIDStr) 211 if err != nil { 212 return 0, nil, terror.ErrRelayUpdateGTID.Delegate(err, latestGSet, nextGTIDStr) 213 } 214 } 215 latestPos = int64(e.Header.LogPos) 216 case *replication.GTIDEvent: 217 if latestGSet == nil { 218 return 0, nil, terror.ErrRelayNeedPrevGTIDEvBeforeGTIDEv.Generate(e.Header) 219 } 220 nextGTIDStr, err = event.GetGTIDStr(e) 221 if err != nil { 222 return 0, nil, err 223 } 224 case *replication.MariadbGTIDEvent: 225 if latestGSet == nil { 226 return 0, nil, terror.ErrRelayNeedMaGTIDListEvBeforeGTIDEv.Generate(e.Header) 227 } 228 nextGTIDStr, err = event.GetGTIDStr(e) 229 if err != nil { 230 return 0, nil, err 231 } 232 case *replication.PreviousGTIDsEvent: 233 // if GTID enabled, we can get a PreviousGTIDEvent after the FormatDescriptionEvent 234 // ref: https://github.com/mysql/mysql-server/blob/8cc757da3d87bf4a1f07dcfb2d3c96fed3806870/sql/binlog.cc#L4549 235 // ref: https://github.com/mysql/mysql-server/blob/8cc757da3d87bf4a1f07dcfb2d3c96fed3806870/sql/binlog.cc#L5161 236 latestGSet, err = gtid.ParserGTID(gmysql.MySQLFlavor, ev.GTIDSets) 237 if err != nil { 238 return 0, nil, err 239 } 240 latestPos = int64(e.Header.LogPos) 241 case *replication.MariadbGTIDListEvent: 242 // a MariadbGTIDListEvent logged in every binlog to record the current replication state if GTID enabled 243 // ref: https://mariadb.com/kb/en/library/gtid_list_event/ 244 latestGSet, err = event.GTIDsFromMariaDBGTIDListEvent(e) 245 if err != nil { 246 return 0, nil, terror.Annotatef(err, "get GTID set from MariadbGTIDListEvent %+v", e.Header) 247 } 248 latestPos = int64(e.Header.LogPos) 249 } 250 } 251 252 return latestPos, latestGSet, ctx.Err() // return the error if the context is done. 253 }