github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/relay/file_util.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package relay
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"io"
    20  	"os"
    21  	"strings"
    22  	"time"
    23  
    24  	gmysql "github.com/go-mysql-org/go-mysql/mysql"
    25  	"github.com/go-mysql-org/go-mysql/replication"
    26  	"github.com/pingcap/tidb/pkg/parser"
    27  	"github.com/pingcap/tiflow/dm/pkg/binlog/event"
    28  	"github.com/pingcap/tiflow/dm/pkg/binlog/reader"
    29  	"github.com/pingcap/tiflow/dm/pkg/gtid"
    30  	parserpkg "github.com/pingcap/tiflow/dm/pkg/parser"
    31  	"github.com/pingcap/tiflow/dm/pkg/terror"
    32  )
    33  
    34  // checkBinlogHeaderExist checks if the file has a binlog file header.
    35  // It is not safe if there other routine is writing the file.
    36  func checkBinlogHeaderExist(filename string) (bool, error) {
    37  	f, err := os.Open(filename)
    38  	if err != nil {
    39  		return false, terror.Annotatef(terror.ErrRelayWriterFileOperate.New(err.Error()), "open file %s", filename)
    40  	}
    41  	defer f.Close()
    42  
    43  	return checkBinlogHeaderExistFd(f)
    44  }
    45  
    46  // checkBinlogHeaderExistFd checks if the file has a binlog file header.
    47  // It is not safe if there other routine is writing the file.
    48  func checkBinlogHeaderExistFd(fd *os.File) (bool, error) {
    49  	fileHeaderLen := len(replication.BinLogFileHeader)
    50  	buff := make([]byte, fileHeaderLen)
    51  	n, err := fd.Read(buff)
    52  	if err != nil {
    53  		if n == 0 && err == io.EOF {
    54  			return false, nil // empty file
    55  		}
    56  		return false, terror.Annotate(terror.ErrRelayCheckBinlogFileHeaderExist.New(err.Error()), "read binlog header")
    57  	} else if n != fileHeaderLen {
    58  		return false, terror.ErrRelayCheckBinlogFileHeaderExist.Generatef("binlog file %s has no enough data, only got % X", fd.Name(), buff[:n])
    59  	}
    60  
    61  	if !bytes.Equal(buff, replication.BinLogFileHeader) {
    62  		return false, terror.ErrRelayCheckBinlogFileHeaderExist.Generatef("binlog file %s header not valid, got % X, expect % X", fd.Name(), buff, replication.BinLogFileHeader)
    63  	}
    64  	return true, nil
    65  }
    66  
    67  // checkFormatDescriptionEventExist checks if the file has a valid FormatDescriptionEvent.
    68  // It is not safe if there other routine is writing the file.
    69  func checkFormatDescriptionEventExist(filename string) (bool, error) {
    70  	f, err := os.Open(filename)
    71  	if err != nil {
    72  		return false, terror.Annotatef(terror.ErrRelayCheckFormatDescEventExist.New(err.Error()), "open file %s", filename)
    73  	}
    74  	defer f.Close()
    75  
    76  	// FormatDescriptionEvent always follows the binlog file header
    77  	exist, err := checkBinlogHeaderExistFd(f)
    78  	if err != nil {
    79  		return false, terror.Annotatef(err, "check binlog file header for %s", filename)
    80  	} else if !exist {
    81  		return false, terror.ErrRelayCheckFormatDescEventExist.Generatef("no binlog file header at the beginning for %s", filename)
    82  	}
    83  
    84  	// check whether only the file header
    85  	fileHeaderLen := len(replication.BinLogFileHeader)
    86  	fs, err := f.Stat()
    87  	if err != nil {
    88  		return false, terror.Annotatef(terror.ErrRelayCheckFormatDescEventExist.New(err.Error()), "get stat for %s", filename)
    89  	} else if fs.Size() == int64(fileHeaderLen) {
    90  		return false, nil // only the file header
    91  	}
    92  
    93  	// seek to the beginning of the FormatDescriptionEvent
    94  	_, err = f.Seek(int64(fileHeaderLen), io.SeekStart)
    95  	if err != nil {
    96  		return false, terror.Annotatef(terror.ErrRelayCheckFormatDescEventExist.New(err.Error()), "seek to %d for %s", fileHeaderLen, filename)
    97  	}
    98  
    99  	// parse a FormatDescriptionEvent
   100  	var found bool
   101  	onEventFunc := func(e *replication.BinlogEvent) error {
   102  		if e.Header.EventType != replication.FORMAT_DESCRIPTION_EVENT {
   103  			return terror.ErrRelayCheckFormatDescEventExist.Generatef("got %+v, expect FormatDescriptionEvent", e.Header)
   104  		} else if (e.Header.LogPos - e.Header.EventSize) != uint32(fileHeaderLen) {
   105  			return terror.ErrRelayCheckFormatDescEventExist.Generatef("wrong offset %d for FormatDescriptionEvent, should be %d", e.Header.LogPos, fileHeaderLen)
   106  		}
   107  		found = true
   108  		return nil
   109  	}
   110  
   111  	// only parse single event
   112  	eof, err := replication.NewBinlogParser().ParseSingleEvent(f, onEventFunc)
   113  	switch {
   114  	case found:
   115  		return found, nil // if found is true, we return `true` even meet an error, because FormatDescriptionEvent exists.
   116  	case err != nil:
   117  		return false, terror.ErrRelayCheckFormatDescEventParseEv.Delegate(err, filename)
   118  	case eof:
   119  		return false, terror.ErrRelayCheckFormatDescEventParseEv.Delegate(io.EOF, filename)
   120  	}
   121  	return found, nil
   122  }
   123  
   124  // checkIsDuplicateEvent checks if the event is a duplicate event in the file.
   125  // It is not safe if there other routine is writing the file.
   126  // NOTE: handle cases when file size > 4GB.
   127  func checkIsDuplicateEvent(filename string, ev *replication.BinlogEvent) (bool, error) {
   128  	// 1. check event start/end pos with the file size, and it's enough for most cases
   129  	fs, err := os.Stat(filename)
   130  	if err != nil {
   131  		return false, terror.Annotatef(terror.ErrRelayCheckIsDuplicateEvent.New(err.Error()), "get stat for %s", filename)
   132  	}
   133  	evStartPos := int64(ev.Header.LogPos - ev.Header.EventSize)
   134  	evEndPos := int64(ev.Header.LogPos)
   135  	if fs.Size() <= evStartPos {
   136  		return false, nil // the event not in the file
   137  	} else if fs.Size() < evEndPos {
   138  		// the file can not hold the whole event, often because the file is corrupt
   139  		return false, terror.ErrRelayCheckIsDuplicateEvent.Generatef(
   140  			"file size %d is between event's start pos (%d) and end pos (%d)",
   141  			fs.Size(), evStartPos, evEndPos)
   142  	}
   143  
   144  	// 2. compare the file data with the raw data of the event
   145  	f, err := os.Open(filename)
   146  	if err != nil {
   147  		return false, terror.Annotate(terror.ErrRelayCheckIsDuplicateEvent.New(err.Error()), "open binlog file")
   148  	}
   149  	defer f.Close()
   150  	buf := make([]byte, ev.Header.EventSize)
   151  	_, err = f.ReadAt(buf, evStartPos)
   152  	if err != nil {
   153  		return false, terror.Annotatef(terror.ErrRelayCheckIsDuplicateEvent.New(err.Error()), "read data from %d in %s with length %d", evStartPos, filename, len(buf))
   154  	} else if !bytes.Equal(buf, ev.RawData) {
   155  		return false, terror.ErrRelayCheckIsDuplicateEvent.Generatef("event from %d in %s diff from passed-in event %+v", evStartPos, filename, ev.Header)
   156  	}
   157  
   158  	// duplicate in the file
   159  	return true, nil
   160  }
   161  
   162  // getTxnPosGTIDs gets position/GTID set for all completed transactions from a binlog file.
   163  // It is not safe if there other routine is writing the file.
   164  // NOTE: we use a int64 rather than a uint32 to represent the latest transaction's end log pos.
   165  func getTxnPosGTIDs(ctx context.Context, filename string, p *parser.Parser) (int64, gmysql.GTIDSet, error) {
   166  	// use a FileReader to parse the binlog file.
   167  	rCfg := &reader.FileReaderConfig{
   168  		EnableRawMode: false, // in order to get GTID set, we always disable RawMode.
   169  	}
   170  	startPos := gmysql.Position{Name: filename, Pos: 0} // always start from the file header
   171  	r := reader.NewFileReader(rCfg)
   172  	defer r.Close()
   173  	err := r.StartSyncByPos(startPos) // we always parse the file by pos
   174  	if err != nil {
   175  		return 0, nil, terror.Annotatef(err, "start sync by pos %s for %s", startPos, filename)
   176  	}
   177  
   178  	var (
   179  		latestPos   int64
   180  		latestGSet  gmysql.GTIDSet
   181  		nextGTIDStr string // can be recorded if the coming transaction completed
   182  	)
   183  	for {
   184  		var e *replication.BinlogEvent
   185  		ctx2, cancel2 := context.WithTimeout(ctx, time.Second)
   186  		e, err = r.GetEvent(ctx2)
   187  		cancel2()
   188  		if err != nil {
   189  			break // now, we stop to parse for any errors even is context done
   190  		}
   191  
   192  		// NOTE: only update pos/GTID set for DDL/XID to get an complete transaction.
   193  		switch ev := e.Event.(type) {
   194  		case *replication.FormatDescriptionEvent:
   195  			latestPos = int64(e.Header.LogPos)
   196  		case *replication.QueryEvent:
   197  			isDDL := parserpkg.CheckIsDDL(string(ev.Query), p)
   198  			originSQL := strings.TrimSpace(string(ev.Query))
   199  			if isDDL || originSQL == "COMMIT" {
   200  				if latestGSet != nil { // GTID may not be enabled in the binlog
   201  					err = latestGSet.Update(nextGTIDStr)
   202  					if err != nil {
   203  						return 0, nil, terror.ErrRelayUpdateGTID.Delegate(err, latestGSet, nextGTIDStr)
   204  					}
   205  				}
   206  				latestPos = int64(e.Header.LogPos)
   207  			}
   208  		case *replication.XIDEvent:
   209  			if latestGSet != nil { // GTID may not be enabled in the binlog
   210  				err = latestGSet.Update(nextGTIDStr)
   211  				if err != nil {
   212  					return 0, nil, terror.ErrRelayUpdateGTID.Delegate(err, latestGSet, nextGTIDStr)
   213  				}
   214  			}
   215  			latestPos = int64(e.Header.LogPos)
   216  		case *replication.GTIDEvent:
   217  			if latestGSet == nil {
   218  				return 0, nil, terror.ErrRelayNeedPrevGTIDEvBeforeGTIDEv.Generate(e.Header)
   219  			}
   220  			nextGTIDStr, err = event.GetGTIDStr(e)
   221  			if err != nil {
   222  				return 0, nil, err
   223  			}
   224  		case *replication.MariadbGTIDEvent:
   225  			if latestGSet == nil {
   226  				return 0, nil, terror.ErrRelayNeedMaGTIDListEvBeforeGTIDEv.Generate(e.Header)
   227  			}
   228  			nextGTIDStr, err = event.GetGTIDStr(e)
   229  			if err != nil {
   230  				return 0, nil, err
   231  			}
   232  		case *replication.PreviousGTIDsEvent:
   233  			// if GTID enabled, we can get a PreviousGTIDEvent after the FormatDescriptionEvent
   234  			// ref: https://github.com/mysql/mysql-server/blob/8cc757da3d87bf4a1f07dcfb2d3c96fed3806870/sql/binlog.cc#L4549
   235  			// ref: https://github.com/mysql/mysql-server/blob/8cc757da3d87bf4a1f07dcfb2d3c96fed3806870/sql/binlog.cc#L5161
   236  			latestGSet, err = gtid.ParserGTID(gmysql.MySQLFlavor, ev.GTIDSets)
   237  			if err != nil {
   238  				return 0, nil, err
   239  			}
   240  			latestPos = int64(e.Header.LogPos)
   241  		case *replication.MariadbGTIDListEvent:
   242  			// a MariadbGTIDListEvent logged in every binlog to record the current replication state if GTID enabled
   243  			// ref: https://mariadb.com/kb/en/library/gtid_list_event/
   244  			latestGSet, err = event.GTIDsFromMariaDBGTIDListEvent(e)
   245  			if err != nil {
   246  				return 0, nil, terror.Annotatef(err, "get GTID set from MariadbGTIDListEvent %+v", e.Header)
   247  			}
   248  			latestPos = int64(e.Header.LogPos)
   249  		}
   250  	}
   251  
   252  	return latestPos, latestGSet, ctx.Err() // return the error if the context is done.
   253  }