github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/relay/relay.go

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package relay
    15  
    16  import (
    17  	"context"
    18  	"crypto/tls"
    19  	"fmt"
    20  	"os"
    21  	"path/filepath"
    22  	"strings"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/go-mysql-org/go-mysql/mysql"
    27  	"github.com/go-mysql-org/go-mysql/replication"
    28  	"github.com/pingcap/errors"
    29  	"github.com/pingcap/failpoint"
    30  	"github.com/pingcap/tidb/pkg/parser"
    31  	"github.com/pingcap/tidb/pkg/util"
    32  	"github.com/pingcap/tiflow/dm/config"
    33  	"github.com/pingcap/tiflow/dm/config/dbconfig"
    34  	"github.com/pingcap/tiflow/dm/pb"
    35  	"github.com/pingcap/tiflow/dm/pkg/binlog"
    36  	"github.com/pingcap/tiflow/dm/pkg/binlog/common"
    37  	binlogReader "github.com/pingcap/tiflow/dm/pkg/binlog/reader"
    38  	"github.com/pingcap/tiflow/dm/pkg/conn"
    39  	tcontext "github.com/pingcap/tiflow/dm/pkg/context"
    40  	"github.com/pingcap/tiflow/dm/pkg/gtid"
    41  	"github.com/pingcap/tiflow/dm/pkg/log"
    42  	parserpkg "github.com/pingcap/tiflow/dm/pkg/parser"
    43  	pkgstreamer "github.com/pingcap/tiflow/dm/pkg/streamer"
    44  	"github.com/pingcap/tiflow/dm/pkg/terror"
    45  	"github.com/pingcap/tiflow/dm/pkg/utils"
    46  	"github.com/pingcap/tiflow/dm/unit"
    47  	"go.uber.org/atomic"
    48  	"go.uber.org/zap"
    49  )
    50  
    51  // used to fill RelayLogInfo.
    52  var fakeRelayTaskName = "relay"
    53  
    54  const (
    55  	flushMetaInterval           = 30 * time.Second
    56  	getMasterStatusInterval     = 30 * time.Second
    57  	trimUUIDsInterval           = 1 * time.Hour
    58  	showStatusConnectionTimeout = "1m"
    59  
    60  	// dumpFlagSendAnnotateRowsEvent (BINLOG_SEND_ANNOTATE_ROWS_EVENT) requests the MariaDB master to send Annotate_rows_log_event back.
    61  	dumpFlagSendAnnotateRowsEvent uint16 = 0x02
    62  )
    63  
    64  // NewRelay creates an instance of Relay.
    65  var NewRelay = NewRealRelay
    66  
    67  var _ Process = &Relay{}
    68  
    69  // Listener defines a binlog event listener of relay log.
    70  type Listener interface {
    71  	// OnEvent get called when relay processed an event successfully.
    72  	OnEvent(e *replication.BinlogEvent)
    73  }
    74  
    75  // Process defines mysql-like relay log process unit.
    76  type Process interface {
    77  	// Init initializes the relay log unit
    78  	Init(ctx context.Context) (err error)
    79  	// Process runs the background logic of the relay log unit
    80  	Process(ctx context.Context) pb.ProcessResult
    81  	// ActiveRelayLog returns the earliest active relay log info in this operator
    82  	ActiveRelayLog() *pkgstreamer.RelayLogInfo
    83  	// Reload reloads config
    84  	Reload(newCfg *Config) error
    85  	// Update updates config
    86  	Update(cfg *config.SubTaskConfig) error
    87  	// Resume resumes paused relay log process unit
    88  	Resume(ctx context.Context, pr chan pb.ProcessResult)
    89  	// Pause pauses a running relay log process unit
    90  	Pause()
    91  	// Error returns the error message if there is one
    92  	Error() interface{}
    93  	// Status returns status of relay log process unit.
    94  	Status(sourceStatus *binlog.SourceStatus) interface{}
    95  	// Close does some clean-up work
    96  	Close()
    97  	// IsClosed returns whether relay log process unit was closed
    98  	IsClosed() bool
    99  	// SaveMeta saves the relay meta
   100  	SaveMeta(pos mysql.Position, gset mysql.GTIDSet) error
   101  	// ResetMeta resets the relay meta
   102  	ResetMeta()
   103  	// PurgeRelayDir will clear all contents under the configured RelayDir
   104  	PurgeRelayDir() error
   105  	// RegisterListener registers a relay listener
   106  	RegisterListener(el Listener)
   107  	// UnRegisterListener unregisters a relay listener
   108  	UnRegisterListener(el Listener)
   109  	// NewReader creates a new relay reader
   110  	NewReader(logger log.Logger, cfg *BinlogReaderConfig) *BinlogReader
   111  	// IsActive checks whether the given uuid+filename is the active binlog file; if true, it returns the current file offset
   112  	IsActive(uuid, filename string) (bool, int64)
   113  }
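        // A minimal usage sketch of a typical Process lifecycle (the surrounding
        // DM-worker wiring and error handling are assumptions, not shown here):
        //
        //	p := NewRelay(cfg)
        //	if err := p.Init(ctx); err != nil { /* handle startup error */ }
        //	result := p.Process(ctx) // blocks until ctx is canceled or an error occurs
        //	p.Close()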
   114  
   115  // Relay relays mysql binlog to local file.
   116  type Relay struct {
   117  	db        *conn.BaseDB
   118  	cfg       *Config
   119  	syncerCfg replication.BinlogSyncerConfig
   120  
   121  	meta   Meta
   122  	closed atomic.Bool
   123  	sync.RWMutex
   124  
   125  	logger log.Logger
   126  
   127  	activeRelayLog struct {
   128  		sync.RWMutex
   129  		info *pkgstreamer.RelayLogInfo
   130  	}
   131  
   132  	writer    Writer
   133  	listeners map[Listener]struct{} // make it a set to make it easier to remove listener
   134  }
   135  
   136  // NewRealRelay creates an instance of Relay.
   137  func NewRealRelay(cfg *Config) Process {
   138  	r := &Relay{
   139  		cfg:       cfg,
   140  		meta:      NewLocalMeta(cfg.Flavor, cfg.RelayDir),
   141  		logger:    log.With(zap.String("component", "relay log")),
   142  		listeners: make(map[Listener]struct{}),
   143  	}
   144  	r.writer = NewFileWriter(r.logger, cfg.RelayDir)
   145  	return r
   146  }
   147  
   148  // Init implements the dm.Unit interface.
   149  // NOTE: when Init encounters an error, it will make the DM-worker exit when it boots up and is assigned relay.
   150  func (r *Relay) Init(ctx context.Context) (err error) {
   151  	return reportRelayLogSpaceInBackground(ctx, r.cfg.RelayDir)
   152  }
   153  
   154  // Process implements the dm.Unit interface.
   155  func (r *Relay) Process(ctx context.Context) pb.ProcessResult {
   156  	relayExitWithErrorCounter.WithLabelValues("true").Add(0)
   157  	relayExitWithErrorCounter.WithLabelValues("false").Add(0)
   158  	errs := make([]*pb.ProcessError, 0, 1)
   159  	err := r.process(ctx)
   160  	if err != nil && errors.Cause(err) != replication.ErrSyncClosed {
   161  		r.logger.Error("process exit", zap.Error(err))
   162  		// TODO: add specified error type instead of pb.ErrorType_UnknownError
   163  		processError := unit.NewProcessError(err)
   164  		resumable := fmt.Sprintf("%t", unit.IsResumableRelayError(processError))
   165  		relayExitWithErrorCounter.WithLabelValues(resumable).Inc()
   166  		errs = append(errs, processError)
   167  	}
   168  
   169  	isCanceled := false
   170  	if len(errs) == 0 {
   171  		select {
   172  		case <-ctx.Done():
   173  			isCanceled = true
   174  		default:
   175  		}
   176  	}
   177  	return pb.ProcessResult{
   178  		IsCanceled: isCanceled,
   179  		Errors:     errs,
   180  	}
   181  }
   182  
   183  func (r *Relay) process(ctx context.Context) error {
   184  	err := r.setSyncConfig()
   185  	if err != nil {
   186  		return err
   187  	}
   188  
   189  	db, err := conn.GetUpstreamDB(&r.cfg.From)
   190  	if err != nil {
   191  		return terror.WithScope(err, terror.ScopeUpstream)
   192  	}
   193  	r.db = db
   194  
   195  	if err2 := os.MkdirAll(r.cfg.RelayDir, 0o700); err2 != nil {
   196  		return terror.ErrRelayMkdir.Delegate(err2)
   197  	}
   198  
   199  	err = r.meta.Load()
   200  	if err != nil {
   201  		return err
   202  	}
   203  
   204  	parser2, err := conn.GetParser(tcontext.NewContext(ctx, log.L()), r.db) // refine to use user config later
   205  	if err != nil {
   206  		return err
   207  	}
   208  
   209  	isNew, err := isNewServer(ctx, r.meta.SubDir(), r.db, r.cfg.Flavor)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	failpoint.Inject("NewUpstreamServer", func(_ failpoint.Value) {
   215  		// test a bug caused by upstream switching
   216  		isNew = true
   217  	})
   218  
   219  	if isNew {
   220  		// re-setup meta for new server or new source
   221  		err = r.reSetupMeta(ctx)
   222  		if err != nil {
   223  			return err
   224  		}
   225  	} else {
   226  		// connected to last source
   227  		r.updateMetricsRelaySubDirIndex()
   228  		// if not a new server, try to recover the latest relay log file.
   229  		err = r.tryRecoverLatestFile(ctx, parser2)
   230  		if err != nil {
   231  			return err
   232  		}
   233  
   234  		// resuming carries the risk that the upstream has already purged the binlog that relay needs.
   235  		// when this worker is down, HA may schedule the source to other workers and advance the sync progress,
   236  		// and then when the source is scheduled back to this worker, we could start relay from the sync checkpoint's
   237  		// location, which is newer, and purge the outdated relay logs.
   238  		//
   239  		// locations in `r.cfg` are set to the minimal needed location of subtasks (higher priority) or the location specified in the source config
   240  		isRelayMetaOutdated := false
   241  		neededBinlogName := r.cfg.BinLogName
   242  		neededBinlogGset, err2 := gtid.ParserGTID(r.cfg.Flavor, r.cfg.BinlogGTID)
   243  		if err2 != nil {
   244  			return err2
   245  		}
   246  		if r.cfg.EnableGTID {
   247  			_, metaGset := r.meta.GTID()
   248  			if neededBinlogGset.Contain(metaGset) && !neededBinlogGset.Equal(metaGset) {
   249  				isRelayMetaOutdated = true
   250  			}
   251  		} else {
   252  			_, metaPos := r.meta.Pos()
   253  			if mysql.CompareBinlogFileName(neededBinlogName, metaPos.Name) > 0 {
   254  				isRelayMetaOutdated = true
   255  			}
   256  		}
   257  
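        		// illustrative example (hypothetical file names): if subtasks only need
        		// mysql-bin.000010 onward but the local meta still points to mysql-bin.000007,
        		// the recorded relay meta is considered outdated and the relay dir is
        		// rebuilt from the needed location below.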
   258  		if isRelayMetaOutdated {
   259  			uuidWithSuffix := r.meta.SubDir() // only change after switch
   260  			err2 = r.PurgeRelayDir()
   261  			if err2 != nil {
   262  				return err2
   263  			}
   264  			r.ResetMeta()
   265  
   266  			uuid, _, err3 := utils.ParseRelaySubDir(uuidWithSuffix)
   267  			if err3 != nil {
   268  				r.logger.Error("parse suffix for UUID when relay meta outdated", zap.String("UUID", uuidWithSuffix), zap.Error(err3))
   269  				return err3
   270  			}
   271  
   272  			pos := &mysql.Position{Name: neededBinlogName, Pos: binlog.MinPosition.Pos}
   273  			err = r.meta.AddDir(uuid, pos, neededBinlogGset, r.cfg.UUIDSuffix)
   274  			if err != nil {
   275  				return err
   276  			}
   277  			err = r.meta.Load()
   278  			if err != nil {
   279  				return err
   280  			}
   281  		}
   282  	}
   283  
   284  	reader2, err := r.setUpReader(ctx)
   285  	if err != nil {
   286  		return err
   287  	}
   288  	defer func() {
   289  		if reader2 != nil {
   290  			err = reader2.Close()
   291  			if err != nil {
   292  				r.logger.Error("fail to close binlog event reader", zap.Error(err))
   293  			}
   294  		}
   295  	}()
   296  
   297  	uuid, pos := r.meta.Pos()
   298  	r.writer.Init(uuid, pos.Name)
   299  	r.logger.Info("started underlying writer", zap.String("UUID", uuid), zap.String("filename", pos.Name))
   300  	defer func() {
   301  		err = r.writer.Close()
   302  		if err != nil {
   303  			r.logger.Error("fail to close binlog event writer", zap.Error(err))
   304  		}
   305  	}()
   306  
   307  	readerRetry, err := NewReaderRetry(r.cfg.ReaderRetry)
   308  	if err != nil {
   309  		return err
   310  	}
   311  
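        	// kick off periodic background operations: flushing dirty meta, polling the
        	// master status for metrics, and trimming the server UUID index file (see doIntervalOps).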
   312  	go r.doIntervalOps(ctx)
   313  
   314  	// handles binlog events with a retry mechanism.
   315  	// it only does the retry for some binlog reader errors now.
   316  	for {
   317  		err = r.handleEvents(ctx, reader2, parser2)
   318  		if err == nil {
   319  			return nil
   320  		} else if !readerRetry.Check(ctx, err) {
   321  			return err
   322  		}
   323  
   324  		r.logger.Warn("receive retryable error for binlog reader", log.ShortError(err))
   325  		err = reader2.Close() // close the previous reader
   326  		if err != nil {
   327  			r.logger.Error("fail to close binlog event reader", zap.Error(err))
   328  		}
   329  		reader2, err = r.setUpReader(ctx) // setup a new one
   330  		if err != nil {
   331  			return err
   332  		}
   333  		r.logger.Info("retrying to read binlog")
   334  	}
   335  }
   336  
   337  // PurgeRelayDir implements the dm.Unit interface.
   338  func (r *Relay) PurgeRelayDir() error {
   339  	dir := r.cfg.RelayDir
   340  	d, err := os.Open(dir)
   341  	r.logger.Info("will try purge whole relay dir for new relay log", zap.String("relayDir", dir))
   342  	// fail to open dir, return directly
   343  	if err != nil {
   344  		if os.IsNotExist(err) {
   345  			return nil
   346  		}
   347  		return err
   348  	}
   349  	defer d.Close()
   350  	names, err := d.Readdirnames(-1)
   351  	if err != nil {
   352  		return err
   353  	}
   354  	for _, name := range names {
   355  		err = os.RemoveAll(filepath.Join(dir, name))
   356  		if err != nil {
   357  			return err
   358  		}
   359  	}
   360  	r.logger.Info("relay dir is purged to be ready for new relay log", zap.String("relayDir", dir))
   361  	return nil
   362  }
   363  
   364  // tryRecoverLatestFile tries to recover the latest relay log file with corrupt/incomplete binlog events/transactions.
   365  func (r *Relay) tryRecoverLatestFile(ctx context.Context, parser2 *parser.Parser) error {
   366  	var (
   367  		uuid, latestPos = r.meta.Pos()
   368  		_, latestGTID   = r.meta.GTID()
   369  	)
   370  
   371  	if latestPos.Compare(minCheckpoint) <= 0 {
   372  		r.logger.Warn("no relay log file need to recover", zap.Stringer("position", latestPos), log.WrapStringerField("gtid set", latestGTID))
   373  		return nil
   374  	}
   375  
   376  	// setup a special writer to do the recovering
   377  	binlogDir := r.meta.Dir()
   378  	r.logger.Info("started recover", zap.String("binlog dir", binlogDir), zap.String("filename", latestPos.Name))
   379  
   380  	// NOTE: recovering a relay log file with many binlog events may take a rather long time.
   381  	result, err := r.doRecovering(ctx, r.meta.Dir(), latestPos.Name, parser2)
   382  	if err == nil {
   383  		relayLogHasMore := result.LatestPos.Compare(latestPos) > 0 ||
   384  			(result.LatestGTIDs != nil && !result.LatestGTIDs.Equal(latestGTID) && result.LatestGTIDs.Contain(latestGTID))
   385  
   386  		if result.Truncated || relayLogHasMore {
   387  			r.logger.Warn("relay log file recovered",
   388  				zap.Stringer("from position", latestPos), zap.Stringer("to position", result.LatestPos), log.WrapStringerField("from GTID set", latestGTID), log.WrapStringerField("to GTID set", result.LatestGTIDs))
   389  
   390  			if result.LatestGTIDs != nil {
   391  				dbConn, err2 := r.db.GetBaseConn(ctx)
   392  				if err2 != nil {
   393  					return err2
   394  				}
   395  				defer r.db.CloseConnWithoutErr(dbConn)
   396  				result.LatestGTIDs, err2 = conn.AddGSetWithPurged(ctx, result.LatestGTIDs, dbConn)
   397  				if err2 != nil {
   398  					return err2
   399  				}
   400  				latestGTID, err2 = conn.AddGSetWithPurged(ctx, latestGTID, dbConn)
   401  				if err2 != nil {
   402  					return err2
   403  				}
   404  			}
   405  
   406  			if result.LatestGTIDs != nil && !result.LatestGTIDs.Equal(latestGTID) {
   407  				if result.LatestGTIDs.Contain(latestGTID) {
   408  					r.logger.Warn("some GTIDs are missing in the meta data, this is usually because the process was interrupted while writing the meta data; forcing the GTIDs to be updated",
   409  						log.WrapStringerField("from GTID set", latestGTID), log.WrapStringerField("to GTID set", result.LatestGTIDs))
   410  				}
   411  				latestGTID = result.LatestGTIDs.Clone()
   412  			}
   413  
   414  			err = r.SaveMeta(result.LatestPos, latestGTID)
   415  			if err != nil {
   416  				return terror.Annotatef(err, "save position %s, GTID sets %v after recovered", result.LatestPos, result.LatestGTIDs)
   417  			}
   418  		}
   419  	}
   420  	return terror.Annotatef(err, "recover for UUID %s with relay dir %s, filename %s", uuid, binlogDir, latestPos.Name)
   421  }
   422  
   423  // recoverResult represents a result for a binlog recover operation.
   424  type recoverResult struct {
   425  	// if truncate trailing incomplete events during recovering in relay log
   426  	Truncated bool
   427  	// the latest binlog position after recover operation has done.
   428  	LatestPos mysql.Position
   429  	// the latest binlog GTID set after recover operation has done.
   430  	LatestGTIDs mysql.GTIDSet
   431  }
   432  
   433  // doRecovering tries to recover the current binlog file.
   434  // 1. read events from the file
   435  // 2.
   436  //
   437  //	a. update the position with the event's position if the transaction finished
   438  //	b. update the GTID set with the event's GTID if the transaction finished
   439  //
   440  // 3. truncate any incomplete events/transactions
   441  // currently, we consider a transaction finished if we received an XIDEvent or a DDL in a QueryEvent
   442  // NOTE: handle cases when file size > 4GB.
   443  func (r *Relay) doRecovering(ctx context.Context, binlogDir, filename string, parser *parser.Parser) (recoverResult, error) {
   444  	fullName := filepath.Join(binlogDir, filename)
   445  	fs, err := os.Stat(fullName)
   446  	if (err != nil && os.IsNotExist(err)) || (err == nil && len(filename) == 0) {
   447  		return recoverResult{}, nil // no file need to recover
   448  	} else if err != nil {
   449  		return recoverResult{}, terror.ErrRelayWriterGetFileStat.Delegate(err, fullName)
   450  	}
   451  
   452  	// get latest pos/GTID set for all completed transactions from the file
   453  	latestPos, latestGTIDs, err := getTxnPosGTIDs(ctx, fullName, parser)
   454  	if err != nil {
   455  		return recoverResult{}, terror.Annotatef(err, "get latest pos/GTID set from %s", fullName)
   456  	}
   457  
   458  	// mock file truncated by recover
   459  	failpoint.Inject("MockRecoverRelayWriter", func() {
   460  		r.logger.Info("mock recover relay writer")
   461  		failpoint.Goto("bypass")
   462  	})
   463  
   464  	// in most cases, we think the file is fine, so comparing sizes is simpler.
   465  	if fs.Size() == latestPos {
   466  		return recoverResult{
   467  			Truncated:   false,
   468  			LatestPos:   mysql.Position{Name: filename, Pos: uint32(latestPos)},
   469  			LatestGTIDs: latestGTIDs,
   470  		}, nil
   471  	} else if fs.Size() < latestPos {
   472  		return recoverResult{}, terror.ErrRelayWriterLatestPosGTFileSize.Generate(latestPos, fs.Size())
   473  	}
   474  
   475  	failpoint.Label("bypass")
   476  
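        	// normally reaching here means the file extends past the latest complete
        	// transaction, e.g. (illustrative numbers) a 1500-byte file whose last
        	// complete transaction ends at offset 1320 is truncated back to 1320 below.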
   477  	// truncate the file
   478  	f, err := os.OpenFile(fullName, os.O_WRONLY, 0o600)
   479  	if err != nil {
   480  		return recoverResult{}, terror.Annotatef(terror.ErrRelayWriterFileOperate.New(err.Error()), "open %s", fullName)
   481  	}
   482  	defer f.Close()
   483  	err = f.Truncate(latestPos)
   484  	if err != nil {
   485  		return recoverResult{}, terror.Annotatef(terror.ErrRelayWriterFileOperate.New(err.Error()), "truncate %s to %d", fullName, latestPos)
   486  	}
   487  
   488  	return recoverResult{
   489  		Truncated:   true,
   490  		LatestPos:   mysql.Position{Name: filename, Pos: uint32(latestPos)},
   491  		LatestGTIDs: latestGTIDs,
   492  	}, nil
   493  }
   494  
   495  const (
   496  	ignoreReasonHeartbeat      = "heartbeat event"
   497  	ignoreReasonArtificialFlag = "artificial flag (0x0020) set"
   498  )
   499  
   500  type preprocessResult struct {
   501  	Ignore       bool          // whether the event should be ignored
   502  	IgnoreReason string        // why the transformer ignores the event
   503  	LogPos       uint32        // binlog event's End_log_pos or Position in RotateEvent
   504  	NextLogName  string        // next binlog filename, only valid for RotateEvent
   505  	GTIDSet      mysql.GTIDSet // GTIDSet got from QueryEvent and XIDEvent when RawModeEnabled not true
   506  	CanSaveGTID  bool          // whether the GTID can be saved into meta; true for DDL QueryEvent and XIDEvent
   507  }
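        // For example (illustrative values), a RotateEvent pointing to the next file
        // "mysql-bin.000002" at position 4 yields a preprocessResult with LogPos: 4 and
        // NextLogName: "mysql-bin.000002"; see preprocessEvent below.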
   508  
   509  func (r *Relay) preprocessEvent(e *replication.BinlogEvent, parser2 *parser.Parser) preprocessResult {
   510  	result := preprocessResult{
   511  		LogPos: e.Header.LogPos,
   512  	}
   513  
   514  	switch ev := e.Event.(type) {
   515  	case *replication.PreviousGTIDsEvent:
   516  		result.CanSaveGTID = true
   517  	case *replication.MariadbGTIDListEvent:
   518  		result.CanSaveGTID = true
   519  	case *replication.RotateEvent:
   520  		// NOTE: we need to get the first binlog filename from fake RotateEvent when using auto position
   521  		result.LogPos = uint32(ev.Position)         // next event's position
   522  		result.NextLogName = string(ev.NextLogName) // for RotateEvent, update binlog name
   523  	case *replication.QueryEvent:
   524  		// when RawModeEnabled not true, QueryEvent will be parsed.
   525  		if parserpkg.CheckIsDDL(string(ev.Query), parser2) {
   526  			// we only update/save GTID for DDL/XID event
   527  			// if the query is something like `BEGIN`, we do not update/save GTID.
   528  			result.GTIDSet = ev.GSet
   529  			result.CanSaveGTID = true
   530  		}
   531  	case *replication.XIDEvent:
   532  		// when RawModeEnabled not true, XIDEvent will be parsed.
   533  		result.GTIDSet = ev.GSet
   534  		result.CanSaveGTID = true // need to save GTID for XID
   535  	case *replication.GenericEvent:
   536  		// handle some un-parsed events
   537  		if e.Header.EventType == replication.HEARTBEAT_EVENT {
   538  			// ignore artificial heartbeat event
   539  			// ref: https://dev.mysql.com/doc/internals/en/heartbeat-event.html
   540  			result.Ignore = true
   541  			result.IgnoreReason = ignoreReasonHeartbeat
   542  		}
   543  	default:
   544  		if e.Header.Flags&replication.LOG_EVENT_ARTIFICIAL_F != 0 {
   545  			// ignore events with LOG_EVENT_ARTIFICIAL_F flag(0x0020) set
   546  			// ref: https://dev.mysql.com/doc/internals/en/binlog-event-flag.html
   547  			result.Ignore = true
   548  			result.IgnoreReason = ignoreReasonArtificialFlag
   549  		}
   550  	}
   551  	return result
   552  }
   553  
   554  // handleEvents handles binlog events, including:
   555  //  1. read events from upstream
   556  //  2. transform events
   557  //  3. write events into relay log files
   558  //  4. update metadata if needed.
   559  func (r *Relay) handleEvents(
   560  	ctx context.Context,
   561  	reader2 Reader,
   562  	parser2 *parser.Parser,
   563  ) error {
   564  	var (
   565  		_, lastPos  = r.meta.Pos()
   566  		_, lastGTID = r.meta.GTID()
   567  		err         error
   568  		eventIndex  int // only for test
   569  	)
   570  	if lastGTID == nil {
   571  		if lastGTID, err = gtid.ParserGTID(r.cfg.Flavor, ""); err != nil {
   572  			return err
   573  		}
   574  	}
   575  
   576  	firstEvent := true
   577  	relayPosGauge := relayLogPosGauge.WithLabelValues("relay")
   578  	relayFileGauge := relayLogFileGauge.WithLabelValues("relay")
   579  	for {
   580  		// 1. read events from upstream server
   581  		readTimer := time.Now()
   582  		rResult, err := reader2.GetEvent(ctx)
   583  
   584  		failpoint.Inject("RelayGetEventFailed", func() {
   585  			err = errors.New("RelayGetEventFailed")
   586  		})
   587  		failpoint.Inject("RelayGetEventFailedAt", func(v failpoint.Value) {
   588  			if intVal, ok := v.(int); ok && intVal == eventIndex {
   589  				err = errors.New("fail point triggered")
   590  				_, gtid := r.meta.GTID()
   591  				r.logger.Warn("failed to get event", zap.Int("event_index", eventIndex),
   592  					zap.Any("gtid", gtid), log.ShortError(err))
   593  				// wait backoff retry interval
   594  				time.Sleep(1 * time.Second)
   595  			}
   596  		})
   597  		if err != nil {
   598  			switch errors.Cause(err) {
   599  			case context.Canceled:
   600  				return nil
   601  			case replication.ErrChecksumMismatch:
   602  				relayLogDataCorruptionCounter.Inc()
   603  			case replication.ErrSyncClosed, replication.ErrNeedSyncAgain:
   604  				// do nothing, but the error will be returned
   605  			default:
   606  				if conn.IsErrBinlogPurged(err) {
   607  					// TODO: try auto fix GTID, and can support auto switching between upstream server later.
   608  					cfg := r.cfg.From
   609  					r.logger.Error("the requested binlog files have been purged on the master server or the master server has switched, currently DM does not support handling this error",
   610  						zap.String("db host", cfg.Host), zap.Int("db port", cfg.Port), zap.Stringer("last pos", lastPos), log.ShortError(err))
   611  					// log the status for debug
   612  					tctx := tcontext.NewContext(ctx, r.logger)
   613  					pos, gs, err2 := conn.GetPosAndGs(tctx, r.db, r.cfg.Flavor)
   614  					if err2 == nil {
   615  						r.logger.Info("current master status", zap.Stringer("position", pos), log.WrapStringerField("GTID sets", gs))
   616  					}
   617  				}
   618  				binlogReadErrorCounter.Inc()
   619  			}
   620  			return err
   621  		}
   622  
   623  		binlogReadDurationHistogram.Observe(time.Since(readTimer).Seconds())
   624  		failpoint.Inject("BlackholeReadBinlog", func(_ failpoint.Value) {
   625  			// r.logger.Info("black hole read binlog takes effect")
   626  			failpoint.Continue()
   627  		})
   628  
   629  		e := rResult.Event
   630  		r.logger.Debug("receive binlog event with header", zap.Reflect("header", e.Header))
   631  
   632  		// 2. transform events
   633  		transformTimer := time.Now()
   634  		tResult := r.preprocessEvent(e, parser2)
   635  		binlogTransformDurationHistogram.Observe(time.Since(transformTimer).Seconds())
   636  		if len(tResult.NextLogName) > 0 && tResult.NextLogName > lastPos.Name {
   637  			lastPos = mysql.Position{
   638  				Name: tResult.NextLogName,
   639  				Pos:  tResult.LogPos,
   640  			}
   641  			r.logger.Info("rotate event", zap.Stringer("position", lastPos))
   642  		}
   643  		if tResult.Ignore {
   644  			r.logger.Info("ignore event by transformer",
   645  				zap.Reflect("header", e.Header),
   646  				zap.String("reason", tResult.IgnoreReason))
   647  			continue
   648  		}
   649  
   650  		if _, ok := e.Event.(*replication.RotateEvent); ok && utils.IsFakeRotateEvent(e.Header) {
   651  			isNew, err2 := isNewServer(ctx, r.meta.SubDir(), r.db, r.cfg.Flavor)
   653  			if err2 != nil {
   654  				return err2
   655  			}
   656  			// upstream database switched;
   657  			// report an error and let the outer logic handle it.
   658  			// we should start from the transaction beginning when switching to a new server
   659  			if isNew {
   660  				return terror.ErrRotateEventWithDifferentServerID.Generate()
   661  			}
   662  		}
   663  
   664  		if firstEvent {
   665  			// on the first event we got, which must be a fake rotate event, save and flush meta once to make sure
   666  			// the meta file exists before the binlog file exists (lastPos.Name cannot be empty now).
   667  			// when switching from A to B and then back to A, meta has been assigned to minCheckpoint since A is taken as a new server,
   668  			// and the meta file is not created when relay resumes.
   669  			firstEvent = false
   670  			if err2 := r.saveAndFlushMeta(lastPos, lastGTID); err2 != nil {
   671  				return err2
   672  			}
   673  		}
   674  
   675  		// 3. save events into file
   676  		writeTimer := time.Now()
   677  		if ce := r.logger.Check(zap.DebugLevel, ""); ce != nil {
   678  			r.logger.Debug("writing binlog event", zap.Reflect("header", e.Header))
   679  		}
   680  		wResult, err := r.writer.WriteEvent(e)
   681  		if err != nil {
   682  			relayLogWriteErrorCounter.Inc()
   683  			return err
   684  		} else if wResult.Ignore {
   685  			r.logger.Info("ignore event by writer",
   686  				zap.Reflect("header", e.Header),
   687  				zap.String("reason", wResult.IgnoreReason))
   688  			r.tryUpdateActiveRelayLog(e, lastPos.Name) // even if the event is ignored we still need to try this update.
   689  			continue
   690  		}
   691  
   692  		r.notify(e)
   693  
   694  		relayLogWriteDurationHistogram.Observe(time.Since(writeTimer).Seconds())
   695  		r.tryUpdateActiveRelayLog(e, lastPos.Name) // wrote an event, try to update the current active relay log.
   696  
   697  		// 4. update meta and metrics
   698  		needSavePos := tResult.CanSaveGTID
   699  		lastPos.Pos = tResult.LogPos
   700  		lastGTID = tResult.GTIDSet
   701  		if !r.cfg.EnableGTID {
   702  			// if go-mysql sets RawModeEnabled to true,
   703  			// it will only parse FormatDescriptionEvent and RotateEvent,
   704  			// so checking `e.Event.(type)` for `QueryEvent` and `XIDEvent` will never be true;
   705  			// thus we need to update the position for all events
   706  			// and also save the position for all events
   707  			if e.Header.EventType != replication.ROTATE_EVENT {
   708  				lastPos.Pos = e.Header.LogPos // for RotateEvent, lastPos updated to the next binlog file's position.
   709  			}
   710  			needSavePos = true
   711  		}
   712  
   713  		relayLogWriteSizeHistogram.Observe(float64(e.Header.EventSize))
   714  		relayPosGauge.Set(float64(lastPos.Pos))
   715  		if e.Header.EventType == replication.FORMAT_DESCRIPTION_EVENT {
   716  			if index, err2 := utils.GetFilenameIndex(lastPos.Name); err2 != nil {
   717  				r.logger.Error("parse binlog file name", zap.String("file name", lastPos.Name), log.ShortError(err2))
   718  			} else {
   719  				relayFileGauge.Set(float64(index))
   720  			}
   721  		}
   722  
   723  		if needSavePos {
   724  			err = r.SaveMeta(lastPos, lastGTID)
   725  			if err != nil {
   726  				return terror.Annotatef(err, "save position %s, GTID sets %v into meta", lastPos, lastGTID)
   727  			}
   728  			eventIndex = 0
   729  		} else {
   730  			eventIndex++
   731  		}
   732  		if tResult.NextLogName != "" && !utils.IsFakeRotateEvent(e.Header) {
   733  			// if the binlog is rotated, we need to save and flush the next binlog filename to meta
   734  			lastPos.Name = tResult.NextLogName
   735  			if err := r.saveAndFlushMeta(lastPos, lastGTID); err != nil {
   736  				return err
   737  			}
   738  		}
   739  	}
   740  }
   741  
   742  func (r *Relay) saveAndFlushMeta(lastPos mysql.Position, lastGTID mysql.GTIDSet) error {
   743  	if err := r.SaveMeta(lastPos, lastGTID); err != nil {
   744  		return terror.Annotatef(err, "save position %s, GTID sets %v into meta", lastPos, lastGTID)
   745  	}
   746  	return r.FlushMeta()
   747  }
   748  
   749  // tryUpdateActiveRelayLog tries to update current active relay log file.
   750  // we should update after receiving/writing a FormatDescriptionEvent because it means we have switched to a new relay log file.
   751  // NOTE: we can refactor active (writer/read) relay log mechanism later.
   752  func (r *Relay) tryUpdateActiveRelayLog(e *replication.BinlogEvent, filename string) {
   753  	if e.Header.EventType == replication.FORMAT_DESCRIPTION_EVENT {
   754  		r.setActiveRelayLog(filename)
   755  		r.logger.Info("change the active relay log file", zap.String("file name", filename))
   756  	}
   757  }
   758  
   759  // reSetupMeta re-sets up the metadata when switching to a new upstream master server.
   760  func (r *Relay) reSetupMeta(ctx context.Context) error {
   761  	uuid, err := conn.GetServerUUID(tcontext.NewContext(ctx, log.L()), r.db, r.cfg.Flavor)
   762  	if err != nil {
   763  		return err
   764  	}
   765  
   766  	var newPos *mysql.Position
   767  	var newGset mysql.GTIDSet
   768  	var newUUIDSuffix int
   769  	if r.cfg.UUIDSuffix > 0 {
   770  		// if bound or rebound to a source, clear all relay log and meta
   771  		if err = r.PurgeRelayDir(); err != nil {
   772  			return err
   773  		}
   774  		r.ResetMeta()
   775  
   776  		newUUIDSuffix = r.cfg.UUIDSuffix
   777  		// reset the UUIDSuffix
   778  		r.cfg.UUIDSuffix = 0
   779  
   780  		if len(r.cfg.BinLogName) != 0 {
   781  			newPos = &mysql.Position{Name: r.cfg.BinLogName, Pos: binlog.MinPosition.Pos}
   782  		}
   783  		if len(r.cfg.BinlogGTID) != 0 {
   784  			newGset, err = gtid.ParserGTID(r.cfg.Flavor, r.cfg.BinlogGTID)
   785  			if err != nil {
   786  				return err
   787  			}
   788  		}
   789  	}
   790  	err = r.meta.AddDir(uuid, newPos, newGset, newUUIDSuffix)
   791  	if err != nil {
   792  		return err
   793  	}
   794  	err = r.meta.Load()
   795  	if err != nil {
   796  		return err
   797  	}
   798  
   799  	var latestPosName, latestGTIDStr string
   800  	if (r.cfg.EnableGTID && len(r.cfg.BinlogGTID) == 0) || (!r.cfg.EnableGTID && len(r.cfg.BinLogName) == 0) {
   801  		tctx := tcontext.NewContext(ctx, r.logger)
   802  		latestPos, latestGTID, err2 := conn.GetPosAndGs(tctx, r.db, r.cfg.Flavor)
   803  		if err2 != nil {
   804  			return err2
   805  		}
   806  		latestPosName = latestPos.Name
   807  		latestGTIDStr = latestGTID.String()
   808  	}
   809  
   810  	// try adjust meta with start pos from config
   811  	_, err = r.meta.AdjustWithStartPos(r.cfg.BinLogName, r.cfg.BinlogGTID, r.cfg.EnableGTID, latestPosName, latestGTIDStr)
   812  	if err != nil {
   813  		return err
   814  	}
   815  
   816  	_, pos := r.meta.Pos()
   817  	_, gs := r.meta.GTID()
   818  	if r.cfg.EnableGTID {
   819  		// adjust the given gtid;
   820  		// this means we always pull the binlog from the beginning of the file.
   821  		gs, err = r.adjustGTID(ctx, gs)
   822  		if err != nil {
   823  			return terror.Annotate(err, "fail to adjust gtid for relay")
   824  		}
   825  		err = r.SaveMeta(pos, gs)
   826  		if err != nil {
   827  			return err
   828  		}
   829  	}
   830  
   831  	r.logger.Info("adjusted meta to start pos", zap.Any("start pos", pos), zap.Stringer("start pos's binlog gtid", gs))
   832  	r.updateMetricsRelaySubDirIndex()
   833  	r.logger.Info("resetup meta", zap.String("uuid", uuid))
   834  
   835  	return nil
   836  }
   837  
   838  func (r *Relay) updateMetricsRelaySubDirIndex() {
   839  	// when switching master server, update sub dir index metrics
   840  	node := r.masterNode()
   841  	uuidWithSuffix := r.meta.SubDir() // only change after switch
   842  	_, suffix, err := utils.ParseRelaySubDir(uuidWithSuffix)
   843  	if err != nil {
   844  		r.logger.Error("parse suffix for UUID", zap.String("UUID", uuidWithSuffix), zap.Error(err))
   845  		return
   846  	}
   847  	relaySubDirIndex.WithLabelValues(node, uuidWithSuffix).Set(float64(suffix))
   848  }
   849  
   850  func (r *Relay) doIntervalOps(ctx context.Context) {
   851  	flushTicker := time.NewTicker(flushMetaInterval)
   852  	defer flushTicker.Stop()
   853  	masterStatusTicker := time.NewTicker(getMasterStatusInterval)
   854  	defer masterStatusTicker.Stop()
   855  	trimUUIDsTicker := time.NewTicker(trimUUIDsInterval)
   856  	defer trimUUIDsTicker.Stop()
   857  
   858  	for {
   859  		select {
   860  		case <-flushTicker.C:
   861  			r.RLock()
   862  			if r.closed.Load() {
   863  				r.RUnlock()
   864  				return
   865  			}
   866  			if r.meta.Dirty() {
   867  				err := r.FlushMeta()
   868  				if err != nil {
   869  					r.logger.Error("flush meta", zap.Error(err))
   870  				} else {
   871  					r.logger.Info("flush meta finished", zap.Stringer("meta", r.meta))
   872  				}
   873  			}
   874  			r.RUnlock()
   875  		case <-masterStatusTicker.C:
   876  			r.RLock()
   877  			if r.closed.Load() {
   878  				r.RUnlock()
   879  				return
   880  			}
   881  			ctx2, cancel2 := context.WithTimeout(ctx, conn.DefaultDBTimeout)
   882  			tctx := tcontext.NewContext(ctx2, r.logger)
   883  			pos, _, err := conn.GetPosAndGs(tctx, r.db, r.cfg.Flavor)
   884  			cancel2()
   885  			if err != nil {
   886  				r.logger.Warn("get master status", zap.Error(err))
   887  				r.RUnlock()
   888  				continue
   889  			}
   890  			index, err := utils.GetFilenameIndex(pos.Name)
   891  			if err != nil {
   892  				r.logger.Error("parse binlog file name", zap.String("file name", pos.Name), log.ShortError(err))
   893  				r.RUnlock()
   894  				continue
   895  			}
   896  			relayLogFileGauge.WithLabelValues("master").Set(float64(index))
   897  			relayLogPosGauge.WithLabelValues("master").Set(float64(pos.Pos))
   898  			r.RUnlock()
   899  		case <-trimUUIDsTicker.C:
   900  			r.RLock()
   901  			if r.closed.Load() {
   902  				r.RUnlock()
   903  				return
   904  			}
   905  			trimmed, err := r.meta.TrimUUIDIndexFile()
   906  			if err != nil {
   907  				r.logger.Error("trim UUIDs", zap.Error(err))
   908  			} else if len(trimmed) > 0 {
   909  				r.logger.Info("trim UUIDs", zap.String("UUIDs", strings.Join(trimmed, ";")))
   910  			}
   911  			r.RUnlock()
   912  		case <-ctx.Done():
   913  			return
   914  		}
   915  	}
   916  }
   917  
   918  // setUpReader sets up the underlying reader used to read binlog events from the upstream master server.
   919  func (r *Relay) setUpReader(ctx context.Context) (Reader, error) {
   920  	ctx2, cancel := context.WithTimeout(ctx, conn.DefaultDBTimeout)
   921  	defer cancel()
   922  
   923  	// always use a new random serverID
   924  	randomServerID, err := conn.GetRandomServerID(tcontext.NewContext(ctx2, log.L()), r.db)
   925  	if err != nil {
   926  		// should never happen unless the master has too many replicas
   927  		return nil, terror.Annotate(err, "fail to get random server id for relay reader")
   928  	}
   929  	r.syncerCfg.ServerID = randomServerID
   930  	r.cfg.ServerID = randomServerID
   931  
   932  	uuid, pos := r.meta.Pos()
   933  	_, gs := r.meta.GTID()
   934  	cfg := &RConfig{
   935  		SyncConfig: r.syncerCfg,
   936  		Pos:        pos,
   937  		GTIDs:      gs,
   938  		MasterID:   r.masterNode(),
   939  		EnableGTID: r.cfg.EnableGTID,
   940  	}
   941  
   942  	reader2 := NewUpstreamReader(cfg)
   943  	err = reader2.Start()
   944  	if err != nil {
   945  		// do not log the whole config to protect the password in `SyncConfig`.
   946  		// other config items should already be logged before or included in `err`.
   947  		return nil, terror.Annotatef(err, "start reader for UUID %s", uuid)
   948  	}
   949  
   950  	r.logger.Info("started underlying reader", zap.String("UUID", uuid))
   951  	return reader2, nil
   952  }
   953  
   954  func (r *Relay) masterNode() string {
   955  	return fmt.Sprintf("%s:%d", r.cfg.From.Host, r.cfg.From.Port)
   956  }
   957  
   958  // IsClosed tells whether Relay unit is closed or not.
   959  func (r *Relay) IsClosed() bool {
   960  	return r.closed.Load()
   961  }
   962  
   963  // SaveMeta saves the relay meta and updates the meta in RelayLogInfo.
   964  func (r *Relay) SaveMeta(pos mysql.Position, gset mysql.GTIDSet) error {
   965  	return r.meta.Save(pos, gset)
   966  }
   967  
   968  // ResetMeta resets the relay meta.
   969  func (r *Relay) ResetMeta() {
   970  	r.Lock()
   971  	defer r.Unlock()
   972  	r.meta = NewLocalMeta(r.cfg.Flavor, r.cfg.RelayDir)
   973  }
   974  
   975  // FlushMeta flushes the relay meta.
   976  func (r *Relay) FlushMeta() error {
   977  	return r.meta.Flush()
   978  }
   979  
   980  // stopSync stops syncing; now it is used by Close and Pause.
   981  func (r *Relay) stopSync() {
   982  	if err := r.FlushMeta(); err != nil {
   983  		r.logger.Error("flush checkpoint", zap.Error(err))
   984  	}
   985  }
   986  
   987  func (r *Relay) closeDB() {
   988  	if r.db != nil {
   989  		r.db.Close()
   990  		r.db = nil
   991  	}
   992  }
   993  
   994  // Close implements the dm.Unit interface.
   995  func (r *Relay) Close() {
   996  	r.Lock()
   997  	defer r.Unlock()
   998  	if r.closed.Load() {
   999  		return
  1000  	}
  1001  	r.logger.Info("relay unit is closing")
  1002  
  1003  	r.stopSync()
  1004  
  1005  	r.closeDB()
  1006  
  1007  	r.closed.Store(true)
  1008  	r.logger.Info("relay unit closed")
  1009  }
  1010  
  1011  func (r *Relay) IsActive(uuid, filename string) (bool, int64) {
  1012  	return r.writer.IsActive(uuid, filename)
  1013  }
  1014  
  1015  // Status implements the dm.Unit interface.
  1016  func (r *Relay) Status(sourceStatus *binlog.SourceStatus) interface{} {
  1017  	r.RLock()
  1018  	defer r.RUnlock()
  1019  	uuid, relayPos := r.meta.Pos()
  1020  
  1021  	rs := &pb.RelayStatus{
  1022  		RelaySubDir: uuid,
  1023  		RelayBinlog: relayPos.String(),
  1024  	}
  1025  	if _, relayGTIDSet := r.meta.GTID(); relayGTIDSet != nil {
  1026  		rs.RelayBinlogGtid = relayGTIDSet.String()
  1027  	}
  1028  
  1029  	if sourceStatus != nil {
  1030  		masterPos, masterGTID := sourceStatus.Location.Position, sourceStatus.Location.GetGTID()
  1031  		rs.MasterBinlog = masterPos.String()
  1032  		if masterGTID != nil { // masterGTID may be a nil interface
  1033  			rs.MasterBinlogGtid = masterGTID.String()
  1034  		}
  1035  
  1036  		if r.cfg.EnableGTID {
  1037  			// rely on sorted GTID set when String()
  1038  			rs.RelayCatchUpMaster = rs.MasterBinlogGtid == rs.RelayBinlogGtid
  1039  		} else {
  1040  			rs.RelayCatchUpMaster = masterPos.Compare(relayPos) == 0
  1041  		}
  1042  	}
  1043  
  1044  	return rs
  1045  }
  1046  
  1047  // Error implements the dm.Unit interface.
  1048  func (r *Relay) Error() interface{} {
  1049  	return &pb.RelayError{}
  1050  }
  1051  
  1052  // Type implements the dm.Unit interface.
  1053  func (r *Relay) Type() pb.UnitType {
  1054  	return pb.UnitType_Relay
  1055  }
  1056  
  1057  // IsFreshTask implements Unit.IsFreshTask.
  1058  func (r *Relay) IsFreshTask() (bool, error) {
  1059  	return true, nil
  1060  }
  1061  
  1062  // Pause pauses the process, it can be resumed later.
  1063  func (r *Relay) Pause() {
  1064  	if r.IsClosed() {
  1065  		r.logger.Warn("try to pause, but already closed")
  1066  		return
  1067  	}
  1068  
  1069  	r.stopSync()
  1070  }
  1071  
  1072  // Resume resumes the paused process.
  1073  func (r *Relay) Resume(ctx context.Context, pr chan pb.ProcessResult) {
  1074  	// do nothing now; to re-process, the outer logic calls `Process` directly
  1075  }
  1076  
  1077  // Update implements Unit.Update.
  1078  func (r *Relay) Update(cfg *config.SubTaskConfig) error {
  1079  	// updating the configuration is not supported now
  1080  	return nil
  1081  }
  1082  
  1083  // Reload updates config.
  1084  func (r *Relay) Reload(newCfg *Config) error {
  1085  	r.Lock()
  1086  	defer r.Unlock()
  1087  	r.logger.Info("relay unit is updating")
  1088  
  1089  	// Update From
  1090  	r.cfg.From = newCfg.From
  1091  
  1092  	// Update Charset
  1093  	r.cfg.Charset = newCfg.Charset
  1094  
  1095  	r.closeDB()
  1096  	if r.cfg.From.RawDBCfg == nil {
  1097  		r.cfg.From.RawDBCfg = dbconfig.DefaultRawDBConfig()
  1098  	}
  1099  	r.cfg.From.RawDBCfg.ReadTimeout = showStatusConnectionTimeout
  1100  	db, err := conn.GetUpstreamDB(&r.cfg.From)
  1101  	if err != nil {
  1102  		return terror.DBErrorAdapt(err, db.Scope, terror.ErrDBDriverError)
  1103  	}
  1104  	r.db = db
  1105  
  1106  	if err := r.setSyncConfig(); err != nil {
  1107  		return err
  1108  	}
  1109  
  1110  	r.logger.Info("relay unit is updated")
  1111  
  1112  	return nil
  1113  }
  1114  
  1115  // setActiveRelayLog sets or updates the current active relay log to file.
  1116  func (r *Relay) setActiveRelayLog(filename string) {
  1117  	uuid := r.meta.SubDir()
  1118  	_, suffix, _ := utils.ParseRelaySubDir(uuid)
  1119  	rli := &pkgstreamer.RelayLogInfo{
  1120  		TaskName:     fakeRelayTaskName,
  1121  		SubDir:       uuid,
  1122  		SubDirSuffix: suffix,
  1123  		Filename:     filename,
  1124  	}
  1125  	r.activeRelayLog.Lock()
  1126  	r.activeRelayLog.info = rli
  1127  	r.activeRelayLog.Unlock()
  1128  }
  1129  
  1130  // ActiveRelayLog returns the current active RelayLogInfo.
  1131  func (r *Relay) ActiveRelayLog() *pkgstreamer.RelayLogInfo {
  1132  	r.activeRelayLog.RLock()
  1133  	defer r.activeRelayLog.RUnlock()
  1134  	return r.activeRelayLog.info
  1135  }
  1136  
  1137  func (r *Relay) setSyncConfig() error {
  1138  	var tlsConfig *tls.Config
  1139  	var err error
  1140  	if r.cfg.From.Security != nil {
  1141  		if loadErr := r.cfg.From.Security.LoadTLSContent(); loadErr != nil {
  1142  			return terror.ErrCtlLoadTLSCfg.Delegate(loadErr)
  1143  		}
  1144  		tlsConfig, err = util.NewTLSConfig(
  1145  			util.WithCAContent(r.cfg.From.Security.SSLCABytes),
  1146  			util.WithCertAndKeyContent(r.cfg.From.Security.SSLCertBytes, r.cfg.From.Security.SSLKeyBytes),
  1147  			util.WithVerifyCommonName(r.cfg.From.Security.CertAllowedCN),
  1148  			util.WithMinTLSVersion(tls.VersionTLS10),
  1149  		)
  1150  		if err != nil {
  1151  			return terror.ErrConnInvalidTLSConfig.Delegate(err)
  1152  		}
  1153  	}
  1154  
  1155  	h, _ := os.Hostname()
  1156  	h = "dm-relay-" + h
  1157  	// https://github.com/mysql/mysql-server/blob/1bfe02bdad6604d54913c62614bde57a055c8332/include/my_hostname.h#L33-L42
  1158  	if len(h) > 60 {
  1159  		h = h[:60]
  1160  	}
  1161  	syncerCfg := replication.BinlogSyncerConfig{
  1162  		ServerID:  r.cfg.ServerID,
  1163  		Flavor:    r.cfg.Flavor,
  1164  		Host:      r.cfg.From.Host,
  1165  		Port:      uint16(r.cfg.From.Port),
  1166  		User:      r.cfg.From.User,
  1167  		Password:  r.cfg.From.Password,
  1168  		Charset:   r.cfg.Charset,
  1169  		TLSConfig: tlsConfig,
  1170  		Localhost: h,
  1171  	}
  1172  	common.SetDefaultReplicationCfg(&syncerCfg, common.MaxBinlogSyncerReconnect)
  1173  
  1174  	if !r.cfg.EnableGTID {
  1175  		syncerCfg.RawModeEnabled = true
  1176  	}
  1177  
  1178  	if r.cfg.Flavor == mysql.MariaDBFlavor {
  1179  		syncerCfg.DumpCommandFlag |= dumpFlagSendAnnotateRowsEvent
  1180  	}
  1181  
  1182  	r.syncerCfg = syncerCfg
  1183  	return nil
  1184  }
  1185  
  1186  // adjustGTID adjusts the given GTID set;
  1187  // starting sync at the returned gset will fetch a whole binlog from the beginning of the file.
  1188  func (r *Relay) adjustGTID(ctx context.Context, gset mysql.GTIDSet) (mysql.GTIDSet, error) {
  1189  	// set up a TCP binlog reader (because no relay can be used when upgrading).
  1190  	syncCfg := r.syncerCfg
  1191  	// always use a new random serverID
  1192  	randomServerID, err := conn.GetRandomServerID(tcontext.NewContext(ctx, log.L()), r.db)
  1193  	if err != nil {
  1194  		return nil, terror.Annotate(err, "fail to get random server id when relay adjust gtid")
  1195  	}
  1196  	syncCfg.ServerID = randomServerID
  1197  	tcpReader := binlogReader.NewTCPReader(syncCfg)
  1198  	resultGs, err := binlogReader.GetPreviousGTIDFromGTIDSet(ctx, tcpReader, gset)
  1199  	if err != nil {
  1200  		return nil, err
  1201  	}
  1202  
  1203  	dbConn, err2 := r.db.GetBaseConn(ctx)
  1204  	if err2 != nil {
  1205  		return nil, err2
  1206  	}
  1207  	defer r.db.CloseConnWithoutErr(dbConn)
  1208  	return conn.AddGSetWithPurged(ctx, resultGs, dbConn)
  1209  }
  1210  
  1211  func (r *Relay) notify(e *replication.BinlogEvent) {
  1212  	r.RLock()
  1213  	defer r.RUnlock()
  1214  	for el := range r.listeners {
  1215  		el.OnEvent(e)
  1216  	}
  1217  }
  1218  
  1219  func (r *Relay) NewReader(logger log.Logger, cfg *BinlogReaderConfig) *BinlogReader {
  1220  	return newBinlogReader(logger, cfg, r)
  1221  }
  1222  
  1223  // RegisterListener implements Process.RegisterListener.
  1224  func (r *Relay) RegisterListener(el Listener) {
  1225  	r.Lock()
  1226  	defer r.Unlock()
  1227  	r.listeners[el] = struct{}{}
  1228  }
  1229  
  1230  // UnRegisterListener implements Process.UnRegisterListener.
  1231  func (r *Relay) UnRegisterListener(el Listener) {
  1232  	r.Lock()
  1233  	defer r.Unlock()
  1234  	delete(r.listeners, el)
  1235  }