github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/binlogreplication/binlog_replica_controller.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package binlogreplication
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/sqlserver"
    24  
    25  	"github.com/dolthub/go-mysql-server/sql"
    26  	"github.com/dolthub/go-mysql-server/sql/binlogreplication"
    27  	"github.com/dolthub/go-mysql-server/sql/mysql_db"
    28  )
    29  
// DoltBinlogReplicaController is the package-level replica controller instance. It is created once, at package
// initialization, by newDoltBinlogReplicaController.
var DoltBinlogReplicaController = newDoltBinlogReplicaController()

// binlogApplierUser is the locked, super user account that is used to execute replicated SQL statements.
// We cannot always assume the root account will exist, so we automatically create this account that is
// specific to binlog replication and lock it so that it cannot be used to login.
const binlogApplierUser = "dolt-binlog-applier"
    36  
    37  // ErrServerNotConfiguredAsReplica is returned when replication is started without enough configuration provided.
    38  var ErrServerNotConfiguredAsReplica = fmt.Errorf(
    39  	"server is not configured as a replica; fix with CHANGE REPLICATION SOURCE TO")
    40  
    41  // ErrEmptyHostname is returned when replication is started without a hostname configured.
    42  var ErrEmptyHostname = fmt.Errorf("fatal error: Invalid (empty) hostname when attempting to connect " +
    43  	"to the source server. Connection attempt terminated")
    44  
    45  // ErrEmptyUsername is returned when replication is started without a username configured.
    46  var ErrEmptyUsername = fmt.Errorf("fatal error: Invalid (empty) username when attempting to connect " +
    47  	"to the source server. Connection attempt terminated")
    48  
    49  // ErrReplicationStopped is an internal error that is not returned to users, and signals that STOP REPLICA was called.
    50  var ErrReplicationStopped = fmt.Errorf("replication stop requested")
    51  
// doltBinlogReplicaController implements the BinlogReplicaController interface for a Dolt database in order to
// provide support for a Dolt server to be a replica of a MySQL primary.
//
// This type is used concurrently – multiple sessions on the DB can call this interface concurrently,
// so all state that the controller tracks MUST be protected with a mutex.
type doltBinlogReplicaController struct {
	// status is the in-memory replica status. GetReplicaStatus returns a copy of it, overlaid with
	// values from the persisted replication source configuration and the current filters.
	status binlogreplication.ReplicaStatus
	// filters holds the table filters configured through SetReplicationFilterOptions. The same
	// pointer is handed to the applier when the controller is constructed.
	filters *filterConfiguration
	// applier is launched by StartReplica and signaled to stop by StopReplica.
	applier *binlogReplicaApplier
	// ctx is the execution context installed by SetExecutionContext; StartReplica passes it to the applier.
	ctx *sql.Context

	// statusMutex blocks concurrent access to the ReplicaStatus struct
	statusMutex *sync.Mutex

	// operationMutex blocks concurrent access to the START/STOP/RESET REPLICA operations
	operationMutex *sync.Mutex
}

// Compile-time check that *doltBinlogReplicaController satisfies the BinlogReplicaController interface.
var _ binlogreplication.BinlogReplicaController = (*doltBinlogReplicaController)(nil)
    71  
    72  // newDoltBinlogReplicaController creates a new doltBinlogReplicaController instance.
    73  func newDoltBinlogReplicaController() *doltBinlogReplicaController {
    74  	controller := doltBinlogReplicaController{
    75  		filters:        newFilterConfiguration(),
    76  		statusMutex:    &sync.Mutex{},
    77  		operationMutex: &sync.Mutex{},
    78  	}
    79  	controller.status.ConnectRetry = 60
    80  	controller.status.SourceRetryCount = 86400
    81  	controller.status.AutoPosition = true
    82  	controller.status.ReplicaIoRunning = binlogreplication.ReplicaIoNotRunning
    83  	controller.status.ReplicaSqlRunning = binlogreplication.ReplicaSqlNotRunning
    84  	controller.applier = newBinlogReplicaApplier(controller.filters)
    85  	return &controller
    86  }
    87  
    88  // StartReplica implements the BinlogReplicaController interface.
    89  func (d *doltBinlogReplicaController) StartReplica(ctx *sql.Context) error {
    90  	d.operationMutex.Lock()
    91  	defer d.operationMutex.Unlock()
    92  
    93  	// START REPLICA may be called multiple times, but if replication is already running,
    94  	// it will log a warning and not start up new threads.
    95  	if d.applier.IsRunning() {
    96  		ctx.Warn(3083, "Replication thread(s) for channel '' are already running.")
    97  		return nil
    98  	}
    99  
   100  	if false {
   101  		// TODO: If the database is already configured for Dolt replication/clustering, then error out.
   102  		//       Add a (BATS?) test to cover this case
   103  		return fmt.Errorf("dolt replication already enabled; unable to use binlog replication with other replication modes. " +
   104  			"Disable Dolt replication first before starting binlog replication")
   105  	}
   106  
   107  	// If we aren't running in a sql-server context, it would be nice to return a helpful, Dolt-specific
   108  	// error message. Currently, this case would trigger an error from the GMS layer, so we can't give
   109  	// a specific error message about needing to run Dolt in sql-server mode yet.
   110  
   111  	_, err := loadReplicaServerId()
   112  	if err != nil {
   113  		return fmt.Errorf("unable to start replication: %s", err.Error())
   114  	}
   115  
   116  	configuration, err := loadReplicationConfiguration(ctx)
   117  	if err != nil {
   118  		return err
   119  	} else if configuration == nil {
   120  		return ErrServerNotConfiguredAsReplica
   121  	} else if configuration.Host == "" {
   122  		DoltBinlogReplicaController.setIoError(ERFatalReplicaError, ErrEmptyHostname.Error())
   123  		return ErrEmptyHostname
   124  	} else if configuration.User == "" {
   125  		DoltBinlogReplicaController.setIoError(ERFatalReplicaError, ErrEmptyUsername.Error())
   126  		return ErrEmptyUsername
   127  	}
   128  
   129  	if d.ctx == nil {
   130  		return fmt.Errorf("no execution context set for the replica controller")
   131  	}
   132  
   133  	err = d.configureReplicationUser(ctx)
   134  	if err != nil {
   135  		return err
   136  	}
   137  
   138  	// Set execution context's user to the binlog replication user
   139  	d.ctx.SetClient(sql.Client{
   140  		User:    binlogApplierUser,
   141  		Address: "localhost",
   142  	})
   143  
   144  	ctx.GetLogger().Info("starting binlog replication...")
   145  	d.applier.Go(d.ctx)
   146  	return nil
   147  }
   148  
   149  // configureReplicationUser creates or configures the super user account needed to apply replication
   150  // changes and execute DDL statements on the running server. If the account doesn't exist, it will be
   151  // created and locked to disable log ins, and if it does exist, but is missing super privs or is not
   152  // locked, it will be given super user privs and locked.
   153  func (d *doltBinlogReplicaController) configureReplicationUser(ctx *sql.Context) error {
   154  	server := sqlserver.GetRunningServer()
   155  	if server == nil {
   156  		return fmt.Errorf("unable to access a running SQL server")
   157  	}
   158  	mySQLDb := server.Engine.Analyzer.Catalog.MySQLDb
   159  	ed := mySQLDb.Editor()
   160  	defer ed.Close()
   161  
   162  	replicationUser := mySQLDb.GetUser(ed, binlogApplierUser, "localhost", false)
   163  	if replicationUser == nil {
   164  		// If the replication user doesn't exist yet, create it and lock it
   165  		mySQLDb.AddSuperUser(ed, binlogApplierUser, "localhost", "")
   166  		replicationUser := mySQLDb.GetUser(ed, binlogApplierUser, "localhost", false)
   167  		if replicationUser == nil {
   168  			return fmt.Errorf("unable to load replication user")
   169  		}
   170  		// Make sure this account is locked so that it cannot be used to log in
   171  		replicationUser.Locked = true
   172  		ed.PutUser(replicationUser)
   173  	} else if replicationUser.IsSuperUser == false || replicationUser.Locked == false {
   174  		// Fix the replication user if it has been modified
   175  		replicationUser.IsSuperUser = true
   176  		replicationUser.Locked = true
   177  		ed.PutUser(replicationUser)
   178  	}
   179  
   180  	return nil
   181  }
   182  
   183  // SetExecutionContext sets the unique |ctx| for the replica's applier to use when applying changes from binlog events
   184  // to a database. The applier cannot reuse any existing context, because it executes in a separate routine and would
   185  // cause race conditions.
   186  func (d *doltBinlogReplicaController) SetExecutionContext(ctx *sql.Context) {
   187  	d.ctx = ctx
   188  }
   189  
   190  // StopReplica implements the BinlogReplicaController interface.
   191  func (d *doltBinlogReplicaController) StopReplica(ctx *sql.Context) error {
   192  	if d.applier.IsRunning() == false {
   193  		ctx.Warn(3084, "Replication thread(s) for channel '' are already stopped.")
   194  		return nil
   195  	}
   196  
   197  	d.applier.stopReplicationChan <- struct{}{}
   198  
   199  	d.updateStatus(func(status *binlogreplication.ReplicaStatus) {
   200  		status.ReplicaIoRunning = binlogreplication.ReplicaIoNotRunning
   201  		status.ReplicaSqlRunning = binlogreplication.ReplicaSqlNotRunning
   202  	})
   203  
   204  	return nil
   205  }
   206  
   207  // SetReplicationSourceOptions implements the BinlogReplicaController interface.
   208  func (d *doltBinlogReplicaController) SetReplicationSourceOptions(ctx *sql.Context, options []binlogreplication.ReplicationOption) error {
   209  	replicaSourceInfo, err := loadReplicationConfiguration(ctx)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	if replicaSourceInfo == nil {
   215  		replicaSourceInfo = mysql_db.NewReplicaSourceInfo()
   216  	}
   217  
   218  	for _, option := range options {
   219  		switch strings.ToUpper(option.Name) {
   220  		case "SOURCE_HOST":
   221  			value, err := getOptionValueAsString(option)
   222  			if err != nil {
   223  				return err
   224  			}
   225  			replicaSourceInfo.Host = value
   226  		case "SOURCE_USER":
   227  			value, err := getOptionValueAsString(option)
   228  			if err != nil {
   229  				return err
   230  			}
   231  			replicaSourceInfo.User = value
   232  		case "SOURCE_PASSWORD":
   233  			value, err := getOptionValueAsString(option)
   234  			if err != nil {
   235  				return err
   236  			}
   237  			replicaSourceInfo.Password = value
   238  		case "SOURCE_PORT":
   239  			intValue, err := getOptionValueAsInt(option)
   240  			if err != nil {
   241  				return err
   242  			}
   243  			replicaSourceInfo.Port = uint16(intValue)
   244  		case "SOURCE_CONNECT_RETRY":
   245  			intValue, err := getOptionValueAsInt(option)
   246  			if err != nil {
   247  				return err
   248  			}
   249  			replicaSourceInfo.ConnectRetryInterval = uint32(intValue)
   250  		case "SOURCE_RETRY_COUNT":
   251  			intValue, err := getOptionValueAsInt(option)
   252  			if err != nil {
   253  				return err
   254  			}
   255  			replicaSourceInfo.ConnectRetryCount = uint64(intValue)
   256  		default:
   257  			return fmt.Errorf("unknown replication source option: %s", option.Name)
   258  		}
   259  	}
   260  
   261  	// Persist the updated replica source configuration to disk
   262  	return persistReplicationConfiguration(ctx, replicaSourceInfo)
   263  }
   264  
   265  // SetReplicationFilterOptions implements the BinlogReplicaController interface.
   266  func (d *doltBinlogReplicaController) SetReplicationFilterOptions(_ *sql.Context, options []binlogreplication.ReplicationOption) error {
   267  	for _, option := range options {
   268  		switch strings.ToUpper(option.Name) {
   269  		case "REPLICATE_DO_TABLE":
   270  			value, err := getOptionValueAsTableNames(option)
   271  			if err != nil {
   272  				return err
   273  			}
   274  			err = d.filters.setDoTables(value)
   275  			if err != nil {
   276  				return err
   277  			}
   278  		case "REPLICATE_IGNORE_TABLE":
   279  			value, err := getOptionValueAsTableNames(option)
   280  			if err != nil {
   281  				return err
   282  			}
   283  			err = d.filters.setIgnoreTables(value)
   284  			if err != nil {
   285  				return err
   286  			}
   287  		default:
   288  			return fmt.Errorf("unsupported replication filter option: %s", option.Name)
   289  		}
   290  	}
   291  
   292  	// TODO: Consider persisting filter settings. MySQL doesn't actually do this... unlike CHANGE REPLICATION SOURCE,
   293  	//       CHANGE REPLICATION FILTER requires users to re-apply the filter options every time a server is restarted,
   294  	//       or to pass them to mysqld on the command line or in configuration. Since we don't want to force users
   295  	//       to specify these on the command line, we should consider diverging from MySQL behavior here slightly and
   296  	//       persisting the filter configuration options if customers want this.
   297  
   298  	return nil
   299  }
   300  
   301  // GetReplicaStatus implements the BinlogReplicaController interface
   302  func (d *doltBinlogReplicaController) GetReplicaStatus(ctx *sql.Context) (*binlogreplication.ReplicaStatus, error) {
   303  	replicaSourceInfo, err := loadReplicationConfiguration(ctx)
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  
   308  	if replicaSourceInfo == nil {
   309  		return nil, nil
   310  	}
   311  
   312  	// Lock to read status consistently
   313  	d.statusMutex.Lock()
   314  	defer d.statusMutex.Unlock()
   315  	var copy = d.status
   316  
   317  	copy.SourceUser = replicaSourceInfo.User
   318  	copy.SourceHost = replicaSourceInfo.Host
   319  	copy.SourcePort = uint(replicaSourceInfo.Port)
   320  	copy.SourceServerUuid = replicaSourceInfo.Uuid
   321  	copy.ConnectRetry = replicaSourceInfo.ConnectRetryInterval
   322  	copy.SourceRetryCount = replicaSourceInfo.ConnectRetryCount
   323  	copy.ReplicateDoTables = d.filters.getDoTables()
   324  	copy.ReplicateIgnoreTables = d.filters.getIgnoreTables()
   325  
   326  	if d.applier.currentPosition != nil {
   327  		copy.ExecutedGtidSet = d.applier.currentPosition.GTIDSet.String()
   328  		copy.RetrievedGtidSet = copy.ExecutedGtidSet
   329  	}
   330  
   331  	return &copy, nil
   332  }
   333  
   334  // ResetReplica implements the BinlogReplicaController interface
   335  func (d *doltBinlogReplicaController) ResetReplica(ctx *sql.Context, resetAll bool) error {
   336  	d.operationMutex.Lock()
   337  	defer d.operationMutex.Unlock()
   338  
   339  	if d.applier.IsRunning() {
   340  		return fmt.Errorf("unable to reset replica while replication is running; stop replication and try again")
   341  	}
   342  
   343  	// Reset error status
   344  	d.updateStatus(func(status *binlogreplication.ReplicaStatus) {
   345  		status.LastIoErrNumber = 0
   346  		status.LastSqlErrNumber = 0
   347  		status.LastIoErrorTimestamp = nil
   348  		status.LastSqlErrorTimestamp = nil
   349  		status.LastSqlError = ""
   350  		status.LastIoError = ""
   351  	})
   352  
   353  	if resetAll {
   354  		err := deleteReplicationConfiguration(ctx)
   355  		if err != nil {
   356  			return err
   357  		}
   358  
   359  		d.filters = newFilterConfiguration()
   360  	}
   361  
   362  	return nil
   363  }
   364  
// updateStatus allows the caller to safely update the replica controller's status. The controller locks its mutex
// before the specified function |f| is called, and unlocks it after |f| is finished running. The current status is
// passed into the callback function |f| and the caller can safely update or copy any fields they need.
func (d *doltBinlogReplicaController) updateStatus(f func(status *binlogreplication.ReplicaStatus)) {
	d.statusMutex.Lock()
	defer d.statusMutex.Unlock()
	// |f| receives a pointer to the controller's own status struct, so its writes are applied in place.
	f(&d.status)
}
   373  
   374  // setIoError updates the current replication status with the specific |errno| and |message| to describe an IO error.
   375  func (d *doltBinlogReplicaController) setIoError(errno uint, message string) {
   376  	d.statusMutex.Lock()
   377  	defer d.statusMutex.Unlock()
   378  
   379  	// truncate the message to avoid errors when reporting replica status
   380  	if len(message) > 256 {
   381  		message = message[:256]
   382  	}
   383  
   384  	currentTime := time.Now()
   385  	d.status.LastIoErrorTimestamp = &currentTime
   386  	d.status.LastIoErrNumber = errno
   387  	d.status.LastIoError = message
   388  }
   389  
   390  // setSqlError updates the current replication status with the specific |errno| and |message| to describe an SQL error.
   391  func (d *doltBinlogReplicaController) setSqlError(errno uint, message string) {
   392  	d.statusMutex.Lock()
   393  	defer d.statusMutex.Unlock()
   394  
   395  	// truncate the message to avoid errors when reporting replica status
   396  	if len(message) > 256 {
   397  		message = message[:256]
   398  	}
   399  
   400  	currentTime := time.Now()
   401  	d.status.LastSqlErrorTimestamp = &currentTime
   402  	d.status.LastSqlErrNumber = errno
   403  	d.status.LastSqlError = message
   404  }
   405  
   406  //
   407  // Helper functions
   408  //
   409  
   410  func getOptionValueAsString(option binlogreplication.ReplicationOption) (string, error) {
   411  	stringOptionValue, ok := option.Value.(binlogreplication.StringReplicationOptionValue)
   412  	if ok {
   413  		return stringOptionValue.GetValueAsString(), nil
   414  	}
   415  
   416  	return "", fmt.Errorf("unsupported value type for option %q; found %T, "+
   417  		"but expected a string", option.Name, option.Value.GetValue())
   418  }
   419  
   420  func getOptionValueAsInt(option binlogreplication.ReplicationOption) (int, error) {
   421  	integerOptionValue, ok := option.Value.(binlogreplication.IntegerReplicationOptionValue)
   422  	if ok {
   423  		return integerOptionValue.GetValueAsInt(), nil
   424  	}
   425  
   426  	return 0, fmt.Errorf("unsupported value type for option %q; found %T, "+
   427  		"but expected an integer", option.Name, option.Value.GetValue())
   428  }
   429  
   430  func getOptionValueAsTableNames(option binlogreplication.ReplicationOption) ([]sql.UnresolvedTable, error) {
   431  	tableNamesOptionValue, ok := option.Value.(binlogreplication.TableNamesReplicationOptionValue)
   432  	if ok {
   433  		return tableNamesOptionValue.GetValueAsTableList(), nil
   434  	}
   435  
   436  	return nil, fmt.Errorf("unsupported value type for option %q; found %T, "+
   437  		"but expected a list of tables", option.Name, option.Value.GetValue())
   438  }
   439  
   440  func verifyAllTablesAreQualified(urts []sql.UnresolvedTable) error {
   441  	for _, urt := range urts {
   442  		if urt.Database().Name() == "" {
   443  			return fmt.Errorf("no database specified for table '%s'; "+
   444  				"all filter table names must be qualified with a database name", urt.Name())
   445  		}
   446  	}
   447  	return nil
   448  }