github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/binlogreplication/binlog_replica_controller.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package binlogreplication 16 17 import ( 18 "fmt" 19 "strings" 20 "sync" 21 "time" 22 23 "github.com/dolthub/dolt/go/libraries/doltcore/sqlserver" 24 25 "github.com/dolthub/go-mysql-server/sql" 26 "github.com/dolthub/go-mysql-server/sql/binlogreplication" 27 "github.com/dolthub/go-mysql-server/sql/mysql_db" 28 ) 29 30 var DoltBinlogReplicaController = newDoltBinlogReplicaController() 31 32 // binlogApplierUser is the locked, super user account that is used to execute replicated SQL statements. 33 // We cannot always assume the root account will exist, so we automatically create this account that is 34 // specific to binlog replication and lock it so that it cannot be used to login. 35 const binlogApplierUser = "dolt-binlog-applier" 36 37 // ErrServerNotConfiguredAsReplica is returned when replication is started without enough configuration provided. 38 var ErrServerNotConfiguredAsReplica = fmt.Errorf( 39 "server is not configured as a replica; fix with CHANGE REPLICATION SOURCE TO") 40 41 // ErrEmptyHostname is returned when replication is started without a hostname configured. 42 var ErrEmptyHostname = fmt.Errorf("fatal error: Invalid (empty) hostname when attempting to connect " + 43 "to the source server. Connection attempt terminated") 44 45 // ErrEmptyUsername is returned when replication is started without a username configured. 46 var ErrEmptyUsername = fmt.Errorf("fatal error: Invalid (empty) username when attempting to connect " + 47 "to the source server. Connection attempt terminated") 48 49 // ErrReplicationStopped is an internal error that is not returned to users, and signals that STOP REPLICA was called. 50 var ErrReplicationStopped = fmt.Errorf("replication stop requested") 51 52 // doltBinlogReplicaController implements the BinlogReplicaController interface for a Dolt database in order to 53 // provide support for a Dolt server to be a replica of a MySQL primary. 54 // 55 // This type is used concurrently – multiple sessions on the DB can call this interface concurrently, 56 // so all state that the controller tracks MUST be protected with a mutex. 57 type doltBinlogReplicaController struct { 58 status binlogreplication.ReplicaStatus 59 filters *filterConfiguration 60 applier *binlogReplicaApplier 61 ctx *sql.Context 62 63 // statusMutex blocks concurrent access to the ReplicaStatus struct 64 statusMutex *sync.Mutex 65 66 // operationMutex blocks concurrent access to the START/STOP/RESET REPLICA operations 67 operationMutex *sync.Mutex 68 } 69 70 var _ binlogreplication.BinlogReplicaController = (*doltBinlogReplicaController)(nil) 71 72 // newDoltBinlogReplicaController creates a new doltBinlogReplicaController instance. 73 func newDoltBinlogReplicaController() *doltBinlogReplicaController { 74 controller := doltBinlogReplicaController{ 75 filters: newFilterConfiguration(), 76 statusMutex: &sync.Mutex{}, 77 operationMutex: &sync.Mutex{}, 78 } 79 controller.status.ConnectRetry = 60 80 controller.status.SourceRetryCount = 86400 81 controller.status.AutoPosition = true 82 controller.status.ReplicaIoRunning = binlogreplication.ReplicaIoNotRunning 83 controller.status.ReplicaSqlRunning = binlogreplication.ReplicaSqlNotRunning 84 controller.applier = newBinlogReplicaApplier(controller.filters) 85 return &controller 86 } 87 88 // StartReplica implements the BinlogReplicaController interface. 89 func (d *doltBinlogReplicaController) StartReplica(ctx *sql.Context) error { 90 d.operationMutex.Lock() 91 defer d.operationMutex.Unlock() 92 93 // START REPLICA may be called multiple times, but if replication is already running, 94 // it will log a warning and not start up new threads. 95 if d.applier.IsRunning() { 96 ctx.Warn(3083, "Replication thread(s) for channel '' are already running.") 97 return nil 98 } 99 100 if false { 101 // TODO: If the database is already configured for Dolt replication/clustering, then error out. 102 // Add a (BATS?) test to cover this case 103 return fmt.Errorf("dolt replication already enabled; unable to use binlog replication with other replication modes. " + 104 "Disable Dolt replication first before starting binlog replication") 105 } 106 107 // If we aren't running in a sql-server context, it would be nice to return a helpful, Dolt-specific 108 // error message. Currently, this case would trigger an error from the GMS layer, so we can't give 109 // a specific error message about needing to run Dolt in sql-server mode yet. 110 111 _, err := loadReplicaServerId() 112 if err != nil { 113 return fmt.Errorf("unable to start replication: %s", err.Error()) 114 } 115 116 configuration, err := loadReplicationConfiguration(ctx) 117 if err != nil { 118 return err 119 } else if configuration == nil { 120 return ErrServerNotConfiguredAsReplica 121 } else if configuration.Host == "" { 122 DoltBinlogReplicaController.setIoError(ERFatalReplicaError, ErrEmptyHostname.Error()) 123 return ErrEmptyHostname 124 } else if configuration.User == "" { 125 DoltBinlogReplicaController.setIoError(ERFatalReplicaError, ErrEmptyUsername.Error()) 126 return ErrEmptyUsername 127 } 128 129 if d.ctx == nil { 130 return fmt.Errorf("no execution context set for the replica controller") 131 } 132 133 err = d.configureReplicationUser(ctx) 134 if err != nil { 135 return err 136 } 137 138 // Set execution context's user to the binlog replication user 139 d.ctx.SetClient(sql.Client{ 140 User: binlogApplierUser, 141 Address: "localhost", 142 }) 143 144 ctx.GetLogger().Info("starting binlog replication...") 145 d.applier.Go(d.ctx) 146 return nil 147 } 148 149 // configureReplicationUser creates or configures the super user account needed to apply replication 150 // changes and execute DDL statements on the running server. If the account doesn't exist, it will be 151 // created and locked to disable log ins, and if it does exist, but is missing super privs or is not 152 // locked, it will be given super user privs and locked. 153 func (d *doltBinlogReplicaController) configureReplicationUser(ctx *sql.Context) error { 154 server := sqlserver.GetRunningServer() 155 if server == nil { 156 return fmt.Errorf("unable to access a running SQL server") 157 } 158 mySQLDb := server.Engine.Analyzer.Catalog.MySQLDb 159 ed := mySQLDb.Editor() 160 defer ed.Close() 161 162 replicationUser := mySQLDb.GetUser(ed, binlogApplierUser, "localhost", false) 163 if replicationUser == nil { 164 // If the replication user doesn't exist yet, create it and lock it 165 mySQLDb.AddSuperUser(ed, binlogApplierUser, "localhost", "") 166 replicationUser := mySQLDb.GetUser(ed, binlogApplierUser, "localhost", false) 167 if replicationUser == nil { 168 return fmt.Errorf("unable to load replication user") 169 } 170 // Make sure this account is locked so that it cannot be used to log in 171 replicationUser.Locked = true 172 ed.PutUser(replicationUser) 173 } else if replicationUser.IsSuperUser == false || replicationUser.Locked == false { 174 // Fix the replication user if it has been modified 175 replicationUser.IsSuperUser = true 176 replicationUser.Locked = true 177 ed.PutUser(replicationUser) 178 } 179 180 return nil 181 } 182 183 // SetExecutionContext sets the unique |ctx| for the replica's applier to use when applying changes from binlog events 184 // to a database. The applier cannot reuse any existing context, because it executes in a separate routine and would 185 // cause race conditions. 186 func (d *doltBinlogReplicaController) SetExecutionContext(ctx *sql.Context) { 187 d.ctx = ctx 188 } 189 190 // StopReplica implements the BinlogReplicaController interface. 191 func (d *doltBinlogReplicaController) StopReplica(ctx *sql.Context) error { 192 if d.applier.IsRunning() == false { 193 ctx.Warn(3084, "Replication thread(s) for channel '' are already stopped.") 194 return nil 195 } 196 197 d.applier.stopReplicationChan <- struct{}{} 198 199 d.updateStatus(func(status *binlogreplication.ReplicaStatus) { 200 status.ReplicaIoRunning = binlogreplication.ReplicaIoNotRunning 201 status.ReplicaSqlRunning = binlogreplication.ReplicaSqlNotRunning 202 }) 203 204 return nil 205 } 206 207 // SetReplicationSourceOptions implements the BinlogReplicaController interface. 208 func (d *doltBinlogReplicaController) SetReplicationSourceOptions(ctx *sql.Context, options []binlogreplication.ReplicationOption) error { 209 replicaSourceInfo, err := loadReplicationConfiguration(ctx) 210 if err != nil { 211 return err 212 } 213 214 if replicaSourceInfo == nil { 215 replicaSourceInfo = mysql_db.NewReplicaSourceInfo() 216 } 217 218 for _, option := range options { 219 switch strings.ToUpper(option.Name) { 220 case "SOURCE_HOST": 221 value, err := getOptionValueAsString(option) 222 if err != nil { 223 return err 224 } 225 replicaSourceInfo.Host = value 226 case "SOURCE_USER": 227 value, err := getOptionValueAsString(option) 228 if err != nil { 229 return err 230 } 231 replicaSourceInfo.User = value 232 case "SOURCE_PASSWORD": 233 value, err := getOptionValueAsString(option) 234 if err != nil { 235 return err 236 } 237 replicaSourceInfo.Password = value 238 case "SOURCE_PORT": 239 intValue, err := getOptionValueAsInt(option) 240 if err != nil { 241 return err 242 } 243 replicaSourceInfo.Port = uint16(intValue) 244 case "SOURCE_CONNECT_RETRY": 245 intValue, err := getOptionValueAsInt(option) 246 if err != nil { 247 return err 248 } 249 replicaSourceInfo.ConnectRetryInterval = uint32(intValue) 250 case "SOURCE_RETRY_COUNT": 251 intValue, err := getOptionValueAsInt(option) 252 if err != nil { 253 return err 254 } 255 replicaSourceInfo.ConnectRetryCount = uint64(intValue) 256 default: 257 return fmt.Errorf("unknown replication source option: %s", option.Name) 258 } 259 } 260 261 // Persist the updated replica source configuration to disk 262 return persistReplicationConfiguration(ctx, replicaSourceInfo) 263 } 264 265 // SetReplicationFilterOptions implements the BinlogReplicaController interface. 266 func (d *doltBinlogReplicaController) SetReplicationFilterOptions(_ *sql.Context, options []binlogreplication.ReplicationOption) error { 267 for _, option := range options { 268 switch strings.ToUpper(option.Name) { 269 case "REPLICATE_DO_TABLE": 270 value, err := getOptionValueAsTableNames(option) 271 if err != nil { 272 return err 273 } 274 err = d.filters.setDoTables(value) 275 if err != nil { 276 return err 277 } 278 case "REPLICATE_IGNORE_TABLE": 279 value, err := getOptionValueAsTableNames(option) 280 if err != nil { 281 return err 282 } 283 err = d.filters.setIgnoreTables(value) 284 if err != nil { 285 return err 286 } 287 default: 288 return fmt.Errorf("unsupported replication filter option: %s", option.Name) 289 } 290 } 291 292 // TODO: Consider persisting filter settings. MySQL doesn't actually do this... unlike CHANGE REPLICATION SOURCE, 293 // CHANGE REPLICATION FILTER requires users to re-apply the filter options every time a server is restarted, 294 // or to pass them to mysqld on the command line or in configuration. Since we don't want to force users 295 // to specify these on the command line, we should consider diverging from MySQL behavior here slightly and 296 // persisting the filter configuration options if customers want this. 297 298 return nil 299 } 300 301 // GetReplicaStatus implements the BinlogReplicaController interface 302 func (d *doltBinlogReplicaController) GetReplicaStatus(ctx *sql.Context) (*binlogreplication.ReplicaStatus, error) { 303 replicaSourceInfo, err := loadReplicationConfiguration(ctx) 304 if err != nil { 305 return nil, err 306 } 307 308 if replicaSourceInfo == nil { 309 return nil, nil 310 } 311 312 // Lock to read status consistently 313 d.statusMutex.Lock() 314 defer d.statusMutex.Unlock() 315 var copy = d.status 316 317 copy.SourceUser = replicaSourceInfo.User 318 copy.SourceHost = replicaSourceInfo.Host 319 copy.SourcePort = uint(replicaSourceInfo.Port) 320 copy.SourceServerUuid = replicaSourceInfo.Uuid 321 copy.ConnectRetry = replicaSourceInfo.ConnectRetryInterval 322 copy.SourceRetryCount = replicaSourceInfo.ConnectRetryCount 323 copy.ReplicateDoTables = d.filters.getDoTables() 324 copy.ReplicateIgnoreTables = d.filters.getIgnoreTables() 325 326 if d.applier.currentPosition != nil { 327 copy.ExecutedGtidSet = d.applier.currentPosition.GTIDSet.String() 328 copy.RetrievedGtidSet = copy.ExecutedGtidSet 329 } 330 331 return ©, nil 332 } 333 334 // ResetReplica implements the BinlogReplicaController interface 335 func (d *doltBinlogReplicaController) ResetReplica(ctx *sql.Context, resetAll bool) error { 336 d.operationMutex.Lock() 337 defer d.operationMutex.Unlock() 338 339 if d.applier.IsRunning() { 340 return fmt.Errorf("unable to reset replica while replication is running; stop replication and try again") 341 } 342 343 // Reset error status 344 d.updateStatus(func(status *binlogreplication.ReplicaStatus) { 345 status.LastIoErrNumber = 0 346 status.LastSqlErrNumber = 0 347 status.LastIoErrorTimestamp = nil 348 status.LastSqlErrorTimestamp = nil 349 status.LastSqlError = "" 350 status.LastIoError = "" 351 }) 352 353 if resetAll { 354 err := deleteReplicationConfiguration(ctx) 355 if err != nil { 356 return err 357 } 358 359 d.filters = newFilterConfiguration() 360 } 361 362 return nil 363 } 364 365 // updateStatus allows the caller to safely update the replica controller's status. The controller locks it's mutex 366 // before the specified function |f| is called, and unlocks it after |f| is finished running. The current status is 367 // passed into the callback function |f| and the caller can safely update or copy any fields they need. 368 func (d *doltBinlogReplicaController) updateStatus(f func(status *binlogreplication.ReplicaStatus)) { 369 d.statusMutex.Lock() 370 defer d.statusMutex.Unlock() 371 f(&d.status) 372 } 373 374 // setIoError updates the current replication status with the specific |errno| and |message| to describe an IO error. 375 func (d *doltBinlogReplicaController) setIoError(errno uint, message string) { 376 d.statusMutex.Lock() 377 defer d.statusMutex.Unlock() 378 379 // truncate the message to avoid errors when reporting replica status 380 if len(message) > 256 { 381 message = message[:256] 382 } 383 384 currentTime := time.Now() 385 d.status.LastIoErrorTimestamp = ¤tTime 386 d.status.LastIoErrNumber = errno 387 d.status.LastIoError = message 388 } 389 390 // setSqlError updates the current replication status with the specific |errno| and |message| to describe an SQL error. 391 func (d *doltBinlogReplicaController) setSqlError(errno uint, message string) { 392 d.statusMutex.Lock() 393 defer d.statusMutex.Unlock() 394 395 // truncate the message to avoid errors when reporting replica status 396 if len(message) > 256 { 397 message = message[:256] 398 } 399 400 currentTime := time.Now() 401 d.status.LastSqlErrorTimestamp = ¤tTime 402 d.status.LastSqlErrNumber = errno 403 d.status.LastSqlError = message 404 } 405 406 // 407 // Helper functions 408 // 409 410 func getOptionValueAsString(option binlogreplication.ReplicationOption) (string, error) { 411 stringOptionValue, ok := option.Value.(binlogreplication.StringReplicationOptionValue) 412 if ok { 413 return stringOptionValue.GetValueAsString(), nil 414 } 415 416 return "", fmt.Errorf("unsupported value type for option %q; found %T, "+ 417 "but expected a string", option.Name, option.Value.GetValue()) 418 } 419 420 func getOptionValueAsInt(option binlogreplication.ReplicationOption) (int, error) { 421 integerOptionValue, ok := option.Value.(binlogreplication.IntegerReplicationOptionValue) 422 if ok { 423 return integerOptionValue.GetValueAsInt(), nil 424 } 425 426 return 0, fmt.Errorf("unsupported value type for option %q; found %T, "+ 427 "but expected an integer", option.Name, option.Value.GetValue()) 428 } 429 430 func getOptionValueAsTableNames(option binlogreplication.ReplicationOption) ([]sql.UnresolvedTable, error) { 431 tableNamesOptionValue, ok := option.Value.(binlogreplication.TableNamesReplicationOptionValue) 432 if ok { 433 return tableNamesOptionValue.GetValueAsTableList(), nil 434 } 435 436 return nil, fmt.Errorf("unsupported value type for option %q; found %T, "+ 437 "but expected a list of tables", option.Name, option.Value.GetValue()) 438 } 439 440 func verifyAllTablesAreQualified(urts []sql.UnresolvedTable) error { 441 for _, urt := range urts { 442 if urt.Database().Name() == "" { 443 return fmt.Errorf("no database specified for table '%s'; "+ 444 "all filter table names must be qualified with a database name", urt.Name()) 445 } 446 } 447 return nil 448 }